1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni.ast;
  21 
  22 import jdk.nashorn.internal.runtime.regexp.joni.BitSet;
  23 import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer;
  24 import jdk.nashorn.internal.runtime.regexp.joni.Config;
  25 import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
  26 import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
  27 import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
  28 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
  29 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
  30 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
  31 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
  32 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
  33 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
  34 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
  35 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  36 
  37 @SuppressWarnings("javadoc")
  38 public final class CClassNode extends Node {
  39     private static final int FLAG_NCCLASS_NOT = 1<<0;
  40     private static final int FLAG_NCCLASS_SHARE = 1<<1;
  41 
  42     int flags;
  43     public final BitSet bs = new BitSet();  // conditional creation ?
  44     public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */
  45 
  46     private int ctype;                      // for hashing purposes
  47 
  48     private final static short AsciiCtypeTable[] = {
  49             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  50             0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  51             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  52             0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  53             0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  54             0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  55             0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  56             0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  57             0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  58             0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  59             0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  60             0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  61             0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  62             0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  63             0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  64             0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  65             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  66             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  67             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  68             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  69             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  70             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  71             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  72             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  73             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  74             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  75             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  76             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  77             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  78             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  79             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  80             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
  81     };
  82 
  83     // node_new_cclass
  84     public CClassNode() {}
  85 
  86     public void clear() {
  87         bs.clear();
  88         flags = 0;
  89         mbuf = null;
  90     }
  91 
  92     @Override
  93     public int getType() {
  94         return CCLASS;
  95     }
  96 
  97     @Override
  98     public String getName() {
  99         return "Character Class";
 100     }
 101 
 102     @Override
 103     public boolean equals(final Object other) {
 104         if (!(other instanceof CClassNode)) {
 105             return false;
 106         }
 107         final CClassNode cc = (CClassNode)other;
 108         return ctype == cc.ctype && isNot() == cc.isNot();
 109     }
 110 
 111     @Override
 112     public int hashCode() {
 113         if (Config.USE_SHARED_CCLASS_TABLE) {
 114             int hash = 0;
 115             hash += ctype;
 116             if (isNot()) {
 117                 hash++;
 118             }
 119             return hash + (hash >> 5);
 120         }
 121         return super.hashCode();
 122     }
 123 
 124     @Override
 125     public String toString(final int level) {
 126         final StringBuilder value = new StringBuilder();
 127         value.append("\n  flags: ").append(flagsToString());
 128         value.append("\n  bs: ").append(pad(bs, level + 1));
 129         value.append("\n  mbuf: ").append(pad(mbuf, level + 1));
 130 
 131         return value.toString();
 132     }
 133 
 134     public String flagsToString() {
 135         final StringBuilder f = new StringBuilder();
 136         if (isNot()) {
 137             f.append("NOT ");
 138         }
 139         if (isShare()) {
 140             f.append("SHARE ");
 141         }
 142         return f.toString();
 143     }
 144 
 145     public boolean isEmpty() {
 146         return mbuf == null && bs.isEmpty();
 147     }
 148 
 149     public void addCodeRangeToBuf(final int from, final int to) {
 150         mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
 151     }
 152 
 153     public void addCodeRange(final ScanEnvironment env, final int from, final int to) {
 154         mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
 155     }
 156 
 157     public void addAllMultiByteRange() {
 158         mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
 159     }
 160 
 161     public void clearNotFlag() {
 162         if (isNot()) {
 163             bs.invert();
 164 
 165             mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
 166             clearNot();
 167         }
 168     }
 169 
 170     // and_cclass
 171     public void and(final CClassNode other) {
 172         final boolean not1 = isNot();
 173         BitSet bsr1 = bs;
 174         final CodeRangeBuffer buf1 = mbuf;
 175         final boolean not2 = other.isNot();
 176         BitSet bsr2 = other.bs;
 177         final CodeRangeBuffer buf2 = other.mbuf;
 178 
 179         if (not1) {
 180             final BitSet bs1 = new BitSet();
 181             bsr1.invertTo(bs1);
 182             bsr1 = bs1;
 183         }
 184 
 185         if (not2) {
 186             final BitSet bs2 = new BitSet();
 187             bsr2.invertTo(bs2);
 188             bsr2 = bs2;
 189         }
 190 
 191         bsr1.and(bsr2);
 192 
 193         if (bsr1 != bs) {
 194             bs.copy(bsr1);
 195             bsr1 = bs;
 196         }
 197 
 198         if (not1) {
 199             bs.invert();
 200         }
 201 
 202         CodeRangeBuffer pbuf = null;
 203 
 204         if (not1 && not2) {
 205             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
 206         } else {
 207             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
 208 
 209             if (not1) {
 210                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
 211             }
 212         }
 213         mbuf = pbuf;
 214 
 215     }
 216 
 217     // or_cclass
 218     public void or(final CClassNode other) {
 219         final boolean not1 = isNot();
 220         BitSet bsr1 = bs;
 221         final CodeRangeBuffer buf1 = mbuf;
 222         final boolean not2 = other.isNot();
 223         BitSet bsr2 = other.bs;
 224         final CodeRangeBuffer buf2 = other.mbuf;
 225 
 226         if (not1) {
 227             final BitSet bs1 = new BitSet();
 228             bsr1.invertTo(bs1);
 229             bsr1 = bs1;
 230         }
 231 
 232         if (not2) {
 233             final BitSet bs2 = new BitSet();
 234             bsr2.invertTo(bs2);
 235             bsr2 = bs2;
 236         }
 237 
 238         bsr1.or(bsr2);
 239 
 240         if (bsr1 != bs) {
 241             bs.copy(bsr1);
 242             bsr1 = bs;
 243         }
 244 
 245         if (not1) {
 246             bs.invert();
 247         }
 248 
 249         CodeRangeBuffer pbuf = null;
 250         if (not1 && not2) {
 251             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
 252         } else {
 253             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
 254             if (not1) {
 255                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
 256             }
 257         }
 258         mbuf = pbuf;
 259     }
 260 
 261     // add_ctype_to_cc_by_range // Encoding out!
 262     public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) {
 263         final int n = mbr[0];
 264 
 265         if (!not) {
 266             for (int i=0; i<n; i++) {
 267                 for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
 268                     if (j >= sbOut) {
 269                         if (Config.VANILLA) {
 270                             if (j == mbr[i * 2 + 2]) {
 271                                 i++;
 272                             } else if (j > mbr[i * 2 + 1]) {
 273                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
 274                                 i++;
 275                             }
 276                         } else {
 277                             if (j >= mbr[i * 2 + 1]) {
 278                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
 279                                 i++;
 280                             }
 281                         }
 282                         // !goto sb_end!, remove duplication!
 283                         for (; i<n; i++) {
 284                             addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
 285                         }
 286                         return;
 287                     }
 288                     bs.set(j);
 289                 }
 290             }
 291             // !sb_end:!
 292             for (int i=0; i<n; i++) {
 293                 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
 294             }
 295 
 296         } else {
 297             int prev = 0;
 298 
 299             for (int i=0; i<n; i++) {
 300                 for (int j=prev; j < mbr[2 * i + 1]; j++) {
 301                     if (j >= sbOut) {
 302                         // !goto sb_end2!, remove duplication
 303                         prev = sbOut;
 304                         for (i=0; i<n; i++) {
 305                             if (prev < mbr[2 * i + 1]) {
 306                                 addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
 307                             }
 308                             prev = mbr[i * 2 + 2] + 1;
 309                         }
 310                         if (prev < 0x7fffffff/*!!!*/) {
 311                             addCodeRangeToBuf(prev, 0x7fffffff);
 312                         }
 313                         return;
 314                     }
 315                     bs.set(j);
 316                 }
 317                 prev = mbr[2 * i + 2] + 1;
 318             }
 319 
 320             for (int j=prev; j<sbOut; j++) {
 321                 bs.set(j);
 322             }
 323 
 324             // !sb_end2:!
 325             prev = sbOut;
 326             for (int i=0; i<n; i++) {
 327                 if (prev < mbr[2 * i + 1]) {
 328                     addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
 329                 }
 330                 prev = mbr[i * 2 + 2] + 1;
 331             }
 332             if (prev < 0x7fffffff/*!!!*/) {
 333                 addCodeRangeToBuf(prev, 0x7fffffff);
 334             }
 335         }
 336     }
 337 
 338     public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) {
 339         int ct = ctp;
 340         if (Config.NON_UNICODE_SDW) {
 341             switch (ct) {
 342             case CharacterType.D:
 343             case CharacterType.S:
 344             case CharacterType.W:
 345                 ct ^= CharacterType.SPECIAL_MASK;
 346 
 347                 if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) {
 348                     // \s in JavaScript includes unicode characters.
 349                     break;
 350                 }
 351 
 352                 if (not) {
 353                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
 354                         // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
 355                         if ((AsciiCtypeTable[c] & (1 << ct)) == 0) {
 356                             bs.set(c);
 357                         }
 358                     }
 359                     addAllMultiByteRange();
 360                 } else {
 361                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
 362                         // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
 363                         if ((AsciiCtypeTable[c] & (1 << ct)) != 0) {
 364                             bs.set(c);
 365                         }
 366                     }
 367                 }
 368                 return;
 369             default:
 370                 break;
 371             }
 372         }
 373 
 374         final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut);
 375         if (ranges != null) {
 376             addCTypeByRange(ct, not, sbOut.value, ranges);
 377             return;
 378         }
 379 
 380         switch(ct) {
 381         case CharacterType.ALPHA:
 382         case CharacterType.BLANK:
 383         case CharacterType.CNTRL:
 384         case CharacterType.DIGIT:
 385         case CharacterType.LOWER:
 386         case CharacterType.PUNCT:
 387         case CharacterType.SPACE:
 388         case CharacterType.UPPER:
 389         case CharacterType.XDIGIT:
 390         case CharacterType.ASCII:
 391         case CharacterType.ALNUM:
 392             if (not) {
 393                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 394                     if (!EncodingHelper.isCodeCType(c, ct)) {
 395                         bs.set(c);
 396                     }
 397                 }
 398                 addAllMultiByteRange();
 399             } else {
 400                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 401                     if (EncodingHelper.isCodeCType(c, ct)) {
 402                         bs.set(c);
 403                     }
 404                 }
 405             }
 406             break;
 407 
 408         case CharacterType.GRAPH:
 409         case CharacterType.PRINT:
 410             if (not) {
 411                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 412                     if (!EncodingHelper.isCodeCType(c, ct)) {
 413                         bs.set(c);
 414                     }
 415                 }
 416             } else {
 417                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 418                     if (EncodingHelper.isCodeCType(c, ct)) {
 419                         bs.set(c);
 420                     }
 421                 }
 422                 addAllMultiByteRange();
 423             }
 424             break;
 425 
 426         case CharacterType.WORD:
 427             if (!not) {
 428                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 429                     if (EncodingHelper.isWord(c)) {
 430                         bs.set(c);
 431                     }
 432                 }
 433 
 434                 addAllMultiByteRange();
 435             } else {
 436                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 437                     if (!EncodingHelper.isWord(c)) {
 438                         bs.set(c);
 439                     }
 440                 }
 441             }
 442             break;
 443 
 444         default:
 445             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
 446         } // switch
 447     }
 448 
 449     public static final class CCStateArg {
 450         public int v;
 451         public int vs;
 452         public boolean vsIsRaw;
 453         public boolean vIsRaw;
 454         public CCVALTYPE inType;
 455         public CCVALTYPE type;
 456         public CCSTATE state;
 457     }
 458 
 459     public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) {
 460         if (arg.state == CCSTATE.RANGE) {
 461             throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
 462         }
 463 
 464         if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
 465             if (arg.type == CCVALTYPE.SB) {
 466                 bs.set(arg.vs);
 467             } else if (arg.type == CCVALTYPE.CODE_POINT) {
 468                 addCodeRange(env, arg.vs, arg.vs);
 469             }
 470         }
 471         arg.state = CCSTATE.VALUE;
 472         arg.type = CCVALTYPE.CLASS;
 473     }
 474 
 475     public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) {
 476 
 477         switch(arg.state) {
 478         case VALUE:
 479             if (arg.type == CCVALTYPE.SB) {
 480                 if (arg.vs > 0xff) {
 481                     throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
 482                 }
 483                 bs.set(arg.vs);
 484             } else if (arg.type == CCVALTYPE.CODE_POINT) {
 485                 addCodeRange(env, arg.vs, arg.vs);
 486             }
 487             break;
 488 
 489         case RANGE:
 490             if (arg.inType == arg.type) {
 491                 if (arg.inType == CCVALTYPE.SB) {
 492                     if (arg.vs > 0xff || arg.v > 0xff) {
 493                         throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
 494                     }
 495 
 496                     if (arg.vs > arg.v) {
 497                         if (env.syntax.allowEmptyRangeInCC()) {
 498                             // goto ccs_range_end
 499                             arg.state = CCSTATE.COMPLETE;
 500                             break;
 501                         }
 502                         throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
 503                     }
 504                     bs.setRange(arg.vs, arg.v);
 505                 } else {
 506                     addCodeRange(env, arg.vs, arg.v);
 507                 }
 508             } else {
 509                 if (arg.vs > arg.v) {
 510                     if (env.syntax.allowEmptyRangeInCC()) {
 511                         // goto ccs_range_end
 512                         arg.state = CCSTATE.COMPLETE;
 513                         break;
 514                     }
 515                     throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
 516                 }
 517                 bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
 518                 addCodeRange(env, arg.vs, arg.v);
 519             }
 520             // ccs_range_end:
 521             arg.state = CCSTATE.COMPLETE;
 522             break;
 523 
 524         case COMPLETE:
 525         case START:
 526             arg.state = CCSTATE.VALUE;
 527             break;
 528 
 529         default:
 530             break;
 531 
 532         } // switch
 533 
 534         arg.vsIsRaw = arg.vIsRaw;
 535         arg.vs = arg.v;
 536         arg.type = arg.inType;
 537     }
 538 
 539     // onig_is_code_in_cc_len
 540     public boolean isCodeInCCLength(final int code) {
 541         boolean found;
 542 
 543         if (code > 0xff) {
 544             found = mbuf != null && mbuf.isInCodeRange(code);
 545         } else {
 546             found = bs.at(code);
 547         }
 548 
 549         if (isNot()) {
 550             return !found;
 551         }
 552         return found;
 553     }
 554 
 555     // onig_is_code_in_cc
 556     public boolean isCodeInCC(final int code) {
 557          return isCodeInCCLength(code);
 558     }
 559 
 560     public void setNot() {
 561         flags |= FLAG_NCCLASS_NOT;
 562     }
 563 
 564     public void clearNot() {
 565         flags &= ~FLAG_NCCLASS_NOT;
 566     }
 567 
 568     public boolean isNot() {
 569         return (flags & FLAG_NCCLASS_NOT) != 0;
 570     }
 571 
 572     public void setShare() {
 573         flags |= FLAG_NCCLASS_SHARE;
 574     }
 575 
 576     public void clearShare() {
 577         flags &= ~FLAG_NCCLASS_SHARE;
 578     }
 579 
 580     public boolean isShare() {
 581         return (flags & FLAG_NCCLASS_SHARE) != 0;
 582     }
 583 
 584 }