1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni.ast;
  21 
  22 import jdk.nashorn.internal.runtime.regexp.joni.*;
  23 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
  24 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
  25 import jdk.nashorn.internal.runtime.regexp.joni.encoding.AsciiTables;
  26 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
  27 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
  28 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
  29 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
  30 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
  31 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  32 
  33 public final class CClassNode extends Node {
  34     private static final int FLAG_NCCLASS_NOT = 1<<0;
  35     private static final int FLAG_NCCLASS_SHARE = 1<<1;
  36 
  37     int flags;
  38     public final BitSet bs = new BitSet();  // conditional creation ?
  39     public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */
  40 
  41     private int ctype;                      // for hashing purposes
  42 
  43     // node_new_cclass
  44     public CClassNode() {}
  45 
  46     public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
  47         this(not, sbOut, ranges);
  48         this.ctype = ctype;
  49     }
  50 
  51     public void clear() {
  52         bs.clear();
  53         flags = 0;
  54         mbuf = null;
  55     }
  56 
  57     // node_new_cclass_by_codepoint_range, only used by shared Char Classes
  58     public CClassNode(boolean not, int sbOut, int[]ranges) {
  59         if (not) setNot();
  60         // bs.clear();
  61 
  62         if (sbOut > 0 && ranges != null) {
  63             int n = ranges[0];
  64             for (int i=0; i<n; i++) {
  65                 int from = ranges[i * 2 + 1];
  66                 int to = ranges[i * 2 + 2];
  67                 for (int j=from; j<=to; j++) {
  68                     if (j >= sbOut) {
  69                         setupBuffer(ranges);
  70                         return;
  71                     }
  72                     bs.set(j);
  73                 }
  74             }
  75         }
  76         setupBuffer(ranges);
  77     }
  78 
  79     @Override
  80     public int getType() {
  81         return CCLASS;
  82     }
  83 
  84     @Override
  85     public String getName() {
  86         return "Character Class";
  87     }
  88 
  89     @Override
  90     public boolean equals(Object other) {
  91         if (!(other instanceof CClassNode)) return false;
  92         CClassNode cc = (CClassNode)other;
  93         return ctype == cc.ctype && isNot() == cc.isNot();
  94     }
  95 
  96     @Override
  97     public int hashCode() {
  98         if (Config.USE_SHARED_CCLASS_TABLE) {
  99             int hash = 0;
 100             hash += ctype;
 101             if (isNot()) hash++;
 102             return hash + (hash >> 5);
 103         } else {
 104             return super.hashCode();
 105         }
 106     }
 107 
 108     @Override
 109     public String toString(int level) {
 110         StringBuilder value = new StringBuilder();
 111         value.append("\n  flags: " + flagsToString());
 112         value.append("\n  bs: " + pad(bs, level + 1));
 113         value.append("\n  mbuf: " + pad(mbuf, level + 1));
 114 
 115         return value.toString();
 116     }
 117 
 118     public String flagsToString() {
 119         StringBuilder flags = new StringBuilder();
 120         if (isNot()) flags.append("NOT ");
 121         if (isShare()) flags.append("SHARE ");
 122         return flags.toString();
 123     }
 124 
 125     private void setupBuffer(int[]ranges) {
 126         if (ranges != null) {
 127             if (ranges[0] == 0) return;
 128             mbuf = new CodeRangeBuffer(ranges);
 129         }
 130     }
 131 
 132     public boolean isEmpty() {
 133         return mbuf == null && bs.isEmpty();
 134     }
 135 
 136     public void addCodeRangeToBuf(int from, int to) {
 137         mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
 138     }
 139 
 140     public void addCodeRange(ScanEnvironment env, int from, int to) {
 141         mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
 142     }
 143 
 144     public void addAllMultiByteRange() {
 145         mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
 146     }
 147 
 148     public void clearNotFlag() {
 149         if (isNot()) {
 150             bs.invert();
 151 
 152             mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
 153             clearNot();
 154         }
 155     }
 156 
 157     // and_cclass
 158     public void and(CClassNode other) {
 159         boolean not1 = isNot();
 160         BitSet bsr1 = bs;
 161         CodeRangeBuffer buf1 = mbuf;
 162         boolean not2 = other.isNot();
 163         BitSet bsr2 = other.bs;
 164         CodeRangeBuffer buf2 = other.mbuf;
 165 
 166         if (not1) {
 167             BitSet bs1 = new BitSet();
 168             bsr1.invertTo(bs1);
 169             bsr1 = bs1;
 170         }
 171 
 172         if (not2) {
 173             BitSet bs2 = new BitSet();
 174             bsr2.invertTo(bs2);
 175             bsr2 = bs2;
 176         }
 177 
 178         bsr1.and(bsr2);
 179 
 180         if (bsr1 != bs) {
 181             bs.copy(bsr1);
 182             bsr1 = bs;
 183         }
 184 
 185         if (not1) {
 186             bs.invert();
 187         }
 188 
 189         CodeRangeBuffer pbuf = null;
 190 
 191         if (not1 && not2) {
 192             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
 193         } else {
 194             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
 195 
 196             if (not1) {
 197                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
 198             }
 199         }
 200         mbuf = pbuf;
 201 
 202     }
 203 
 204     // or_cclass
 205     public void or(CClassNode other) {
 206         boolean not1 = isNot();
 207         BitSet bsr1 = bs;
 208         CodeRangeBuffer buf1 = mbuf;
 209         boolean not2 = other.isNot();
 210         BitSet bsr2 = other.bs;
 211         CodeRangeBuffer buf2 = other.mbuf;
 212 
 213         if (not1) {
 214             BitSet bs1 = new BitSet();
 215             bsr1.invertTo(bs1);
 216             bsr1 = bs1;
 217         }
 218 
 219         if (not2) {
 220             BitSet bs2 = new BitSet();
 221             bsr2.invertTo(bs2);
 222             bsr2 = bs2;
 223         }
 224 
 225         bsr1.or(bsr2);
 226 
 227         if (bsr1 != bs) {
 228             bs.copy(bsr1);
 229             bsr1 = bs;
 230         }
 231 
 232         if (not1) {
 233             bs.invert();
 234         }
 235 
 236         CodeRangeBuffer pbuf = null;
 237         if (not1 && not2) {
 238             pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
 239         } else {
 240             pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
 241             if (not1) {
 242                 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
 243             }
 244         }
 245         mbuf = pbuf;
 246     }
 247 
 248     // add_ctype_to_cc_by_range // Encoding out!
 249     public void addCTypeByRange(int ctype, boolean not, int sbOut, int mbr[]) {
 250         int n = mbr[0];
 251 
 252         if (!not) {
 253             for (int i=0; i<n; i++) {
 254                 for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
 255                     if (j >= sbOut) {
 256                         if (Config.VANILLA) {
 257                             if (j == mbr[i * 2 + 2]) {
 258                                 i++;
 259                             } else if (j > mbr[i * 2 + 1]) {
 260                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
 261                                 i++;
 262                             }
 263                         } else {
 264                             if (j >= mbr[i * 2 + 1]) {
 265                                 addCodeRangeToBuf(j, mbr[i * 2 + 2]);
 266                                 i++;
 267                             }
 268                         }
 269                         // !goto sb_end!, remove duplication!
 270                         for (; i<n; i++) {
 271                             addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
 272                         }
 273                         return;
 274                     }
 275                     bs.set(j);
 276                 }
 277             }
 278             // !sb_end:!
 279             for (int i=0; i<n; i++) {
 280                 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
 281             }
 282 
 283         } else {
 284             int prev = 0;
 285 
 286             for (int i=0; i<n; i++) {
 287                 for (int j=prev; j < mbr[2 * i + 1]; j++) {
 288                     if (j >= sbOut) {
 289                         // !goto sb_end2!, remove duplication
 290                         prev = sbOut;
 291                         for (i=0; i<n; i++) {
 292                             if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
 293                             prev = mbr[i * 2 + 2] + 1;
 294                         }
 295                         if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
 296                         return;
 297                     }
 298                     bs.set(j);
 299                 }
 300                 prev = mbr[2 * i + 2] + 1;
 301             }
 302 
 303             for (int j=prev; j<sbOut; j++) {
 304                 bs.set(j);
 305             }
 306 
 307             // !sb_end2:!
 308             prev = sbOut;
 309             for (int i=0; i<n; i++) {
 310                 if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
 311                 prev = mbr[i * 2 + 2] + 1;
 312             }
 313             if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
 314         }
 315     }
 316 
 317     public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
 318         if (Config.NON_UNICODE_SDW) {
 319             switch(ctype) {
 320             case CharacterType.D:
 321             case CharacterType.S:
 322             case CharacterType.W:
 323                 ctype ^= CharacterType.SPECIAL_MASK;
 324 
 325                 if (env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE) {
 326                     // \s in JavaScript includes unicode characters.
 327                     break;
 328                 }
 329 
 330                 if (not) {
 331                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
 332                         // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
 333                         if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c);
 334                     }
 335                     addAllMultiByteRange();
 336                 } else {
 337                     for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
 338                         // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
 339                         if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c);
 340                     }
 341                 }
 342                 return;
 343             }
 344         }
 345 
 346         int[] ranges = EncodingHelper.ctypeCodeRange(ctype, sbOut);
 347         if (ranges != null) {
 348             addCTypeByRange(ctype, not, sbOut.value, ranges);
 349             return;
 350         }
 351 
 352         switch(ctype) {
 353         case CharacterType.ALPHA:
 354         case CharacterType.BLANK:
 355         case CharacterType.CNTRL:
 356         case CharacterType.DIGIT:
 357         case CharacterType.LOWER:
 358         case CharacterType.PUNCT:
 359         case CharacterType.SPACE:
 360         case CharacterType.UPPER:
 361         case CharacterType.XDIGIT:
 362         case CharacterType.ASCII:
 363         case CharacterType.ALNUM:
 364             if (not) {
 365                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 366                     if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
 367                 }
 368                 addAllMultiByteRange();
 369             } else {
 370                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 371                     if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
 372                 }
 373             }
 374             break;
 375 
 376         case CharacterType.GRAPH:
 377         case CharacterType.PRINT:
 378             if (not) {
 379                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 380                     if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
 381                 }
 382             } else {
 383                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 384                     if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
 385                 }
 386                 addAllMultiByteRange();
 387             }
 388             break;
 389 
 390         case CharacterType.WORD:
 391             if (!not) {
 392                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 393                     if (EncodingHelper.isWord(c)) bs.set(c);
 394                 }
 395 
 396                 addAllMultiByteRange();
 397             } else {
 398                 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
 399                     if (!EncodingHelper.isWord(c)) bs.set(c);
 400                 }
 401             }
 402             break;
 403 
 404         default:
 405             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
 406         } // switch
 407     }
 408 
 409     public static final class CCStateArg {
 410         public int v;
 411         public int vs;
 412         public boolean vsIsRaw;
 413         public boolean vIsRaw;
 414         public CCVALTYPE inType;
 415         public CCVALTYPE type;
 416         public CCSTATE state;
 417     }
 418 
 419     public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
 420         if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
 421 
 422         if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
 423             if (arg.type == CCVALTYPE.SB) {
 424                 bs.set(arg.vs);
 425             } else if (arg.type == CCVALTYPE.CODE_POINT) {
 426                 addCodeRange(env, arg.vs, arg.vs);
 427             }
 428         }
 429         arg.state = CCSTATE.VALUE;
 430         arg.type = CCVALTYPE.CLASS;
 431     }
 432 
 433     public void nextStateValue(CCStateArg arg, ScanEnvironment env) {
 434 
 435         switch(arg.state) {
 436         case VALUE:
 437             if (arg.type == CCVALTYPE.SB) {
 438                 if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
 439                 bs.set(arg.vs);
 440             } else if (arg.type == CCVALTYPE.CODE_POINT) {
 441                 addCodeRange(env, arg.vs, arg.vs);
 442             }
 443             break;
 444 
 445         case RANGE:
 446             if (arg.inType == arg.type) {
 447                 if (arg.inType == CCVALTYPE.SB) {
 448                     if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
 449 
 450                     if (arg.vs > arg.v) {
 451                         if (env.syntax.allowEmptyRangeInCC()) {
 452                             // goto ccs_range_end
 453                             arg.state = CCSTATE.COMPLETE;
 454                             break;
 455                         } else {
 456                             throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
 457                         }
 458                     }
 459                     bs.setRange(arg.vs, arg.v);
 460                 } else {
 461                     addCodeRange(env, arg.vs, arg.v);
 462                 }
 463             } else {
 464                 if (arg.vs > arg.v) {
 465                     if (env.syntax.allowEmptyRangeInCC()) {
 466                         // goto ccs_range_end
 467                         arg.state = CCSTATE.COMPLETE;
 468                         break;
 469                     } else {
 470                         throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
 471                     }
 472                 }
 473                 bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
 474                 addCodeRange(env, arg.vs, arg.v);
 475             }
 476             // ccs_range_end:
 477             arg.state = CCSTATE.COMPLETE;
 478             break;
 479 
 480         case COMPLETE:
 481         case START:
 482             arg.state = CCSTATE.VALUE;
 483             break;
 484 
 485         default:
 486             break;
 487 
 488         } // switch
 489 
 490         arg.vsIsRaw = arg.vIsRaw;
 491         arg.vs = arg.v;
 492         arg.type = arg.inType;
 493     }
 494 
 495     // onig_is_code_in_cc_len
 496     public boolean isCodeInCCLength(int code) {
 497         boolean found;
 498 
 499         if (code > 0xff) {
 500             if (mbuf == null) {
 501                 found = false;
 502             } else {
 503                 found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
 504             }
 505         } else {
 506             found = bs.at(code);
 507         }
 508 
 509         if (isNot()) {
 510             return !found;
 511         } else {
 512             return found;
 513         }
 514     }
 515 
 516     // onig_is_code_in_cc
 517     public boolean isCodeInCC(int code) {
 518          return isCodeInCCLength(code);
 519     }
 520 
 521     public void setNot() {
 522         flags |= FLAG_NCCLASS_NOT;
 523     }
 524 
 525     public void clearNot() {
 526         flags &= ~FLAG_NCCLASS_NOT;
 527     }
 528 
 529     public boolean isNot() {
 530         return (flags & FLAG_NCCLASS_NOT) != 0;
 531     }
 532 
 533     public void setShare() {
 534         flags |= FLAG_NCCLASS_SHARE;
 535     }
 536 
 537     public void clearShare() {
 538         flags &= ~FLAG_NCCLASS_SHARE;
 539     }
 540 
 541     public boolean isShare() {
 542         return (flags & FLAG_NCCLASS_SHARE) != 0;
 543     }
 544 
 545 }