1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.joni;
  21 
  22 import static jdk.nashorn.internal.joni.BitStatus.bsOnAtSimple;
  23 import static jdk.nashorn.internal.joni.BitStatus.bsOnOff;
  24 import static jdk.nashorn.internal.joni.Option.isDontCaptureGroup;
  25 import static jdk.nashorn.internal.joni.Option.isIgnoreCase;
  26 
  27 import jdk.nashorn.internal.joni.encoding.CharacterType;
  28 import jdk.nashorn.internal.joni.encoding.PosixBracket;
  29 import jdk.nashorn.internal.joni.encoding.Ptr;
  30 import jdk.nashorn.internal.joni.ast.AnchorNode;
  31 import jdk.nashorn.internal.joni.ast.AnyCharNode;
  32 import jdk.nashorn.internal.joni.ast.BackRefNode;
  33 import jdk.nashorn.internal.joni.ast.CClassNode;
  34 import jdk.nashorn.internal.joni.ast.CTypeNode;
  35 import jdk.nashorn.internal.joni.ast.CallNode;
  36 import jdk.nashorn.internal.joni.ast.ConsAltNode;
  37 import jdk.nashorn.internal.joni.ast.EncloseNode;
  38 import jdk.nashorn.internal.joni.ast.Node;
  39 import jdk.nashorn.internal.joni.ast.QuantifierNode;
  40 import jdk.nashorn.internal.joni.ast.StringNode;
  41 import jdk.nashorn.internal.joni.ast.CClassNode.CCStateArg;
  42 import jdk.nashorn.internal.joni.constants.AnchorType;
  43 import jdk.nashorn.internal.joni.constants.CCSTATE;
  44 import jdk.nashorn.internal.joni.constants.CCVALTYPE;
  45 import jdk.nashorn.internal.joni.constants.EncloseType;
  46 import jdk.nashorn.internal.joni.constants.NodeType;
  47 import jdk.nashorn.internal.joni.constants.TokenType;
  48 
  49 class Parser extends Lexer {
  50 
    /** Regex being built; assigned from {@code env.reg} in the constructor. */
    protected final Regex regex;
    /** Tree produced by the most recent call to {@code parse()}. */
    protected Node root;

    /**
     * Secondary result of parser methods, which themselves return the parsed node.
     * Within this class it is written by {@code parseEnclose} (0, 1 = group,
     * 2 = option only) and read by {@code parseExp} right after that call.
     */
    protected int returnCode; // return code used by parser methods (they themselves return parsed nodes)
                              // NOTE(review): the original author states this approach does not affect recursive calls
  56 
  57     protected Parser(ScanEnvironment env, char[] chars, int p, int end) {
  58         super(env, chars, p, end);
  59         regex = env.reg;
  60     }
  61 
  62     // onig_parse_make_tree
  63     protected final Node parse() {
  64         root = parseRegexp();
  65         regex.numMem = env.numMem;
  66         return root;
  67     }
  68 
  69     private static final int POSIX_BRACKET_NAME_MIN_LEN            = 4;
  70     private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH      = 20;
  71     private static final char BRACKET_END[]                        = ":]".toCharArray();
  72     private boolean parsePosixBracket(CClassNode cc) {
  73         mark();
  74 
  75         boolean not;
  76         if (peekIs('^')) {
  77             inc();
  78             not = true;
  79         } else {
  80             not = false;
  81         }
  82         if (stop - p >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket
  83             char[][] pbs = PosixBracket.PBSNamesLower;
  84             for (int i=0; i<pbs.length; i++) {
  85                 char[] name = pbs[i];
  86                 // hash lookup here ?
  87                 if (EncodingHelper.strNCmp(chars, p, stop, name, 0, name.length) == 0) {
  88                     p += name.length;
  89                     if (EncodingHelper.strNCmp(chars, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) {
  90                         newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
  91                     }
  92                     cc.addCType(PosixBracket.PBSValues[i], not, env, this);
  93                     inc();
  94                     inc();
  95                     return false;
  96                 }
  97             }
  98 
  99         }
 100 
 101         // not_posix_bracket:
 102         c = 0;
 103         int i= 0;
 104         while (left() && ((c=peek()) != ':') && c != ']') {
 105             inc();
 106             if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
 107         }
 108 
 109         if (c == ':' && left()) {
 110             inc();
 111             if (left()) {
 112                 fetch();
 113                 if (c == ']') newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
 114             }
 115         }
 116         restore();
 117         return true; /* 1: is not POSIX bracket, but no error. */
 118     }
 119 
 120     private CClassNode parseCharProperty() {
 121         int ctype = fetchCharPropertyToCType();
 122         CClassNode n = new CClassNode();
 123         n.addCType(ctype, false, env, this);
 124         if (token.getPropNot()) n.setNot();
 125         return n;
 126     }
 127 
 128     private boolean codeExistCheck(int code, boolean ignoreEscaped) {
 129         mark();
 130 
 131         boolean inEsc = false;
 132         while (left()) {
 133             if (ignoreEscaped && inEsc) {
 134                 inEsc = false;
 135             } else {
 136                 fetch();
 137                 if (c == code) {
 138                     restore();
 139                     return true;
 140                 }
 141                 if (c == syntax.metaCharTable.esc) inEsc = true;
 142             }
 143         }
 144 
 145         restore();
 146         return false;
 147     }
 148 
    /**
     * Parses a bracketed character class. It is invoked when the current token is
     * {@code CC_CC_OPEN}, both from {@code parseExp} and recursively for nested classes.
     *
     * <p>Tokens are read with {@code fetchTokenInCC()} until {@code CC_CLOSE}. Values,
     * ranges and character types are fed into a {@code CClassNode} through a
     * {@code CCStateArg} state record. Each {@code &&} operator (CC_AND) intersects the
     * class collected so far with the following operand. The trailing comments naming
     * labels (val_entry, sb_char, next_class, ...) refer to goto targets of the C code
     * this method was ported from.
     *
     * @return the resulting character class node, negated when the class started with an
     *         unescaped {@code ^}
     */
    private CClassNode parseCharClass() {
        fetchTokenInCC();

        // a leading unescaped '^' negates the whole class
        final boolean neg;
        if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
            neg = true;
            fetchTokenInCC();
        } else {
            neg = false;
        }

        // ']' right after the opening: treated as a literal if another ']' follows later
        if (token.type == TokenType.CC_CLOSE) {
            if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
            env.ccEscWarn("]");
            token.type = TokenType.CHAR; /* allow []...] */
        }

        CClassNode cc = new CClassNode();   // operand currently being filled
        CClassNode prevCC = null;           // accumulated left side of '&&', if any was seen
        CClassNode workCC = null;           // scratch node that replaces cc after the first '&&'

        CCStateArg arg = new CCStateArg();

        boolean andStart = false;
        arg.state = CCSTATE.START;

        while (token.type != TokenType.CC_CLOSE) {
            // set by branches that already fetched the next token themselves
            boolean fetched = false;

            switch (token.type) {

            case CHAR:
                if (token.getC() > 0xff) {
                    arg.inType = CCVALTYPE.CODE_POINT;
                } else {
                    arg.inType = CCVALTYPE.SB; // sb_char:
                }
                arg.v = token.getC();
                arg.vIsRaw = false;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case RAW_BYTE:
                if (token.base != 0) { /* tok->base != 0 : octal or hexadec. */
                    // collect up to four consecutive raw bytes written in the same base
                    byte[] buf = new byte[4];
                    int psave = p; // NOTE(review): never read in this method
                    int base = token.base;
                    buf[0] = (byte)token.getC();
                    int i;
                    for (i=1; i<4; i++) {
                        fetchTokenInCC();
                        if (token.type != TokenType.RAW_BYTE || token.base != base) {
                            fetched = true;
                            break;
                        }
                        buf[i] = (byte)token.getC();
                    }

                    if (i == 1) {
                        arg.v = buf[0] & 0xff;
                        arg.inType = CCVALTYPE.SB; // goto raw_single
                    } else {
                        arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length);
                        arg.inType = CCVALTYPE.CODE_POINT;
                    }
                } else {
                    arg.v = token.getC();
                    arg.inType = CCVALTYPE.SB; // raw_single:
                }
                arg.vIsRaw = true;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case CODE_POINT:
                arg.v = token.getCode();
                arg.vIsRaw = true;
                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
                break;

            case POSIX_BRACKET_OPEN:
                if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
                    // rewind to the token start and treat the '[' as an ordinary value
                    env.ccEscWarn("[");
                    p = token.backP;
                    arg.v = token.getC();
                    arg.vIsRaw = false;
                    parseCharClassValEntry(cc, arg); // goto val_entry
                    break;
                }
                cc.nextStateClass(arg, env); // goto next_class
                break;

            case CHAR_TYPE:
                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
                cc.nextStateClass(arg, env); // next_class:
                break;

            case CHAR_PROPERTY:
                int ctype = fetchCharPropertyToCType();
                cc.addCType(ctype, token.getPropNot(), env, this);
                cc.nextStateClass(arg, env); // goto next_class
                break;

            case CC_RANGE:
                // the meaning of '-' depends on the state left by the previous element
                if (arg.state == CCSTATE.VALUE) {
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }
                    arg.state = CCSTATE.RANGE;
                } else if (arg.state == CCSTATE.START) {
                    arg.v = token.getC(); /* [-xa] is allowed */
                    arg.vIsRaw = false;
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
                    parseCharClassValEntry(cc, arg); // goto val_entry
                    break;
                } else if (arg.state == CCSTATE.RANGE) {
                    env.ccEscWarn("-");
                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
                    break;
                } else { /* CCS_COMPLETE */
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }

                    if (syntax.allowDoubleRangeOpInCC()) {
                        env.ccEscWarn("-");
                        parseCharClassSbChar(cc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */
                        break;
                    }
                    newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
                }
                break;

            case CC_CC_OPEN: /* [ */
                // nested class: parsed recursively, then OR-ed into the current operand
                CClassNode acc = parseCharClass();
                cc.or(acc);
                break;

            case CC_AND:     /* && */
                // flush a pending single value before starting the next operand
                if (arg.state == CCSTATE.VALUE) {
                    arg.v = 0; // ??? safe v ?
                    arg.vIsRaw = false;
                    cc.nextStateValue(arg, env);
                }
                /* initialize local variables */
                andStart = true;
                arg.state = CCSTATE.START;
                if (prevCC != null) {
                    // later '&&': fold the operand just finished into the accumulator
                    prevCC.and(cc);
                } else {
                    // first '&&': what was collected so far becomes the accumulator,
                    // and collection continues in the scratch node
                    prevCC = cc;
                    if (workCC == null) workCC = new CClassNode();
                    cc = workCC;
                }
                cc.clear();
                break;

            case EOT:
                // no break: control reaches 'default' if the call below returns
                // (presumably it throws -- confirm in the superclass)
                newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);

            default:
                newInternalException(ERR_PARSER_BUG);
            } // switch

            if (!fetched) fetchTokenInCC();

        } // while

        // flush a value still pending when the class closed
        if (arg.state == CCSTATE.VALUE) {
            arg.v = 0; // ??? safe v ?
            arg.vIsRaw = false;
            cc.nextStateValue(arg, env);
        }

        // complete the last '&&' intersection, if any
        if (prevCC != null) {
            prevCC.and(cc);
            cc = prevCC;
        }

        if (neg) {
            cc.setNot();
        } else {
            cc.clearNot();
        }

        // for syntaxes with notNewlineInNegativeCC(): set the newline bit in the bit set
        // of a negated, non-empty class
        if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
            if (!cc.isEmpty()) {
                final int NEW_LINE = 0x0a;
                if (EncodingHelper.isNewLine(NEW_LINE)) {
                    cc.bs.set(NEW_LINE);
                }
            }
        }

        return cc;
    }
 360 
 361     private void parseCharClassSbChar(CClassNode cc, CCStateArg arg) {
 362         arg.inType = CCVALTYPE.SB;
 363         arg.v = token.getC();
 364         arg.vIsRaw = false;
 365         parseCharClassValEntry2(cc, arg); // goto val_entry2
 366     }
 367 
 368     private void parseCharClassRangeEndVal(CClassNode cc, CCStateArg arg) {
 369         arg.v = '-';
 370         arg.vIsRaw = false;
 371         parseCharClassValEntry(cc, arg); // goto val_entry
 372     }
 373 
 374     private void parseCharClassValEntry(CClassNode cc, CCStateArg arg) {
 375         arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
 376         parseCharClassValEntry2(cc, arg); // val_entry2:
 377     }
 378 
 379     private void parseCharClassValEntry2(CClassNode cc, CCStateArg arg) {
 380         cc.nextStateValue(arg, env);
 381     }
 382 
 383     private Node parseEnclose(TokenType term) {
 384         Node node = null;
 385 
 386         if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
 387 
 388         int option = env.option;
 389 
 390         if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
 391             inc();
 392             if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
 393 
 394             boolean listCapture = false;
 395 
 396             fetch();
 397             switch(c) {
 398             case ':':  /* (?:...) grouping only */
 399                 fetchToken(); // group:
 400                 node = parseSubExp(term);
 401                 returnCode = 1; /* group */
 402                 return node;
 403             case '=':
 404                 node = new AnchorNode(AnchorType.PREC_READ);
 405                 break;
 406             case '!':  /*         preceding read */
 407                 node = new AnchorNode(AnchorType.PREC_READ_NOT);
 408                 break;
 409             case '>':  /* (?>...) stop backtrack */
 410                 node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 411                 break;
 412             case '\'':
 413                 if (Config.USE_NAMED_GROUP) {
 414                     if (syntax.op2QMarkLtNamedGroup()) {
 415                         listCapture = false; // goto named_group1
 416                         node = parseEncloseNamedGroup2(listCapture);
 417                         break;
 418                     } else {
 419                         newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 420                     }
 421                 } // USE_NAMED_GROUP
 422                 break;
 423             case '<':  /* look behind (?<=...), (?<!...) */
 424                 fetch();
 425                 if (c == '=') {
 426                     node = new AnchorNode(AnchorType.LOOK_BEHIND);
 427                 } else if (c == '!') {
 428                     node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
 429                 } else {
 430                     if (Config.USE_NAMED_GROUP) {
 431                         if (syntax.op2QMarkLtNamedGroup()) {
 432                             unfetch();
 433                             c = '<';
 434 
 435                             listCapture = false; // named_group1:
 436                             node = parseEncloseNamedGroup2(listCapture); // named_group2:
 437                             break;
 438                         } else {
 439                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 440                         }
 441 
 442                     } else { // USE_NAMED_GROUP
 443                         newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 444                     } // USE_NAMED_GROUP
 445                 }
 446                 break;
 447             case '@':
 448                 if (syntax.op2AtMarkCaptureHistory()) {
 449                     if (Config.USE_NAMED_GROUP) {
 450                         if (syntax.op2QMarkLtNamedGroup()) {
 451                             fetch();
 452                             if (c == '<' || c == '\'') {
 453                                 listCapture = true;
 454                                 node = parseEncloseNamedGroup2(listCapture); // goto named_group2 /* (?@<name>...) */
 455                             }
 456                             unfetch();
 457                         }
 458                     } // USE_NAMED_GROUP
 459                     EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
 460                     int num = env.addMemEntry();
 461                     if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
 462                     en.regNum = num;
 463                     node = en;
 464                 } else {
 465                     newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 466                 }
 467                 break;
 468 
 469             // case 'p': #ifdef USE_POSIXLINE_OPTION
 470             case '-':
 471             case 'i':
 472             case 'm':
 473             case 's':
 474             case 'x':
 475                 boolean neg = false;
 476                 while (true) {
 477                     switch(c) {
 478                     case ':':
 479                     case ')':
 480                         break;
 481                     case '-':
 482                         neg = true;
 483                         break;
 484                     case 'x':
 485                         option = bsOnOff(option, Option.EXTEND, neg);
 486                         break;
 487                     case 'i':
 488                         option = bsOnOff(option, Option.IGNORECASE, neg);
 489                         break;
 490                     case 's':
 491                         if (syntax.op2OptionPerl()) {
 492                             option = bsOnOff(option, Option.MULTILINE, neg);
 493                         } else {
 494                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 495                         }
 496                         break;
 497                     case 'm':
 498                         if (syntax.op2OptionPerl()) {
 499                             option = bsOnOff(option, Option.SINGLELINE, !neg);
 500                         } else if (syntax.op2OptionRuby()) {
 501                             option = bsOnOff(option, Option.MULTILINE, neg);
 502                         } else {
 503                             newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 504                         }
 505                         break;
 506                     // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
 507                     // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
 508                     // break;
 509 
 510                     default:
 511                         newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 512                     } // switch
 513 
 514                     if (c == ')') {
 515                         EncloseNode en = new EncloseNode(option, 0); // node_new_option
 516                         node = en;
 517                         returnCode = 2; /* option only */
 518                         return node;
 519                     } else if (c == ':') {
 520                         int prev = env.option;
 521                         env.option = option;
 522                         fetchToken();
 523                         Node target = parseSubExp(term);
 524                         env.option = prev;
 525                         EncloseNode en = new EncloseNode(option, 0); // node_new_option
 526                         en.setTarget(target);
 527                         node = en;
 528                         returnCode = 0;
 529                         return node;
 530                     }
 531                     if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
 532                     fetch();
 533                 } // while
 534 
 535             default:
 536                 newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 537             } // switch
 538 
 539         } else {
 540             if (isDontCaptureGroup(env.option)) {
 541                 fetchToken(); // goto group
 542                 node = parseSubExp(term);
 543                 returnCode = 1; /* group */
 544                 return node;
 545             }
 546             EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
 547             int num = env.addMemEntry();
 548             en.regNum = num;
 549             node = en;
 550         }
 551 
 552         fetchToken();
 553         Node target = parseSubExp(term);
 554 
 555         if (node.getType() == NodeType.ANCHOR) {
 556             AnchorNode an = (AnchorNode) node;
 557             an.setTarget(target);
 558         } else {
 559             EncloseNode en = (EncloseNode)node;
 560             en.setTarget(target);
 561             if (en.type == EncloseType.MEMORY) {
 562                 /* Don't move this to previous of parse_subexp() */
 563                 env.setMemNode(en.regNum, node);
 564             }
 565         }
 566         returnCode = 0;
 567         return node; // ??
 568     }
 569 
 570     private Node parseEncloseNamedGroup2(boolean listCapture) {
 571         int nm = p;
 572         int num = fetchName(c, false);
 573         int nameEnd = value;
 574         num = env.addMemEntry();
 575         if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
 576 
 577         regex.nameAdd(chars, nm, nameEnd, num, syntax);
 578         EncloseNode en = new EncloseNode(env.option, true); // node_new_enclose_memory
 579         en.regNum = num;
 580 
 581         Node node = en;
 582 
 583         if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num);
 584         env.numNamed++;
 585         return node;
 586     }
 587 
 588     private int findStrPosition(int[]s, int n, int from, int to, Ptr nextChar) {
 589         int x;
 590         int q;
 591         int p = from;
 592         int i = 0;
 593         while (p < to) {
 594             x = chars[p];
 595             q = p + 1;
 596             if (x == s[0]) {
 597                 for (i=1; i<n && q<to; i++) {
 598                     x = chars[q];
 599                     if (x != s[i]) break;
 600                     q++;
 601                 }
 602                 if (i >= n) {
 603                     if (chars[nextChar.p] != 0) nextChar.p = q; // we may need zero term semantics...
 604                     return p;
 605                 }
 606             }
 607             p = q;
 608         }
 609         return -1;
 610     }
 611 
    /**
     * Parses one expression element starting at the current token, including any
     * repeat operator that follows it.
     *
     * <p>Returns {@code StringNode.EMPTY} when the current token is {@code term}, ALT or
     * EOT. Otherwise a node is built according to the token type. Most branches then
     * fetch the next token and finish through {@code parseExpRepeat}; string and raw-byte
     * tokens are delegated to {@code parseExpTkByte} / {@code parseExpTkRawByte}. The
     * trailing comments naming labels (tk_byte, repeat, ...) refer to goto targets of the
     * C code this method was ported from.
     *
     * @param term token type that ends the enclosing sub-expression
     * @return the parsed node
     */
    private Node parseExp(TokenType term) {
        if (token.type == term) return StringNode.EMPTY; // goto end_of_token

        Node node = null;
        boolean group = false; // set when parseEnclose reported a grouping-only construct

        switch(token.type) {
        case ALT:
        case EOT:
            return StringNode.EMPTY; // end_of_token:, node_new_empty

        case SUBEXP_OPEN:
            node = parseEnclose(TokenType.SUBEXP_CLOSE);
            if (returnCode == 1) {
                group = true;
            } else if (returnCode == 2) { /* option only */
                // the option node has no target yet: parse what follows under the new
                // options, restore the previous options, and attach it as the target
                int prev = env.option;
                EncloseNode en = (EncloseNode)node;
                env.option = en.option;
                fetchToken();
                Node target = parseSubExp(term);
                env.option = prev;
                en.setTarget(target);
                return node;
            }
            break;
        case SUBEXP_CLOSE:
            // unmatched ')': only accepted by syntaxes that allow it, then treated as text
            if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
            if (token.escaped) {
                return parseExpTkRawByte(group); // goto tk_raw_byte
            } else {
                return parseExpTkByte(group); // goto tk_byte
            }
        case STRING:
            return parseExpTkByte(group); // tk_byte:

        case RAW_BYTE:
            return parseExpTkRawByte(group); // tk_raw_byte:
        case CODE_POINT:
            char[] buf = new char[] {(char)token.getCode()};
            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
            node = new StringNode(buf, 0, 1);
            break;

        case QUOTE_OPEN:
            // quoted literal: everything up to the <esc>E sequence, or to the end of the
            // pattern when that sequence is not found
            int[] endOp = new int[] {syntax.metaCharTable.esc, 'E'};
            int qstart = p;
            Ptr nextChar = new Ptr();
            int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar);
            if (qend == -1) nextChar.p = qend = stop;
            node = new StringNode(chars, qstart, qend);
            p = nextChar.p;
            break;

        case CHAR_TYPE:
            switch(token.getPropCType()) {
            case CharacterType.D:
            case CharacterType.S:
            case CharacterType.W:
                // NOTE(review): node stays null here when Config.NON_UNICODE_SDW is false
                if (Config.NON_UNICODE_SDW) {
                    CClassNode cc = new CClassNode();
                    cc.addCType(token.getPropCType(), false, env, this);
                    if (token.getPropNot()) cc.setNot();
                    node = cc;
                }
                break;

            case CharacterType.WORD:
                node = new CTypeNode(token.getPropCType(), token.getPropNot());
                break;

            case CharacterType.SPACE:
            case CharacterType.DIGIT:
            case CharacterType.XDIGIT:
                // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
                CClassNode ccn = new CClassNode();
                ccn.addCType(token.getPropCType(), false, env, this);
                if (token.getPropNot()) ccn.setNot();
                node = ccn;
                break;

            default:
                newInternalException(ERR_PARSER_BUG);

            } // inner switch
            break;

        case CHAR_PROPERTY:
            node = parseCharProperty();
            break;

        case CC_CC_OPEN:
            CClassNode cc = parseCharClass();
            node = cc;
            if (isIgnoreCase(env.option)) {
                // apply case folding to the class; if that produced alternatives,
                // wrap the class and those alternatives in an alternation node
                ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
                EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);

                if (arg.altRoot != null) {
                    node = ConsAltNode.newAltNode(node, arg.altRoot);
                }
            }
            break;

        case ANYCHAR:
            node = new AnyCharNode();
            break;

        case ANYCHAR_ANYTIME:
            // any-char wrapped in a quantifier from 0 to REPEAT_INFINITE
            node = new AnyCharNode();
            QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
            qn.setTarget(node);
            node = qn;
            break;

        case BACKREF:
            // a single reference is carried in ref1; several come as an array
            int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
            node = new BackRefNode(token.getBackrefNum(),
                            backRefs,
                            token.getBackrefByName(),
                            token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
                            token.getBackrefLevel(),      // ...
                            env);

            break;

        case CALL:
            // NOTE(review): node stays null here when Config.USE_SUBEXP_CALL is false
            if (Config.USE_SUBEXP_CALL) {
                int gNum = token.getCallGNum();

                // negative group numbers are relative and converted to absolute ones
                if (gNum < 0) {
                    gNum = backrefRelToAbs(gNum);
                    if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
                }
                node = new CallNode(chars, token.getCallNameP(), token.getCallNameEnd(), gNum);
                env.numCall++;
            } // USE_SUBEXP_CALL
            break;

        case ANCHOR:
            node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
            break;

        case OP_REPEAT:
        case INTERVAL:
            // repeat operator with nothing in front of it
            if (syntax.contextIndepRepeatOps()) {
                if (syntax.contextInvalidRepeatOps()) {
                    newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
                } else {
                    node = StringNode.EMPTY; // node_new_empty
                }
            } else {
                return parseExpTkByte(group); // goto tk_byte
            }
            break;

        default:
            newInternalException(ERR_PARSER_BUG);
        } //switch

        //targetp = node;

        fetchToken(); // re_entry:

        return parseExpRepeat(node, group); // repeat:
    }
 778 
 779     private Node parseExpTkByte(boolean group) {
 780         StringNode node = new StringNode(chars, token.backP, p); // tk_byte:
 781         while (true) {
 782             fetchToken();
 783             if (token.type != TokenType.STRING) break;
 784 
 785             if (token.backP == node.end) {
 786                 node.end = p; // non escaped character, remain shared, just increase shared range
 787             } else {
 788                 node.cat(chars, token.backP, p); // non continuous string stream, need to COW
 789             }
 790         }
 791         // targetp = node;
 792         return parseExpRepeat(node, group); // string_end:, goto repeat
 793     }
 794 
 795     private Node parseExpTkRawByte(boolean group) {
 796         // tk_raw_byte:
 797 
 798         // important: we don't use 0xff mask here neither in the compiler
 799         // (in the template string) so we won't have to mask target
 800         // strings when comparing against them in the matcher
 801         StringNode node = new StringNode((char)token.getC());
 802         node.setRaw();
 803 
 804         int len = 1;
 805         while (true) {
 806             if (len >= 1) {
 807                 if (len == 1) {
 808                     fetchToken();
 809                     node.clearRaw();
 810                     // !goto string_end;!
 811                     return parseExpRepeat(node, group);
 812                 }
 813             }
 814 
 815             fetchToken();
 816             if (token.type != TokenType.RAW_BYTE) {
 817                 /* Don't use this, it is wrong for little endian encodings. */
 818                 // USE_PAD_TO_SHORT_BYTE_CHAR ...
 819 
 820                 newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
 821             }
 822 
 823             // important: we don't use 0xff mask here neither in the compiler
 824             // (in the template string) so we won't have to mask target
 825             // strings when comparing against them in the matcher
 826             node.cat((char)token.getC());
 827             len++;
 828         } // while
 829     }
 830 
 831     private Node parseExpRepeat(Node target, boolean group) {
 832         while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
 833             if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
 834 
 835             QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
 836                                                      token.getRepeatUpper(),
 837                                                      token.type == TokenType.INTERVAL);
 838 
 839             qtfr.greedy = token.getRepeatGreedy();
 840             int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd());
 841             Node qn = qtfr;
 842 
 843             if (token.getRepeatPossessive()) {
 844                 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 845                 en.setTarget(qn);
 846                 qn = en;
 847             }
 848 
 849             if (ret == 0) {
 850                 target = qn;
 851             } else if (ret == 2) { /* split case: /abc+/ */
 852                 target = ConsAltNode.newListNode(target, null);
 853                 ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
 854 
 855                 fetchToken();
 856                 return parseExpRepeatForCar(target, tmp, group);
 857             }
 858             fetchToken(); // goto re_entry
 859         }
 860         return target;
 861     }
 862 
 863     private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
 864         while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
 865             if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
 866 
 867             QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
 868                                                      token.getRepeatUpper(),
 869                                                      token.type == TokenType.INTERVAL);
 870 
 871             qtfr.greedy = token.getRepeatGreedy();
 872             int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());
 873             Node qn = qtfr;
 874 
 875             if (token.getRepeatPossessive()) {
 876                 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 877                 en.setTarget(qn);
 878                 qn = en;
 879             }
 880 
 881             if (ret == 0) {
 882                 target.setCar(qn);
 883             } else if (ret == 2) { /* split case: /abc+/ */
 884                 assert false;
 885             }
 886             fetchToken(); // goto re_entry
 887         }
 888         return top;
 889     }
 890 
 891     private Node parseBranch(TokenType term) {
 892         Node node = parseExp(term);
 893 
 894         if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
 895             return node;
 896         } else {
 897             ConsAltNode top = ConsAltNode.newListNode(node, null);
 898             ConsAltNode t = top;
 899 
 900             while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
 901                 node = parseExp(term);
 902                 if (node.getType() == NodeType.LIST) {
 903                     t.setCdr((ConsAltNode)node);
 904                     while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr;
 905 
 906                     t = ((ConsAltNode)node);
 907                 } else {
 908                     t.setCdr(ConsAltNode.newListNode(node, null));
 909                     t = t.cdr;
 910                 }
 911             }
 912             return top;
 913         }
 914     }
 915 
 916     /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
 917     private Node parseSubExp(TokenType term) {
 918         Node node = parseBranch(term);
 919 
 920         if (token.type == term) {
 921             return node;
 922         } else if (token.type == TokenType.ALT) {
 923             ConsAltNode top = ConsAltNode.newAltNode(node, null);
 924             ConsAltNode t = top;
 925             while (token.type == TokenType.ALT) {
 926                 fetchToken();
 927                 node = parseBranch(term);
 928 
 929                 t.setCdr(ConsAltNode.newAltNode(node, null));
 930                 t = t.cdr;
 931             }
 932 
 933             if (token.type != term) parseSubExpError(term);
 934             return top;
 935         } else {
 936             parseSubExpError(term);
 937             return null; //not reached
 938         }
 939     }
 940 
 941     private void parseSubExpError(TokenType term) {
 942         if (term == TokenType.SUBEXP_CLOSE) {
 943             newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
 944         } else {
 945             newInternalException(ERR_PARSER_BUG);
 946         }
 947     }
 948 
 949     private Node parseRegexp() {
 950         fetchToken();
 951         return parseSubExp(TokenType.EOT);
 952     }
 953 }