1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni;
  21 
  22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
  23 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
  24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
  25 import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
  26 import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
  27 import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
  28 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
  29 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
  30 import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
  31 import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
  32 import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
  33 import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
  34 import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
  35 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
  36 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
  37 import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
  38 import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
  39 import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
  40 import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
  41 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
  42 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
  43 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
  44 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  45 
  46 class Parser extends Lexer {
  47 
  48     protected final Regex regex;
  49     protected Node root;
  50 
  51     protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
  52                               // this approach will not affect recursive calls
  53 
  54     protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
  55         super(env, chars, p, end);
  56         regex = env.reg;
  57     }
  58 
  59     // onig_parse_make_tree
  60     protected final Node parse() {
  61         root = parseRegexp();
  62         regex.numMem = env.numMem;
  63         return root;
  64     }
  65 
  66     private boolean codeExistCheck(final int code, final boolean ignoreEscaped) {
  67         mark();
  68 
  69         boolean inEsc = false;
  70         while (left()) {
  71             if (ignoreEscaped && inEsc) {
  72                 inEsc = false;
  73             } else {
  74                 fetch();
  75                 if (c == code) {
  76                     restore();
  77                     return true;
  78                 }
  79                 if (c == syntax.metaCharTable.esc) {
  80                     inEsc = true;
  81                 }
  82             }
  83         }
  84 
  85         restore();
  86         return false;
  87     }
  88 
  89     private CClassNode parseCharClass() {
  90         fetchTokenInCC();
  91 
  92         final boolean neg;
  93         if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
  94             neg = true;
  95             fetchTokenInCC();
  96         } else {
  97             neg = false;
  98         }
  99 
 100         if (token.type == TokenType.CC_CLOSE) {
 101             if (!codeExistCheck(']', true)) {
 102                 throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
 103             }
 104             env.ccEscWarn("]");
 105             token.type = TokenType.CHAR; /* allow []...] */
 106         }
 107 
 108         CClassNode cc = new CClassNode();
 109         CClassNode prevCC = null;
 110         CClassNode workCC = null;
 111 
 112         final CCStateArg arg = new CCStateArg();
 113 
 114         boolean andStart = false;
 115         arg.state = CCSTATE.START;
 116 
 117         while (token.type != TokenType.CC_CLOSE) {
 118             boolean fetched = false;
 119 
 120             switch (token.type) {
 121 
 122             case CHAR:
 123                 if (token.getC() > 0xff) {
 124                     arg.inType = CCVALTYPE.CODE_POINT;
 125                 } else {
 126                     arg.inType = CCVALTYPE.SB; // sb_char:
 127                 }
 128                 arg.v = token.getC();
 129                 arg.vIsRaw = false;
 130                 parseCharClassValEntry2(cc, arg); // goto val_entry2
 131                 break;
 132 
 133             case RAW_BYTE:
 134                 arg.v = token.getC();
 135                 arg.inType = CCVALTYPE.SB; // raw_single:
 136                 arg.vIsRaw = true;
 137                 parseCharClassValEntry2(cc, arg); // goto val_entry2
 138                 break;
 139 
 140             case CODE_POINT:
 141                 arg.v = token.getCode();
 142                 arg.vIsRaw = true;
 143                 parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
 144                 break;
 145 
 146             case CHAR_TYPE:
 147                 cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
 148                 cc.nextStateClass(arg, env); // next_class:
 149                 break;
 150 
 151             case CC_RANGE:
 152                 if (arg.state == CCSTATE.VALUE) {
 153                     fetchTokenInCC();
 154                     fetched = true;
 155                     if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
 156                         parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
 157                         break;
 158                     } else if (token.type == TokenType.CC_AND) {
 159                         env.ccEscWarn("-");
 160                         parseCharClassRangeEndVal(cc, arg); // goto range_end_val
 161                         break;
 162                     }
 163                     arg.state = CCSTATE.RANGE;
 164                 } else if (arg.state == CCSTATE.START) {
 165                     arg.v = token.getC(); /* [-xa] is allowed */
 166                     arg.vIsRaw = false;
 167                     fetchTokenInCC();
 168                     fetched = true;
 169                     if (token.type == TokenType.CC_RANGE || andStart) {
 170                         env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
 171                     }
 172                     parseCharClassValEntry(cc, arg); // goto val_entry
 173                     break;
 174                 } else if (arg.state == CCSTATE.RANGE) {
 175                     env.ccEscWarn("-");
 176                     parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
 177                     break;
 178                 } else { /* CCS_COMPLETE */
 179                     fetchTokenInCC();
 180                     fetched = true;
 181                     if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
 182                         parseCharClassRangeEndVal(cc, arg); // goto range_end_val
 183                         break;
 184                     } else if (token.type == TokenType.CC_AND) {
 185                         env.ccEscWarn("-");
 186                         parseCharClassRangeEndVal(cc, arg); // goto range_end_val
 187                         break;
 188                     }
 189 
 190                     if (syntax.allowDoubleRangeOpInCC()) {
 191                         env.ccEscWarn("-");
 192                         arg.inType = CCVALTYPE.SB;
 193                         arg.v = '-';
 194                         arg.vIsRaw = false;
 195                         parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
 196                         break;
 197                     }
 198                     throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
 199                 }
 200                 break;
 201 
 202             case CC_CC_OPEN: /* [ */
 203                 final CClassNode acc = parseCharClass();
 204                 cc.or(acc);
 205                 break;
 206 
 207             case CC_AND:     /* && */
 208                 if (arg.state == CCSTATE.VALUE) {
 209                     arg.v = 0; // ??? safe v ?
 210                     arg.vIsRaw = false;
 211                     cc.nextStateValue(arg, env);
 212                 }
 213                 /* initialize local variables */
 214                 andStart = true;
 215                 arg.state = CCSTATE.START;
 216                 if (prevCC != null) {
 217                     prevCC.and(cc);
 218                 } else {
 219                     prevCC = cc;
 220                     if (workCC == null) {
 221                         workCC = new CClassNode();
 222                     }
 223                     cc = workCC;
 224                 }
 225                 cc.clear();
 226                 break;
 227 
 228             case EOT:
 229                 throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
 230 
 231             default:
 232                 throw new InternalException(ERR_PARSER_BUG);
 233             } // switch
 234 
 235             if (!fetched) {
 236                 fetchTokenInCC();
 237             }
 238 
 239         } // while
 240 
 241         if (arg.state == CCSTATE.VALUE) {
 242             arg.v = 0; // ??? safe v ?
 243             arg.vIsRaw = false;
 244             cc.nextStateValue(arg, env);
 245         }
 246 
 247         if (prevCC != null) {
 248             prevCC.and(cc);
 249             cc = prevCC;
 250         }
 251 
 252         if (neg) {
 253             cc.setNot();
 254         } else {
 255             cc.clearNot();
 256         }
 257 
 258         if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
 259             if (!cc.isEmpty()) {
 260                 final int NEW_LINE = 0x0a;
 261                 if (EncodingHelper.isNewLine(NEW_LINE)) {
 262                     cc.bs.set(NEW_LINE);
 263                 }
 264             }
 265         }
 266 
 267         return cc;
 268     }
 269 
 270     private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) {
 271         arg.inType = CCVALTYPE.SB;
 272         arg.v = token.getC();
 273         arg.vIsRaw = false;
 274         parseCharClassValEntry2(cc, arg); // goto val_entry2
 275     }
 276 
 277     private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) {
 278         arg.v = '-';
 279         arg.vIsRaw = false;
 280         parseCharClassValEntry(cc, arg); // goto val_entry
 281     }
 282 
 283     private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) {
 284         arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
 285         parseCharClassValEntry2(cc, arg); // val_entry2:
 286     }
 287 
 288     private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) {
 289         cc.nextStateValue(arg, env);
 290     }
 291 
 292     private Node parseEnclose(final TokenType term) {
 293         Node node = null;
 294 
 295         if (!left()) {
 296             throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
 297         }
 298 
 299         int option = env.option;
 300 
 301         if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
 302             inc();
 303             if (!left()) {
 304                 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
 305             }
 306 
 307             fetch();
 308             switch(c) {
 309             case ':':  /* (?:...) grouping only */
 310                 fetchToken(); // group:
 311                 node = parseSubExp(term);
 312                 returnCode = 1; /* group */
 313                 return node;
 314             case '=':
 315                 node = new AnchorNode(AnchorType.PREC_READ);
 316                 break;
 317             case '!':  /*         preceding read */
 318                 node = new AnchorNode(AnchorType.PREC_READ_NOT);
 319                 break;
 320             case '>':  /* (?>...) stop backtrack */
 321                 node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 322                 break;
 323             case '\'':
 324                 break;
 325             case '<':  /* look behind (?<=...), (?<!...) */
 326                 fetch();
 327                 if (c == '=') {
 328                     node = new AnchorNode(AnchorType.LOOK_BEHIND);
 329                 } else if (c == '!') {
 330                     node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
 331                 } else {
 332                     throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 333                 }
 334                 break;
 335             case '@':
 336                 if (syntax.op2AtMarkCaptureHistory()) {
 337                     final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
 338                     final int num = env.addMemEntry();
 339                     if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
 340                         throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
 341                     }
 342                     en.regNum = num;
 343                     node = en;
 344                 } else {
 345                     throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 346                 }
 347                 break;
 348 
 349             // case 'p': #ifdef USE_POSIXLINE_OPTION
 350             case '-':
 351             case 'i':
 352             case 'm':
 353             case 's':
 354             case 'x':
 355                 boolean neg = false;
 356                 while (true) {
 357                     switch(c) {
 358                     case ':':
 359                     case ')':
 360                         break;
 361                     case '-':
 362                         neg = true;
 363                         break;
 364                     case 'x':
 365                         option = bsOnOff(option, Option.EXTEND, neg);
 366                         break;
 367                     case 'i':
 368                         option = bsOnOff(option, Option.IGNORECASE, neg);
 369                         break;
 370                     case 's':
 371                         if (syntax.op2OptionPerl()) {
 372                             option = bsOnOff(option, Option.MULTILINE, neg);
 373                         } else {
 374                             throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 375                         }
 376                         break;
 377                     case 'm':
 378                         if (syntax.op2OptionPerl()) {
 379                             option = bsOnOff(option, Option.SINGLELINE, !neg);
 380                         } else if (syntax.op2OptionRuby()) {
 381                             option = bsOnOff(option, Option.MULTILINE, neg);
 382                         } else {
 383                             throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 384                         }
 385                         break;
 386                     // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
 387                     // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
 388                     // break;
 389 
 390                     default:
 391                         throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 392                     } // switch
 393 
 394                     if (c == ')') {
 395                         final EncloseNode en = new EncloseNode(option, 0); // node_new_option
 396                         node = en;
 397                         returnCode = 2; /* option only */
 398                         return node;
 399                     } else if (c == ':') {
 400                         final int prev = env.option;
 401                         env.option = option;
 402                         fetchToken();
 403                         final Node target = parseSubExp(term);
 404                         env.option = prev;
 405                         final EncloseNode en = new EncloseNode(option, 0); // node_new_option
 406                         en.setTarget(target);
 407                         node = en;
 408                         returnCode = 0;
 409                         return node;
 410                     }
 411                     if (!left()) {
 412                         throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
 413                     }
 414                     fetch();
 415                 } // while
 416 
 417             default:
 418                 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
 419             } // switch
 420 
 421         } else {
 422             if (isDontCaptureGroup(env.option)) {
 423                 fetchToken(); // goto group
 424                 node = parseSubExp(term);
 425                 returnCode = 1; /* group */
 426                 return node;
 427             }
 428             final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
 429             final int num = env.addMemEntry();
 430             en.regNum = num;
 431             node = en;
 432         }
 433 
 434         fetchToken();
 435         final Node target = parseSubExp(term);
 436 
 437         if (node.getType() == NodeType.ANCHOR) {
 438             final AnchorNode an = (AnchorNode) node;
 439             an.setTarget(target);
 440         } else {
 441             final EncloseNode en = (EncloseNode)node;
 442             en.setTarget(target);
 443             if (en.type == EncloseType.MEMORY) {
 444                 /* Don't move this to previous of parse_subexp() */
 445                 env.setMemNode(en.regNum, node);
 446             }
 447         }
 448         returnCode = 0;
 449         return node; // ??
 450     }
 451 
 452     private Node parseExp(final TokenType term) {
 453         if (token.type == term)
 454          {
 455             return StringNode.createEmpty(); // goto end_of_token
 456         }
 457 
 458         Node node = null;
 459         boolean group = false;
 460 
 461         switch(token.type) {
 462         case ALT:
 463         case EOT:
 464             return StringNode.createEmpty(); // end_of_token:, node_new_empty
 465 
 466         case SUBEXP_OPEN:
 467             node = parseEnclose(TokenType.SUBEXP_CLOSE);
 468             if (returnCode == 1) {
 469                 group = true;
 470             } else if (returnCode == 2) { /* option only */
 471                 final int prev = env.option;
 472                 final EncloseNode en = (EncloseNode)node;
 473                 env.option = en.option;
 474                 fetchToken();
 475                 final Node target = parseSubExp(term);
 476                 env.option = prev;
 477                 en.setTarget(target);
 478                 return node;
 479             }
 480             break;
 481         case SUBEXP_CLOSE:
 482             if (!syntax.allowUnmatchedCloseSubexp()) {
 483                 throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
 484             }
 485             if (token.escaped) {
 486                 return parseExpTkRawByte(group); // goto tk_raw_byte
 487             }
 488             return parseExpTkByte(group); // goto tk_byte
 489         case STRING:
 490             return parseExpTkByte(group); // tk_byte:
 491 
 492         case RAW_BYTE:
 493             return parseExpTkRawByte(group); // tk_raw_byte:
 494         case CODE_POINT:
 495             final char[] buf = new char[] {(char)token.getCode()};
 496             // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
 497             node = new StringNode(buf, 0, 1);
 498             break;
 499 
 500         case CHAR_TYPE:
 501             switch(token.getPropCType()) {
 502             case CharacterType.D:
 503             case CharacterType.S:
 504             case CharacterType.W:
 505                 if (Config.NON_UNICODE_SDW) {
 506                     final CClassNode cc = new CClassNode();
 507                     cc.addCType(token.getPropCType(), false, env, this);
 508                     if (token.getPropNot()) {
 509                         cc.setNot();
 510                     }
 511                     node = cc;
 512                 }
 513                 break;
 514 
 515             case CharacterType.SPACE:
 516             case CharacterType.DIGIT:
 517             case CharacterType.XDIGIT:
 518                 // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
 519                 final CClassNode ccn = new CClassNode();
 520                 ccn.addCType(token.getPropCType(), false, env, this);
 521                 if (token.getPropNot()) {
 522                     ccn.setNot();
 523                 }
 524                 node = ccn;
 525                 break;
 526 
 527             default:
 528                 throw new InternalException(ERR_PARSER_BUG);
 529 
 530             } // inner switch
 531             break;
 532 
 533         case CC_CC_OPEN:
 534             final CClassNode cc = parseCharClass();
 535             node = cc;
 536             if (isIgnoreCase(env.option)) {
 537                 final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
 538                 EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
 539 
 540                 if (arg.altRoot != null) {
 541                     node = ConsAltNode.newAltNode(node, arg.altRoot);
 542                 }
 543             }
 544             break;
 545 
 546         case ANYCHAR:
 547             node = new AnyCharNode();
 548             break;
 549 
 550         case ANYCHAR_ANYTIME:
 551             node = new AnyCharNode();
 552             final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
 553             qn.setTarget(node);
 554             node = qn;
 555             break;
 556 
 557         case BACKREF:
 558             final int backRef = token.getBackrefRef();
 559             node = new BackRefNode(backRef, env);
 560             break;
 561 
 562         case ANCHOR:
 563             node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
 564             break;
 565 
 566         case OP_REPEAT:
 567         case INTERVAL:
 568             if (syntax.contextIndepRepeatOps()) {
 569                 if (syntax.contextInvalidRepeatOps()) {
 570                     throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
 571                 }
 572                 node = StringNode.createEmpty(); // node_new_empty
 573             } else {
 574                 return parseExpTkByte(group); // goto tk_byte
 575             }
 576             break;
 577 
 578         default:
 579             throw new InternalException(ERR_PARSER_BUG);
 580         } //switch
 581 
 582         //targetp = node;
 583 
 584         fetchToken(); // re_entry:
 585 
 586         return parseExpRepeat(node, group); // repeat:
 587     }
 588 
 589     private Node parseExpTkByte(final boolean group) {
 590         final StringNode node = new StringNode(chars, token.backP, p); // tk_byte:
 591         while (true) {
 592             fetchToken();
 593             if (token.type != TokenType.STRING) {
 594                 break;
 595             }
 596 
 597             if (token.backP == node.end) {
 598                 node.end = p; // non escaped character, remain shared, just increase shared range
 599             } else {
 600                 node.cat(chars, token.backP, p); // non continuous string stream, need to COW
 601             }
 602         }
 603         // targetp = node;
 604         return parseExpRepeat(node, group); // string_end:, goto repeat
 605     }
 606 
 607     private Node parseExpTkRawByte(final boolean group) {
 608         // tk_raw_byte:
 609 
 610         // important: we don't use 0xff mask here neither in the compiler
 611         // (in the template string) so we won't have to mask target
 612         // strings when comparing against them in the matcher
 613         final StringNode node = new StringNode((char)token.getC());
 614         node.setRaw();
 615 
 616         fetchToken();
 617         node.clearRaw();
 618         // !goto string_end;!
 619         return parseExpRepeat(node, group);
 620     }
 621 
 622     private Node parseExpRepeat(final Node targetp, final boolean group) {
 623         Node target = targetp;
 624         while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
 625             if (target.isInvalidQuantifier()) {
 626                 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
 627             }
 628 
 629             final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
 630                                                      token.getRepeatUpper(),
 631                                                      token.type == TokenType.INTERVAL);
 632 
 633             qtfr.greedy = token.getRepeatGreedy();
 634             final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd());
 635             Node qn = qtfr;
 636 
 637             if (token.getRepeatPossessive()) {
 638                 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 639                 en.setTarget(qn);
 640                 qn = en;
 641             }
 642 
 643             if (ret == 0) {
 644                 target = qn;
 645             } else if (ret == 2) { /* split case: /abc+/ */
 646                 target = ConsAltNode.newListNode(target, null);
 647                 final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
 648 
 649                 fetchToken();
 650                 return parseExpRepeatForCar(target, tmp, group);
 651             }
 652             fetchToken(); // goto re_entry
 653         }
 654         return target;
 655     }
 656 
 657     private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) {
 658         while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
 659             if (target.car.isInvalidQuantifier()) {
 660                 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
 661             }
 662 
 663             final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
 664                                                      token.getRepeatUpper(),
 665                                                      token.type == TokenType.INTERVAL);
 666 
 667             qtfr.greedy = token.getRepeatGreedy();
 668             final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());
 669             Node qn = qtfr;
 670 
 671             if (token.getRepeatPossessive()) {
 672                 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
 673                 en.setTarget(qn);
 674                 qn = en;
 675             }
 676 
 677             if (ret == 0) {
 678                 target.setCar(qn);
 679             } else if (ret == 2) { /* split case: /abc+/ */
 680                 assert false;
 681             }
 682             fetchToken(); // goto re_entry
 683         }
 684         return top;
 685     }
 686 
 687     private Node parseBranch(final TokenType term) {
 688         Node node = parseExp(term);
 689 
 690         if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
 691             return node;
 692         }
 693         final ConsAltNode top = ConsAltNode.newListNode(node, null);
 694         ConsAltNode t = top;
 695 
 696         while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
 697             node = parseExp(term);
 698             if (node.getType() == NodeType.LIST) {
 699                 t.setCdr((ConsAltNode)node);
 700                 while (((ConsAltNode)node).cdr != null ) {
 701                     node = ((ConsAltNode)node).cdr;
 702                 }
 703 
 704                 t = ((ConsAltNode)node);
 705             } else {
 706                 t.setCdr(ConsAltNode.newListNode(node, null));
 707                 t = t.cdr;
 708             }
 709         }
 710         return top;
 711     }
 712 
 713     /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
 714     private Node parseSubExp(final TokenType term) {
 715         Node node = parseBranch(term);
 716 
 717         if (token.type == term) {
 718             return node;
 719         } else if (token.type == TokenType.ALT) {
 720             final ConsAltNode top = ConsAltNode.newAltNode(node, null);
 721             ConsAltNode t = top;
 722             while (token.type == TokenType.ALT) {
 723                 fetchToken();
 724                 node = parseBranch(term);
 725 
 726                 t.setCdr(ConsAltNode.newAltNode(node, null));
 727                 t = t.cdr;
 728             }
 729 
 730             if (token.type != term) {
 731                 parseSubExpError(term);
 732             }
 733             return top;
 734         } else {
 735             parseSubExpError(term);
 736             return null; //not reached
 737         }
 738     }
 739 
 740     private static void parseSubExpError(final TokenType term) {
 741         if (term == TokenType.SUBEXP_CLOSE) {
 742             throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
 743         }
 744         throw new InternalException(ERR_PARSER_BUG);
 745     }
 746 
 747     private Node parseRegexp() {
 748         fetchToken();
 749         return parseSubExp(TokenType.EOT);
 750     }
 751 }