1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.joni; 21 22 import static jdk.nashorn.internal.joni.BitStatus.bsAll; 23 import static jdk.nashorn.internal.joni.BitStatus.bsAt; 24 import static jdk.nashorn.internal.joni.BitStatus.bsClear; 25 import static jdk.nashorn.internal.joni.BitStatus.bsOnAt; 26 import static jdk.nashorn.internal.joni.BitStatus.bsOnAtSimple; 27 import static jdk.nashorn.internal.joni.Option.isCaptureGroup; 28 import static jdk.nashorn.internal.joni.Option.isFindCondition; 29 import static jdk.nashorn.internal.joni.Option.isIgnoreCase; 30 import static jdk.nashorn.internal.joni.Option.isMultiline; 31 import static jdk.nashorn.internal.joni.ast.ConsAltNode.newAltNode; 32 import static jdk.nashorn.internal.joni.ast.QuantifierNode.isRepeatInfinite; 33 34 import java.util.HashSet; 35 36 import jdk.nashorn.internal.joni.ast.AnchorNode; 37 import jdk.nashorn.internal.joni.ast.BackRefNode; 38 import jdk.nashorn.internal.joni.ast.CClassNode; 39 import jdk.nashorn.internal.joni.ast.CTypeNode; 40 import jdk.nashorn.internal.joni.ast.CallNode; 41 import jdk.nashorn.internal.joni.ast.ConsAltNode; 42 import jdk.nashorn.internal.joni.ast.EncloseNode; 43 import jdk.nashorn.internal.joni.ast.Node; 44 import jdk.nashorn.internal.joni.ast.QuantifierNode; 45 import jdk.nashorn.internal.joni.ast.StringNode; 46 import jdk.nashorn.internal.joni.constants.AnchorType; 47 import jdk.nashorn.internal.joni.constants.EncloseType; 48 import jdk.nashorn.internal.joni.constants.NodeType; 49 import jdk.nashorn.internal.joni.constants.RegexState; 50 import jdk.nashorn.internal.joni.constants.StackPopLevel; 51 import jdk.nashorn.internal.joni.constants.TargetInfo; 52 import jdk.nashorn.internal.joni.encoding.CharacterType; 53 import jdk.nashorn.internal.joni.encoding.ObjPtr; 54 import jdk.nashorn.internal.joni.encoding.Ptr; 55 56 final class Analyser extends Parser { 57 58 protected Analyser(ScanEnvironment env, char[] chars, int p, int end) { 59 super(env, chars, p, end); 60 } 61 62 protected final void compile() { 63 regex.state = RegexState.COMPILING; 64 65 if (Config.DEBUG) { 66 Config.log.println(new String(chars, getBegin(), getEnd())); 67 } 68 69 reset(); 70 71 regex.numMem = 0; 72 regex.numRepeat = 0; 73 regex.numNullCheck = 0; 74 //regex.repeatRangeAlloc = 0; 75 regex.repeatRangeLo = null; 76 regex.repeatRangeHi = null; 77 regex.numCombExpCheck = 0; 78 79 if (Config.USE_COMBINATION_EXPLOSION_CHECK) regex.numCombExpCheck = 0; 80 81 parse(); 82 83 if (Config.USE_NAMED_GROUP) { 84 /* mixed use named group and no-named group */ 85 if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) { 86 if (env.numNamed != env.numMem) { 87 root = disableNoNameGroupCapture(root); 88 } else { 89 numberedRefCheck(root); 90 } 91 } 92 } // USE_NAMED_GROUP 93 94 if (Config.USE_NAMED_GROUP) { 95 if (env.numCall > 0) { 96 env.unsetAddrList = new UnsetAddrList(env.numCall); 97 setupSubExpCall(root); 98 // r != 0 ??? 99 subexpRecursiveCheckTrav(root); 100 // r < 0 -< err, FOUND_CALLED_NODE = 1 101 subexpInfRecursiveCheckTrav(root); 102 // r != 0 recursion infinite ??? 103 regex.numCall = env.numCall; 104 } else { 105 regex.numCall = 0; 106 } 107 } // USE_NAMED_GROUP 108 109 if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { 110 Config.log.println("<RAW TREE>"); 111 Config.log.println(root + "\n"); 112 } 113 114 root = setupTree(root, 0); 115 if (Config.DEBUG_PARSE_TREE) { 116 if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println("<TREE>"); 117 root.verifyTree(new HashSet<Node>(), env.reg.warnings); 118 Config.log.println(root + "\n"); 119 } 120 121 regex.captureHistory = env.captureHistory; 122 regex.btMemStart = env.btMemStart; 123 regex.btMemEnd = env.btMemEnd; 124 125 if (isFindCondition(regex.options)) { 126 regex.btMemEnd = bsAll(); 127 } else { 128 regex.btMemEnd = env.btMemEnd; 129 regex.btMemEnd |= regex.captureHistory; 130 } 131 132 if (Config.USE_COMBINATION_EXPLOSION_CHECK) { 133 if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) { 134 setupCombExpCheck(root, 0); 135 136 if (Config.USE_SUBEXP_CALL && env.hasRecursion) { 137 env.numCombExpCheck = 0; 138 } else { // USE_SUBEXP_CALL 139 if (env.combExpMaxRegNum > 0) { 140 for (int i=1; i<env.combExpMaxRegNum; i++) { 141 if (bsAt(env.backrefedMem, i)) { 142 env.numCombExpCheck = 0; 143 break; 144 } 145 } 146 } 147 } 148 149 } // USE_SUBEXP_CALL 150 regex.numCombExpCheck = env.numCombExpCheck; 151 } // USE_COMBINATION_EXPLOSION_CHECK 152 153 regex.clearOptimizeInfo(); 154 155 if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root); 156 157 env.memNodes = null; 158 159 new ArrayCompiler(this).compile(); 160 //new AsmCompiler(this).compile(); 161 162 if (regex.numRepeat != 0 || regex.btMemEnd != 0) { 163 regex.stackPopLevel = StackPopLevel.ALL; 164 } else { 165 if (regex.btMemStart != 0) { 166 regex.stackPopLevel = StackPopLevel.MEM_START; 167 } else { 168 regex.stackPopLevel = StackPopLevel.FREE; 169 } 170 } 171 172 if (Config.DEBUG_COMPILE) { 173 if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString()); 174 Config.log.println("stack used: " + regex.stackNeeded); 175 if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n"); 176 Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); 177 178 } // DEBUG_COMPILE 179 180 regex.state = RegexState.NORMAL; 181 } 182 183 private void noNameDisableMapFor_cosAlt(Node node, int[]map, Ptr counter) { 184 ConsAltNode can = (ConsAltNode)node; 185 do { 186 can.setCar(noNameDisableMap(can.car, map, counter)); 187 } while ((can = can.cdr) != null); 188 } 189 190 private void noNameDisableMapFor_quantifier(Node node, int[]map, Ptr counter) { 191 QuantifierNode qn = (QuantifierNode)node; 192 Node target = qn.target; 193 Node old = target; 194 target = noNameDisableMap(target, map, counter); 195 196 if (target != old) { 197 qn.setTarget(target); 198 if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target); 199 } 200 } 201 202 private Node noNameDisableMapFor_enclose(Node node, int[]map, Ptr counter) { 203 EncloseNode en = (EncloseNode)node; 204 if (en.type == EncloseType.MEMORY) { 205 if (en.isNamedGroup()) { 206 counter.p++; 207 map[en.regNum] = counter.p; 208 en.regNum = counter.p; 209 //en.target = noNameDisableMap(en.target, map, counter); 210 en.setTarget(noNameDisableMap(en.target, map, counter)); // ??? 211 } else { 212 node = en.target; 213 en.target = null; // remove first enclose: /(a)(?<b>c)/ 214 node = noNameDisableMap(node, map, counter); 215 } 216 } else { 217 //en.target = noNameDisableMap(en.target, map, counter); 218 en.setTarget(noNameDisableMap(en.target, map, counter)); // ??? 219 } 220 return node; 221 } 222 223 private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) { 224 AnchorNode an = (AnchorNode)node; 225 switch (an.type) { 226 case AnchorNode.PREC_READ: 227 case AnchorNode.PREC_READ_NOT: 228 case AnchorNode.LOOK_BEHIND: 229 case AnchorNode.LOOK_BEHIND_NOT: 230 an.setTarget(noNameDisableMap(an.target, map, counter)); 231 } 232 } 233 234 private Node noNameDisableMap(Node node, int[]map, Ptr counter) { 235 switch (node.getType()) { 236 case NodeType.LIST: 237 case NodeType.ALT: 238 noNameDisableMapFor_cosAlt(node, map, counter); 239 break; 240 case NodeType.QTFR: 241 noNameDisableMapFor_quantifier(node, map, counter); 242 break; 243 case NodeType.ENCLOSE: 244 node = noNameDisableMapFor_enclose(node, map, counter); 245 break; 246 case NodeType.ANCHOR: 247 noNameDisableMapFor_anchor(node, map, counter); 248 break; 249 } // switch 250 return node; 251 } 252 253 private void renumberByMap(Node node, int[]map) { 254 switch (node.getType()) { 255 case NodeType.LIST: 256 case NodeType.ALT: 257 ConsAltNode can = (ConsAltNode)node; 258 do { 259 renumberByMap(can.car, map); 260 } while ((can = can.cdr) != null); 261 break; 262 263 case NodeType.QTFR: 264 renumberByMap(((QuantifierNode)node).target, map); 265 break; 266 267 case NodeType.ENCLOSE: 268 renumberByMap(((EncloseNode)node).target, map); 269 break; 270 271 case NodeType.BREF: 272 ((BackRefNode)node).renumber(map); 273 break; 274 } // switch 275 } 276 277 protected final void numberedRefCheck(Node node) { 278 switch (node.getType()) { 279 case NodeType.LIST: 280 case NodeType.ALT: 281 ConsAltNode can = (ConsAltNode)node; 282 do { 283 numberedRefCheck(can.car); 284 } while ((can = can.cdr) != null); 285 break; 286 287 case NodeType.QTFR: 288 numberedRefCheck(((QuantifierNode)node).target); 289 break; 290 291 case NodeType.ENCLOSE: 292 numberedRefCheck(((EncloseNode)node).target); 293 break; 294 295 case NodeType.BREF: 296 BackRefNode br = (BackRefNode)node; 297 if (!br.isNameRef()) newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); 298 break; 299 } // switch 300 } 301 302 protected final Node disableNoNameGroupCapture(Node root) { 303 int[]map = new int[env.numMem + 1]; 304 305 for (int i=1; i<=env.numMem; i++) map[i] = 0; 306 307 root = noNameDisableMap(root, map, new Ptr(0)); 308 renumberByMap(root, map); 309 310 for (int i=1, pos=1; i<=env.numMem; i++) { 311 if (map[i] > 0) { 312 env.memNodes[pos] = env.memNodes[i]; 313 pos++; 314 } 315 } 316 317 int loc = env.captureHistory; 318 env.captureHistory = bsClear(); 319 320 for (int i=1; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { 321 if (bsAt(loc, i)) { 322 env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]); 323 } 324 } 325 326 env.numMem = env.numNamed; 327 regex.numMem = env.numNamed; 328 329 regex.renumberNameTable(map); 330 331 return root; 332 } 333 334 private void swap(Node a, Node b) { 335 a.swap(b); 336 337 if (root == b) { 338 root = a; 339 } else if (root == a) { 340 root = b; 341 } 342 } 343 344 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 345 private int quantifiersMemoryInfo(Node node) { 346 int info = 0; 347 348 switch(node.getType()) { 349 case NodeType.LIST: 350 case NodeType.ALT: 351 ConsAltNode can = (ConsAltNode)node; 352 do { 353 int v = quantifiersMemoryInfo(can.car); 354 if (v > info) info = v; 355 } while ((can = can.cdr) != null); 356 break; 357 358 case NodeType.CALL: 359 if (Config.USE_SUBEXP_CALL) { 360 CallNode cn = (CallNode)node; 361 if (cn.isRecursion()) { 362 return TargetInfo.IS_EMPTY_REC; /* tiny version */ 363 } else { 364 info = quantifiersMemoryInfo(cn.target); 365 } 366 } // USE_SUBEXP_CALL 367 break; 368 369 case NodeType.QTFR: 370 QuantifierNode qn = (QuantifierNode)node; 371 if (qn.upper != 0) { 372 info = quantifiersMemoryInfo(qn.target); 373 } 374 break; 375 376 case NodeType.ENCLOSE: 377 EncloseNode en = (EncloseNode)node; 378 switch (en.type) { 379 case EncloseType.MEMORY: 380 return TargetInfo.IS_EMPTY_MEM; 381 382 case EncloseType.OPTION: 383 case EncloseNode.STOP_BACKTRACK: 384 info = quantifiersMemoryInfo(en.target); 385 break; 386 387 default: 388 break; 389 } // inner switch 390 break; 391 392 case NodeType.BREF: 393 case NodeType.STR: 394 case NodeType.CTYPE: 395 case NodeType.CCLASS: 396 case NodeType.CANY: 397 case NodeType.ANCHOR: 398 default: 399 break; 400 } // switch 401 402 return info; 403 } 404 405 private int getMinMatchLength(Node node) { 406 int min = 0; 407 408 switch (node.getType()) { 409 case NodeType.BREF: 410 BackRefNode br = (BackRefNode)node; 411 if (br.isRecursion()) break; 412 413 if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF); 414 min = getMinMatchLength(env.memNodes[br.back[0]]); 415 416 for (int i=1; i<br.backNum; i++) { 417 if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); 418 int tmin = getMinMatchLength(env.memNodes[br.back[i]]); 419 if (min > tmin) min = tmin; 420 } 421 break; 422 423 case NodeType.CALL: 424 if (Config.USE_SUBEXP_CALL) { 425 CallNode cn = (CallNode)node; 426 if (cn.isRecursion()) { 427 EncloseNode en = (EncloseNode)cn.target; 428 if (en.isMinFixed()) min = en.minLength; 429 } else { 430 min = getMinMatchLength(cn.target); 431 } 432 } // USE_SUBEXP_CALL 433 break; 434 435 case NodeType.LIST: 436 ConsAltNode can = (ConsAltNode)node; 437 do { 438 min += getMinMatchLength(can.car); 439 } while ((can = can.cdr) != null); 440 break; 441 442 case NodeType.ALT: 443 ConsAltNode y = (ConsAltNode)node; 444 do { 445 Node x = y.car; 446 int tmin = getMinMatchLength(x); 447 if (y == node) { 448 min = tmin; 449 } else if (min > tmin) { 450 min = tmin; 451 } 452 } while ((y = y.cdr) != null); 453 break; 454 455 case NodeType.STR: 456 min = ((StringNode)node).length(); 457 break; 458 459 case NodeType.CTYPE: 460 min = 1; 461 break; 462 463 case NodeType.CCLASS: 464 case NodeType.CANY: 465 min = 1; 466 break; 467 468 case NodeType.QTFR: 469 QuantifierNode qn = (QuantifierNode)node; 470 if (qn.lower > 0) { 471 min = getMinMatchLength(qn.target); 472 min = MinMaxLen.distanceMultiply(min, qn.lower); 473 } 474 break; 475 476 case NodeType.ENCLOSE: 477 EncloseNode en = (EncloseNode)node; 478 switch (en.type) { 479 case EncloseType.MEMORY: 480 if (Config.USE_SUBEXP_CALL) { 481 if (en.isMinFixed()) { 482 min = en.minLength; 483 } else { 484 min = getMinMatchLength(en.target); 485 en.minLength = min; 486 en.setMinFixed(); 487 } 488 } // USE_SUBEXP_CALL 489 break; 490 491 case EncloseType.OPTION: 492 case EncloseType.STOP_BACKTRACK: 493 min = getMinMatchLength(en.target); 494 break; 495 } // inner switch 496 break; 497 498 case NodeType.ANCHOR: 499 default: 500 break; 501 } // switch 502 503 return min; 504 } 505 506 private int getMaxMatchLength(Node node) { 507 int max = 0; 508 509 switch (node.getType()) { 510 case NodeType.LIST: 511 ConsAltNode ln = (ConsAltNode)node; 512 do { 513 int tmax = getMaxMatchLength(ln.car); 514 max = MinMaxLen.distanceAdd(max, tmax); 515 } while ((ln = ln.cdr) != null); 516 break; 517 518 case NodeType.ALT: 519 ConsAltNode an = (ConsAltNode)node; 520 do { 521 int tmax = getMaxMatchLength(an.car); 522 if (max < tmax) max = tmax; 523 } while ((an = an.cdr) != null); 524 break; 525 526 case NodeType.STR: 527 max = ((StringNode)node).length(); 528 break; 529 530 case NodeType.CTYPE: 531 max = 1; 532 break; 533 534 case NodeType.CCLASS: 535 case NodeType.CANY: 536 max = 1; 537 break; 538 539 case NodeType.BREF: 540 BackRefNode br = (BackRefNode)node; 541 if (br.isRecursion()) { 542 max = MinMaxLen.INFINITE_DISTANCE; 543 break; 544 } 545 546 for (int i=0; i<br.backNum; i++) { 547 if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); 548 int tmax = getMaxMatchLength(env.memNodes[br.back[i]]); 549 if (max < tmax) max = tmax; 550 } 551 break; 552 553 case NodeType.CALL: 554 if (Config.USE_SUBEXP_CALL) { 555 CallNode cn = (CallNode)node; 556 if (!cn.isRecursion()) { 557 max = getMaxMatchLength(cn.target); 558 } else { 559 max = MinMaxLen.INFINITE_DISTANCE; 560 } 561 } // USE_SUBEXP_CALL 562 break; 563 564 case NodeType.QTFR: 565 QuantifierNode qn = (QuantifierNode)node; 566 if (qn.upper != 0) { 567 max = getMaxMatchLength(qn.target); 568 if (max != 0) { 569 if (!isRepeatInfinite(qn.upper)) { 570 max = MinMaxLen.distanceMultiply(max, qn.upper); 571 } else { 572 max = MinMaxLen.INFINITE_DISTANCE; 573 } 574 } 575 } 576 break; 577 578 case NodeType.ENCLOSE: 579 EncloseNode en = (EncloseNode)node; 580 switch (en.type) { 581 case EncloseType.MEMORY: 582 if (Config.USE_SUBEXP_CALL) { 583 if (en.isMaxFixed()) { 584 max = en.maxLength; 585 } else { 586 max = getMaxMatchLength(en.target); 587 en.maxLength = max; 588 en.setMaxFixed(); 589 } 590 } // USE_SUBEXP_CALL 591 break; 592 593 case EncloseType.OPTION: 594 case EncloseType.STOP_BACKTRACK: 595 max = getMaxMatchLength(en.target); 596 break; 597 } // inner switch 598 break; 599 600 case NodeType.ANCHOR: 601 default: 602 break; 603 } // switch 604 605 return max; 606 } 607 608 private static final int GET_CHAR_LEN_VARLEN = -1; 609 private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; 610 protected final int getCharLengthTree(Node node) { 611 return getCharLengthTree(node, 0); 612 } 613 614 private int getCharLengthTree(Node node, int level) { 615 level++; 616 617 int len = 0; 618 returnCode = 0; 619 620 switch(node.getType()) { 621 case NodeType.LIST: 622 ConsAltNode ln = (ConsAltNode)node; 623 do { 624 int tlen = getCharLengthTree(ln.car, level); 625 if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen); 626 } while (returnCode == 0 && (ln = ln.cdr) != null); 627 break; 628 629 case NodeType.ALT: 630 ConsAltNode an = (ConsAltNode)node; 631 boolean varLen = false; 632 633 int tlen = getCharLengthTree(an.car, level); 634 while (returnCode == 0 && (an = an.cdr) != null) { 635 int tlen2 = getCharLengthTree(an.car, level); 636 if (returnCode == 0) { 637 if (tlen != tlen2) varLen = true; 638 } 639 } 640 641 if (returnCode == 0) { 642 if (varLen) { 643 if (level == 1) { 644 returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; 645 } else { 646 returnCode = GET_CHAR_LEN_VARLEN; 647 } 648 } else { 649 len = tlen; 650 } 651 } 652 break; 653 654 case NodeType.STR: 655 StringNode sn = (StringNode)node; 656 len = sn.length(); 657 break; 658 659 case NodeType.QTFR: 660 QuantifierNode qn = (QuantifierNode)node; 661 if (qn.lower == qn.upper) { 662 tlen = getCharLengthTree(qn.target, level); 663 if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower); 664 } else { 665 returnCode = GET_CHAR_LEN_VARLEN; 666 } 667 break; 668 669 case NodeType.CALL: 670 if (Config.USE_SUBEXP_CALL) { 671 CallNode cn = (CallNode)node; 672 if (!cn.isRecursion()) { 673 len = getCharLengthTree(cn.target, level); 674 } else { 675 returnCode = GET_CHAR_LEN_VARLEN; 676 } 677 } // USE_SUBEXP_CALL 678 break; 679 680 case NodeType.CTYPE: 681 len = 1; 682 683 case NodeType.CCLASS: 684 case NodeType.CANY: 685 len = 1; 686 break; 687 688 case NodeType.ENCLOSE: 689 EncloseNode en = (EncloseNode)node; 690 switch(en.type) { 691 case EncloseType.MEMORY: 692 if (Config.USE_SUBEXP_CALL) { 693 if (en.isCLenFixed()) { 694 len = en.charLength; 695 } else { 696 len = getCharLengthTree(en.target, level); 697 if (returnCode == 0) { 698 en.charLength = len; 699 en.setCLenFixed(); 700 } 701 } 702 } // USE_SUBEXP_CALL 703 break; 704 705 case EncloseType.OPTION: 706 case EncloseType.STOP_BACKTRACK: 707 len = getCharLengthTree(en.target, level); 708 break; 709 } // inner switch 710 break; 711 712 case NodeType.ANCHOR: 713 break; 714 715 default: 716 returnCode = GET_CHAR_LEN_VARLEN; 717 } // switch 718 return len; 719 } 720 721 /* x is not included y ==> 1 : 0 */ 722 private boolean isNotIncluded(Node x, Node y) { 723 Node tmp; 724 725 // !retry:! 726 retry: while(true) { 727 728 int yType = y.getType(); 729 730 switch(x.getType()) { 731 case NodeType.CTYPE: 732 switch(yType) { 733 case NodeType.CTYPE: 734 CTypeNode cny = (CTypeNode)y; 735 CTypeNode cnx = (CTypeNode)x; 736 return cny.ctype == cnx.ctype && cny.not != cnx.not; 737 738 case NodeType.CCLASS: 739 // !swap:! 740 tmp = x; 741 x = y; 742 y = tmp; 743 // !goto retry;! 744 continue retry; 745 746 case NodeType.STR: 747 // !goto swap;! 748 tmp = x; 749 x = y; 750 y = tmp; 751 continue retry; 752 753 default: 754 break; 755 } // inner switch 756 break; 757 758 case NodeType.CCLASS: 759 CClassNode xc = (CClassNode)x; 760 761 switch(yType) { 762 case NodeType.CTYPE: 763 switch(((CTypeNode)y).ctype) { 764 case CharacterType.WORD: 765 if (!((CTypeNode)y).not) { 766 if (xc.mbuf == null && !xc.isNot()) { 767 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 768 if (xc.bs.at(i)) { 769 if (EncodingHelper.isWord(i)) return false; 770 } 771 } 772 return true; 773 } 774 return false; 775 } else { 776 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 777 if (!EncodingHelper.isWord(i)) { 778 if (!xc.isNot()) { 779 if (xc.bs.at(i)) return false; 780 } else { 781 if (!xc.bs.at(i)) return false; 782 } 783 } 784 } 785 return true; 786 } 787 // break; not reached 788 789 default: 790 break; 791 } // inner switch 792 break; 793 794 case NodeType.CCLASS: 795 CClassNode yc = (CClassNode)y; 796 797 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 798 boolean v = xc.bs.at(i); 799 if ((v && !xc.isNot()) || (!v && xc.isNot())) { 800 v = yc.bs.at(i); 801 if ((v && !yc.isNot()) || (!v && yc.isNot())) return false; 802 } 803 } 804 if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true; 805 return false; 806 // break; not reached 807 808 case NodeType.STR: 809 // !goto swap;! 810 tmp = x; 811 x = y; 812 y = tmp; 813 continue retry; 814 815 default: 816 break; 817 818 } // inner switch 819 break; // case NodeType.CCLASS 820 821 case NodeType.STR: 822 StringNode xs = (StringNode)x; 823 if (xs.length() == 0) break; 824 825 switch (yType) { 826 case NodeType.CTYPE: 827 CTypeNode cy = ((CTypeNode)y); 828 switch (cy.ctype) { 829 case CharacterType.WORD: 830 return !cy.not; 831 832 default: 833 break; 834 835 } // inner switch 836 break; 837 838 case NodeType.CCLASS: 839 CClassNode cc = (CClassNode)y; 840 int code = xs.chars[xs.p]; 841 return !cc.isCodeInCC(code); 842 843 case NodeType.STR: 844 StringNode ys = (StringNode)y; 845 int len = xs.length(); 846 if (len > ys.length()) len = ys.length(); 847 if (xs.isAmbig() || ys.isAmbig()) { 848 /* tiny version */ 849 return false; 850 } else { 851 for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) { 852 if (ys.chars[p] != xs.chars[q]) return true; 853 } 854 } 855 break; 856 857 default: 858 break; 859 } // inner switch 860 861 break; // case NodeType.STR 862 863 } // switch 864 865 break; 866 } // retry: while 867 return false; 868 } 869 870 private Node getHeadValueNode(Node node, boolean exact) { 871 Node n = null; 872 873 switch(node.getType()) { 874 case NodeType.BREF: 875 case NodeType.ALT: 876 case NodeType.CANY: 877 break; 878 879 case NodeType.CALL: 880 break; // if (Config.USE_SUBEXP_CALL) 881 882 case NodeType.CTYPE: 883 case NodeType.CCLASS: 884 if (!exact) n = node; 885 break; 886 887 case NodeType.LIST: 888 n = getHeadValueNode(((ConsAltNode)node).car, exact); 889 break; 890 891 case NodeType.STR: 892 StringNode sn = (StringNode)node; 893 if (sn.end <= sn.p) break; // ??? 894 895 if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ 896 // nothing 897 } else { 898 n = node; 899 } 900 break; 901 902 case NodeType.QTFR: 903 QuantifierNode qn = (QuantifierNode)node; 904 if (qn.lower > 0) { 905 if (qn.headExact != null) { 906 n = qn.headExact; 907 } else { 908 n = getHeadValueNode(qn.target, exact); 909 } 910 } 911 break; 912 913 case NodeType.ENCLOSE: 914 EncloseNode en = (EncloseNode)node; 915 916 switch (en.type) { 917 case EncloseType.OPTION: 918 int options = regex.options; 919 regex.options = en.option; 920 n = getHeadValueNode(en.target, exact); 921 regex.options = options; 922 break; 923 924 case EncloseType.MEMORY: 925 case EncloseType.STOP_BACKTRACK: 926 n = getHeadValueNode(en.target, exact); 927 break; 928 } // inner switch 929 break; 930 931 case NodeType.ANCHOR: 932 AnchorNode an = (AnchorNode)node; 933 if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact); 934 break; 935 936 default: 937 break; 938 } // switch 939 940 return n; 941 } 942 943 // true: invalid 944 private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) { 945 if ((node.getType2Bit() & typeMask) == 0) return true; 946 947 boolean invalid = false; 948 949 switch(node.getType()) { 950 case NodeType.LIST: 951 case NodeType.ALT: 952 ConsAltNode can = (ConsAltNode)node; 953 do { 954 invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); 955 } while (!invalid && (can = can.cdr) != null); 956 break; 957 958 case NodeType.QTFR: 959 invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); 960 break; 961 962 case NodeType.ENCLOSE: 963 EncloseNode en = (EncloseNode)node; 964 if ((en.type & encloseMask) == 0) return true; 965 invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); 966 break; 967 968 case NodeType.ANCHOR: 969 AnchorNode an = (AnchorNode)node; 970 if ((an.type & anchorMask) == 0) return true; 971 972 if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); 973 break; 974 975 default: 976 break; 977 978 } // switch 979 980 return invalid; 981 } 982 983 private static final int RECURSION_EXIST = 1; 984 private static final int RECURSION_INFINITE = 2; 985 private int subexpInfRecursiveCheck(Node node, boolean head) { 986 int r = 0; 987 988 switch (node.getType()) { 989 case NodeType.LIST: 990 int min; 991 ConsAltNode x = (ConsAltNode)node; 992 do { 993 int ret = subexpInfRecursiveCheck(x.car, head); 994 if (ret == RECURSION_INFINITE) return ret; 995 r |= ret; 996 if (head) { 997 min = getMinMatchLength(x.car); 998 if (min != 0) head = false; 999 } 1000 } while ((x = x.cdr) != null); 1001 break; 1002 1003 case NodeType.ALT: 1004 ConsAltNode can = (ConsAltNode)node; 1005 r = RECURSION_EXIST; 1006 do { 1007 int ret = subexpInfRecursiveCheck(can.car, head); 1008 if (ret == RECURSION_INFINITE) return ret; 1009 r &= ret; 1010 } while ((can = can.cdr) != null); 1011 break; 1012 1013 case NodeType.QTFR: 1014 QuantifierNode qn = (QuantifierNode)node; 1015 r = subexpInfRecursiveCheck(qn.target, head); 1016 if (r == RECURSION_EXIST) { 1017 if (qn.lower == 0) r = 0; 1018 } 1019 break; 1020 1021 case NodeType.ANCHOR: 1022 AnchorNode an = (AnchorNode)node; 1023 switch (an.type) { 1024 case AnchorType.PREC_READ: 1025 case AnchorType.PREC_READ_NOT: 1026 case AnchorType.LOOK_BEHIND: 1027 case AnchorType.LOOK_BEHIND_NOT: 1028 r = subexpInfRecursiveCheck(an.target, head); 1029 break; 1030 } // inner switch 1031 break; 1032 1033 case NodeType.CALL: 1034 r = subexpInfRecursiveCheck(((CallNode)node).target, head); 1035 break; 1036 1037 case NodeType.ENCLOSE: 1038 EncloseNode en = (EncloseNode)node; 1039 if (en.isMark2()) { 1040 return 0; 1041 } else if (en.isMark1()) { 1042 return !head ? RECURSION_EXIST : RECURSION_INFINITE; 1043 // throw exception here ??? 1044 } else { 1045 en.setMark2(); 1046 r = subexpInfRecursiveCheck(en.target, head); 1047 en.clearMark2(); 1048 } 1049 break; 1050 1051 default: 1052 break; 1053 } // switch 1054 return r; 1055 } 1056 1057 protected final int subexpInfRecursiveCheckTrav(Node node) { 1058 int r = 0; 1059 1060 switch (node.getType()) { 1061 case NodeType.LIST: 1062 case NodeType.ALT: 1063 ConsAltNode can = (ConsAltNode)node; 1064 do { 1065 r = subexpInfRecursiveCheckTrav(can.car); 1066 } while (r == 0 && (can = can.cdr) != null); 1067 break; 1068 1069 case NodeType.QTFR: 1070 r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target); 1071 break; 1072 1073 case NodeType.ANCHOR: 1074 AnchorNode an = (AnchorNode)node; 1075 switch (an.type) { 1076 case AnchorType.PREC_READ: 1077 case AnchorType.PREC_READ_NOT: 1078 case AnchorType.LOOK_BEHIND: 1079 case AnchorType.LOOK_BEHIND_NOT: 1080 r = subexpInfRecursiveCheckTrav(an.target); 1081 break; 1082 } // inner switch 1083 break; 1084 1085 case NodeType.ENCLOSE: 1086 EncloseNode en = (EncloseNode)node; 1087 if (en.isRecursion()) { 1088 en.setMark1(); 1089 r = subexpInfRecursiveCheck(en.target, true); 1090 if (r > 0) newValueException(ERR_NEVER_ENDING_RECURSION); 1091 en.clearMark1(); 1092 } 1093 r = subexpInfRecursiveCheckTrav(en.target); 1094 break; 1095 1096 default: 1097 break; 1098 } // switch 1099 1100 return r; 1101 } 1102 1103 private int subexpRecursiveCheck(Node node) { 1104 int r = 0; 1105 1106 switch (node.getType()) { 1107 case NodeType.LIST: 1108 case NodeType.ALT: 1109 ConsAltNode can = (ConsAltNode)node; 1110 do { 1111 r |= subexpRecursiveCheck(can.car); 1112 } while ((can = can.cdr) != null); 1113 break; 1114 1115 case NodeType.QTFR: 1116 r = subexpRecursiveCheck(((QuantifierNode)node).target); 1117 break; 1118 1119 case NodeType.ANCHOR: 1120 AnchorNode an = (AnchorNode)node; 1121 switch (an.type) { 1122 case AnchorType.PREC_READ: 1123 case AnchorType.PREC_READ_NOT: 1124 case AnchorType.LOOK_BEHIND: 1125 case AnchorType.LOOK_BEHIND_NOT: 1126 r = subexpRecursiveCheck(an.target); 1127 break; 1128 } // inner switch 1129 break; 1130 1131 case NodeType.CALL: 1132 CallNode cn = (CallNode)node; 1133 r = subexpRecursiveCheck(cn.target); 1134 if (r != 0) cn.setRecursion(); 1135 break; 1136 1137 case NodeType.ENCLOSE: 1138 EncloseNode en = (EncloseNode)node; 1139 if (en.isMark2()) { 1140 return 0; 1141 } else if (en.isMark1()) { 1142 return 1; /* recursion */ 1143 } else { 1144 en.setMark2(); 1145 r = subexpRecursiveCheck(en.target); 1146 en.clearMark2(); 1147 } 1148 break; 1149 1150 default: 1151 break; 1152 } // switch 1153 1154 return r; 1155 } 1156 1157 private static final int FOUND_CALLED_NODE = 1; 1158 protected final int subexpRecursiveCheckTrav(Node node) { 1159 int r = 0; 1160 1161 switch (node.getType()) { 1162 case NodeType.LIST: 1163 case NodeType.ALT: 1164 ConsAltNode can = (ConsAltNode)node; 1165 do { 1166 int ret = subexpRecursiveCheckTrav(can.car); 1167 if (ret == FOUND_CALLED_NODE) { 1168 r = FOUND_CALLED_NODE; 1169 } 1170 // else if (ret < 0) return ret; ??? 1171 } while ((can = can.cdr) != null); 1172 break; 1173 1174 case NodeType.QTFR: 1175 QuantifierNode qn = (QuantifierNode)node; 1176 r = subexpRecursiveCheckTrav(qn.target); 1177 if (qn.upper == 0) { 1178 if (r == FOUND_CALLED_NODE) qn.isRefered = true; 1179 } 1180 break; 1181 1182 case NodeType.ANCHOR: 1183 AnchorNode an = (AnchorNode)node; 1184 switch (an.type) { 1185 case AnchorType.PREC_READ: 1186 case AnchorType.PREC_READ_NOT: 1187 case AnchorType.LOOK_BEHIND: 1188 case AnchorType.LOOK_BEHIND_NOT: 1189 r = subexpRecursiveCheckTrav(an.target); 1190 break; 1191 } // inner switch 1192 break; 1193 1194 case NodeType.ENCLOSE: 1195 EncloseNode en = (EncloseNode)node; 1196 if (!en.isRecursion()) { 1197 if (en.isCalled()) { 1198 en.setMark1(); 1199 r = subexpRecursiveCheck(en.target); 1200 if (r != 0) en.setRecursion(); 1201 en.clearMark1(); 1202 } 1203 } 1204 r = subexpRecursiveCheckTrav(en.target); 1205 if (en.isCalled()) r |= FOUND_CALLED_NODE; 1206 break; 1207 1208 default: 1209 break; 1210 } // switch 1211 1212 return r; 1213 } 1214 1215 private void setCallAttr(CallNode cn) { 1216 cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes! 1217 if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); 1218 1219 ((EncloseNode)cn.target).setCalled(); 1220 env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum); 1221 cn.unsetAddrList = env.unsetAddrList; 1222 } 1223 1224 protected final void setupSubExpCall(Node node) { 1225 1226 switch(node.getType()) { 1227 case NodeType.LIST: 1228 ConsAltNode ln = (ConsAltNode)node; 1229 do { 1230 setupSubExpCall(ln.car); 1231 } while ((ln = ln.cdr) != null); 1232 break; 1233 1234 case NodeType.ALT: 1235 ConsAltNode can = (ConsAltNode)node; 1236 do { 1237 setupSubExpCall(can.car); 1238 } while ((can = can.cdr) != null); 1239 break; 1240 1241 case NodeType.QTFR: 1242 setupSubExpCall(((QuantifierNode)node).target); 1243 break; 1244 1245 case NodeType.ENCLOSE: 1246 setupSubExpCall(((EncloseNode)node).target); 1247 break; 1248 1249 case NodeType.CALL: 1250 CallNode cn = (CallNode)node; 1251 1252 if (cn.groupNum != 0) { 1253 int gNum = cn.groupNum; 1254 1255 if (Config.USE_NAMED_GROUP) { 1256 if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { 1257 newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); 1258 } 1259 } // USE_NAMED_GROUP 1260 if (gNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); 1261 setCallAttr(cn); 1262 } else { 1263 if (Config.USE_NAMED_GROUP) { 1264 NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd); 1265 1266 if (ne == null) { 1267 newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); 1268 } else if (ne.backNum > 1) { 1269 newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); 1270 } else { 1271 cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ? 1272 setCallAttr(cn); 1273 } 1274 } 1275 } 1276 break; 1277 1278 case NodeType.ANCHOR: 1279 AnchorNode an = (AnchorNode)node; 1280 switch (an.type) { 1281 case AnchorType.PREC_READ: 1282 case AnchorType.PREC_READ_NOT: 1283 case AnchorType.LOOK_BEHIND: 1284 case AnchorType.LOOK_BEHIND_NOT: 1285 setupSubExpCall(an.target); 1286 break; 1287 } 1288 break; 1289 1290 } // switch 1291 } 1292 1293 /* divide different length alternatives in look-behind. 1294 (?<=A|B) ==> (?<=A)|(?<=B) 1295 (?<!A|B) ==> (?<!A)(?<!B) 1296 */ 1297 private Node divideLookBehindAlternatives(Node node) { 1298 AnchorNode an = (AnchorNode)node; 1299 int anchorType = an.type; 1300 Node head = an.target; 1301 Node np = ((ConsAltNode)head).car; 1302 1303 swap(node, head); 1304 1305 Node tmp = node; 1306 node = head; 1307 head = tmp; 1308 1309 ((ConsAltNode)node).setCar(head); 1310 ((AnchorNode)head).setTarget(np); 1311 np = node; 1312 1313 while ((np = ((ConsAltNode)np).cdr) != null) { 1314 AnchorNode insert = new AnchorNode(anchorType); 1315 insert.setTarget(((ConsAltNode)np).car); 1316 ((ConsAltNode)np).setCar(insert); 1317 } 1318 1319 if (anchorType == AnchorType.LOOK_BEHIND_NOT) { 1320 np = node; 1321 do { 1322 ((ConsAltNode)np).toListNode(); /* alt -> list */ 1323 } while ((np = ((ConsAltNode)np).cdr) != null); 1324 } 1325 1326 return node; 1327 } 1328 1329 private Node setupLookBehind(Node node) { 1330 AnchorNode an = (AnchorNode)node; 1331 int len = getCharLengthTree(an.target); 1332 switch(returnCode) { 1333 case 0: 1334 an.charLength = len; 1335 break; 1336 case GET_CHAR_LEN_VARLEN: 1337 newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1338 break; 1339 case GET_CHAR_LEN_TOP_ALT_VARLEN: 1340 if (syntax.differentLengthAltLookBehind()) { 1341 return divideLookBehindAlternatives(node); 1342 } else { 1343 newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1344 } 1345 } 1346 return node; 1347 } 1348 1349 private void nextSetup(Node node, Node nextNode) { 1350 // retry: 1351 retry: while(true) { 1352 1353 int type = node.getType(); 1354 if (type == NodeType.QTFR) { 1355 QuantifierNode qn = (QuantifierNode)node; 1356 if (qn.greedy && isRepeatInfinite(qn.upper)) { 1357 if (Config.USE_QTFR_PEEK_NEXT) { 1358 StringNode n = (StringNode)getHeadValueNode(nextNode, true); 1359 /* '\0': for UTF-16BE etc... */ 1360 if (n != null && n.chars[n.p] != 0) { // ????????? 1361 qn.nextHeadExact = n; 1362 } 1363 } // USE_QTFR_PEEK_NEXT 1364 /* automatic posseivation a*b ==> (?>a*)b */ 1365 if (qn.lower <= 1) { 1366 if (qn.target.isSimple()) { 1367 Node x = getHeadValueNode(qn.target, false); 1368 if (x != null) { 1369 Node y = getHeadValueNode(nextNode, false); 1370 if (y != null && isNotIncluded(x, y)) { 1371 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose 1372 en.setStopBtSimpleRepeat(); 1373 //en.setTarget(qn.target); // optimize it ?? 1374 swap(node, en); 1375 1376 en.setTarget(node); 1377 } 1378 } 1379 } 1380 } 1381 } 1382 } else if (type == NodeType.ENCLOSE) { 1383 EncloseNode en = (EncloseNode)node; 1384 if (en.isMemory()) { 1385 node = en.target; 1386 // !goto retry;! 1387 continue retry; 1388 } 1389 } 1390 1391 break; 1392 } // while 1393 } 1394 1395 private void updateStringNodeCaseFoldMultiByte(StringNode sn) { 1396 char[] chars = sn.chars; 1397 int end = sn.end; 1398 value = sn.p; 1399 int sp = 0; 1400 char buf; 1401 1402 while (value < end) { 1403 int ovalue = value; 1404 buf = Character.toLowerCase(chars[value++]); 1405 1406 if (chars[ovalue] != buf) { 1407 1408 char[] sbuf = new char[sn.length() << 1]; 1409 System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p); 1410 value = ovalue; 1411 while (value < end) { 1412 buf = Character.toLowerCase(chars[value++]); 1413 if (sp >= sbuf.length) { 1414 char[]tmp = new char[sbuf.length << 1]; 1415 System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); 1416 sbuf = tmp; 1417 } 1418 sbuf[sp++] = buf; 1419 } 1420 sn.set(sbuf, 0, sp); 1421 return; 1422 } 1423 sp++; 1424 } 1425 } 1426 1427 private void updateStringNodeCaseFold(Node node) { 1428 StringNode sn = (StringNode)node; 1429 updateStringNodeCaseFoldMultiByte(sn); 1430 } 1431 1432 private Node expandCaseFoldMakeRemString(char[] chars, int p, int end) { 1433 StringNode node = new StringNode(chars, p, end); 1434 1435 updateStringNodeCaseFold(node); 1436 node.setAmbig(); 1437 node.setDontGetOptInfo(); 1438 return node; 1439 } 1440 1441 private boolean expandCaseFoldStringAlt(int itemNum, char[] items, 1442 char[] chars, int p, int slen, int end, ObjPtr<Node> node) { 1443 1444 ConsAltNode altNode; 1445 node.p = altNode = newAltNode(null, null); 1446 1447 StringNode snode = new StringNode(chars, p, p + slen); 1448 altNode.setCar(snode); 1449 1450 for (int i=0; i<itemNum; i++) { 1451 snode = new StringNode(); 1452 1453 snode.catCode(items[i]); 1454 1455 ConsAltNode an = newAltNode(null, null); 1456 an.setCar(snode); 1457 altNode.setCdr(an); 1458 altNode = an; 1459 } 1460 return false; 1461 } 1462 1463 private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; 1464 private Node expandCaseFoldString(Node node) { 1465 StringNode sn = (StringNode)node; 1466 1467 if (sn.isAmbig() || sn.length() <= 0) return node; 1468 1469 char[] chars = sn.chars; 1470 int p = sn.p; 1471 int end = sn.end; 1472 int altNum = 1; 1473 1474 ConsAltNode topRoot = null, root = null; 1475 ObjPtr<Node> prevNode = new ObjPtr<Node>(); 1476 StringNode stringNode = null; 1477 1478 while (p < end) { 1479 char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars[p]); 1480 1481 if (items.length == 0) { 1482 if (stringNode == null) { 1483 if (root == null && prevNode.p != null) { 1484 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 1485 } 1486 1487 prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); 1488 1489 if (root != null) ConsAltNode.listAdd(root, stringNode); 1490 1491 } 1492 1493 stringNode.cat(chars, p, p + 1); 1494 } else { 1495 altNum *= (items.length + 1); 1496 if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 1497 1498 if (root == null && prevNode.p != null) { 1499 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 1500 } 1501 1502 expandCaseFoldStringAlt(items.length, items, chars, p, 1, end, prevNode); 1503 if (root != null) ConsAltNode.listAdd(root, prevNode.p); 1504 stringNode = null; 1505 } 1506 p++; 1507 } 1508 1509 if (p < end) { 1510 Node srem = expandCaseFoldMakeRemString(chars, p, end); 1511 1512 if (prevNode.p != null && root == null) { 1513 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 1514 } 1515 1516 if (root == null) { 1517 prevNode.p = srem; 1518 } else { 1519 ConsAltNode.listAdd(root, srem); 1520 } 1521 } 1522 /* ending */ 1523 Node xnode = topRoot != null ? topRoot : prevNode.p; 1524 1525 swap(node, xnode); 1526 return xnode; 1527 } 1528 1529 private static final int CEC_THRES_NUM_BIG_REPEAT = 512; 1530 private static final int CEC_INFINITE_NUM = 0x7fffffff; 1531 1532 private static final int CEC_IN_INFINITE_REPEAT = (1<<0); 1533 private static final int CEC_IN_FINITE_REPEAT = (1<<1); 1534 private static final int CEC_CONT_BIG_REPEAT = (1<<2); 1535 1536 protected final int setupCombExpCheck(Node node, int state) { 1537 int r = state; 1538 int ret; 1539 1540 switch (node.getType()) { 1541 case NodeType.LIST: 1542 ConsAltNode ln = (ConsAltNode)node; 1543 1544 do { 1545 r = setupCombExpCheck(ln.car, r); 1546 //prev = ((ConsAltNode)node).car; 1547 } while (r >= 0 && (ln = ln.cdr) != null); 1548 break; 1549 1550 case NodeType.ALT: 1551 ConsAltNode an = (ConsAltNode)node; 1552 do { 1553 ret = setupCombExpCheck(an.car, state); 1554 r |= ret; 1555 } while (ret >= 0 && (an = an.cdr) != null); 1556 break; 1557 1558 case NodeType.QTFR: 1559 QuantifierNode qn = (QuantifierNode)node; 1560 int childState = state; 1561 int addState = 0; 1562 int varNum; 1563 1564 if (!isRepeatInfinite(qn.upper)) { 1565 if (qn.upper > 1) { 1566 /* {0,1}, {1,1} are allowed */ 1567 childState |= CEC_IN_FINITE_REPEAT; 1568 1569 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ 1570 if (env.backrefedMem == 0) { 1571 if (qn.target.getType() == NodeType.ENCLOSE) { 1572 EncloseNode en = (EncloseNode)qn.target; 1573 if (en.type == EncloseType.MEMORY) { 1574 if (en.target.getType() == NodeType.QTFR) { 1575 QuantifierNode q = (QuantifierNode)en.target; 1576 if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) { 1577 qn.upper = qn.lower == 0 ? 1 : qn.lower; 1578 if (qn.upper == 1) childState = state; 1579 } 1580 } 1581 } 1582 } 1583 } 1584 } 1585 } 1586 1587 if ((state & CEC_IN_FINITE_REPEAT) != 0) { 1588 qn.combExpCheckNum = -1; 1589 } else { 1590 if (isRepeatInfinite(qn.upper)) { 1591 varNum = CEC_INFINITE_NUM; 1592 childState |= CEC_IN_INFINITE_REPEAT; 1593 } else { 1594 varNum = qn.upper - qn.lower; 1595 } 1596 1597 if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT; 1598 1599 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) || 1600 ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) { 1601 if (qn.combExpCheckNum == 0) { 1602 env.numCombExpCheck++; 1603 qn.combExpCheckNum = env.numCombExpCheck; 1604 if (env.currMaxRegNum > env.combExpMaxRegNum) { 1605 env.combExpMaxRegNum = env.currMaxRegNum; 1606 } 1607 } 1608 } 1609 } 1610 r = setupCombExpCheck(qn.target, childState); 1611 r |= addState; 1612 break; 1613 1614 case NodeType.ENCLOSE: 1615 EncloseNode en = (EncloseNode)node; 1616 switch( en.type) { 1617 case EncloseNode.MEMORY: 1618 if (env.currMaxRegNum < en.regNum) { 1619 env.currMaxRegNum = en.regNum; 1620 } 1621 r = setupCombExpCheck(en.target, state); 1622 break; 1623 1624 default: 1625 r = setupCombExpCheck(en.target, state); 1626 } // inner switch 1627 break; 1628 1629 case NodeType.CALL: 1630 if (Config.USE_SUBEXP_CALL) { 1631 CallNode cn = (CallNode)node; 1632 if (cn.isRecursion()) { 1633 env.hasRecursion = true; 1634 } else { 1635 r = setupCombExpCheck(cn.target, state); 1636 } 1637 } // USE_SUBEXP_CALL 1638 break; 1639 1640 default: 1641 break; 1642 1643 } // switch 1644 1645 return r; 1646 } 1647 1648 private static final int IN_ALT = (1<<0); 1649 private static final int IN_NOT = (1<<1); 1650 private static final int IN_REPEAT = (1<<2); 1651 private static final int IN_VAR_REPEAT = (1<<3); 1652 private static final int EXPAND_STRING_MAX_LENGTH = 100; 1653 1654 /* setup_tree does the following work. 1655 1. check empty loop. (set qn->target_empty_info) 1656 2. expand ignore-case in char class. 1657 3. set memory status bit flags. (reg->mem_stats) 1658 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 1659 5. find invalid patterns in look-behind. 1660 6. expand repeated string. 1661 */ 1662 protected final Node setupTree(Node node, int state) { 1663 restart: while (true) { 1664 switch (node.getType()) { 1665 case NodeType.LIST: 1666 ConsAltNode lin = (ConsAltNode)node; 1667 Node prev = null; 1668 do { 1669 setupTree(lin.car, state); 1670 if (prev != null) { 1671 nextSetup(prev, lin.car); 1672 } 1673 prev = lin.car; 1674 } while ((lin = lin.cdr) != null); 1675 break; 1676 1677 case NodeType.ALT: 1678 ConsAltNode aln = (ConsAltNode)node; 1679 do { 1680 setupTree(aln.car, (state | IN_ALT)); 1681 } while ((aln = aln.cdr) != null); 1682 break; 1683 1684 case NodeType.CCLASS: 1685 break; 1686 1687 case NodeType.STR: 1688 if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { 1689 node = expandCaseFoldString(node); 1690 } 1691 break; 1692 1693 case NodeType.CTYPE: 1694 case NodeType.CANY: 1695 break; 1696 1697 case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ? 1698 break; 1699 1700 case NodeType.BREF: 1701 BackRefNode br = (BackRefNode)node; 1702 for (int i=0; i<br.backNum; i++) { 1703 if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); 1704 env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]); 1705 env.btMemStart = bsOnAt(env.btMemStart, br.back[i]); 1706 if (Config.USE_BACKREF_WITH_LEVEL) { 1707 if (br.isNestLevel()) { 1708 env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]); 1709 } 1710 } // USE_BACKREF_AT_LEVEL 1711 ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed(); 1712 } 1713 break; 1714 1715 case NodeType.QTFR: 1716 QuantifierNode qn = (QuantifierNode)node; 1717 Node target = qn.target; 1718 1719 if ((state & IN_REPEAT) != 0) qn.setInRepeat(); 1720 1721 if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { 1722 int d = getMinMatchLength(target); 1723 if (d == 0) { 1724 qn.targetEmptyInfo = TargetInfo.IS_EMPTY; 1725 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 1726 int info = quantifiersMemoryInfo(target); 1727 if (info > 0) qn.targetEmptyInfo = info; 1728 } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1729 // strange stuff here (turned off) 1730 } 1731 } 1732 1733 state |= IN_REPEAT; 1734 if (qn.lower != qn.upper) state |= IN_VAR_REPEAT; 1735 1736 target = setupTree(target, state); 1737 1738 /* expand string */ 1739 if (target.getType() == NodeType.STR) { 1740 if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && 1741 qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1742 StringNode sn = (StringNode)target; 1743 int len = sn.length(); 1744 1745 if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1746 StringNode str = qn.convertToString(sn.flag); 1747 int n = qn.lower; 1748 for (int i = 0; i < n; i++) { 1749 str.cat(sn.chars, sn.p, sn.end); 1750 } 1751 break; /* break case NT_QTFR: */ 1752 } 1753 1754 } 1755 } 1756 if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { 1757 if (qn.greedy && qn.targetEmptyInfo != 0) { 1758 if (target.getType() == NodeType.QTFR) { 1759 QuantifierNode tqn = (QuantifierNode)target; 1760 if (tqn.headExact != null) { 1761 qn.headExact = tqn.headExact; 1762 tqn.headExact = null; 1763 } 1764 } else { 1765 qn.headExact = getHeadValueNode(qn.target, true); 1766 } 1767 } 1768 } // USE_OP_PUSH_OR_JUMP_EXACT 1769 break; 1770 1771 case NodeType.ENCLOSE: 1772 EncloseNode en = (EncloseNode)node; 1773 switch (en.type) { 1774 case EncloseType.OPTION: 1775 int options = regex.options; 1776 regex.options = en.option; 1777 setupTree(en.target, state); 1778 regex.options = options; 1779 break; 1780 1781 case EncloseType.MEMORY: 1782 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 1783 env.btMemStart = bsOnAt(env.btMemStart, en.regNum); 1784 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 1785 1786 } 1787 setupTree(en.target, state); 1788 break; 1789 1790 case EncloseType.STOP_BACKTRACK: 1791 setupTree(en.target, state); 1792 if (en.target.getType() == NodeType.QTFR) { 1793 QuantifierNode tqn = (QuantifierNode)en.target; 1794 if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { 1795 /* (?>a*), a*+ etc... */ 1796 if (tqn.target.isSimple()) en.setStopBtSimpleRepeat(); 1797 } 1798 } 1799 break; 1800 1801 } // inner switch 1802 break; 1803 1804 case NodeType.ANCHOR: 1805 AnchorNode an = (AnchorNode)node; 1806 switch (an.type) { 1807 case AnchorType.PREC_READ: 1808 setupTree(an.target, state); 1809 break; 1810 1811 case AnchorType.PREC_READ_NOT: 1812 setupTree(an.target, (state | IN_NOT)); 1813 break; 1814 1815 case AnchorType.LOOK_BEHIND: 1816 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1817 node = setupLookBehind(node); 1818 if (node.getType() != NodeType.ANCHOR) continue restart; 1819 setupTree(((AnchorNode)node).target, state); 1820 break; 1821 1822 case AnchorType.LOOK_BEHIND_NOT: 1823 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1824 node = setupLookBehind(node); 1825 if (node.getType() != NodeType.ANCHOR) continue restart; 1826 setupTree(((AnchorNode)node).target, (state | IN_NOT)); 1827 break; 1828 1829 } // inner switch 1830 break; 1831 } // switch 1832 return node; 1833 } // restart: while 1834 } 1835 1836 private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; 1837 private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd 1838 opt.clear(); 1839 opt.setBoundNode(oenv.mmd); 1840 1841 switch (node.getType()) { 1842 case NodeType.LIST: { 1843 OptEnvironment nenv = new OptEnvironment(); 1844 NodeOptInfo nopt = new NodeOptInfo(); 1845 nenv.copy(oenv); 1846 ConsAltNode lin = (ConsAltNode)node; 1847 do { 1848 optimizeNodeLeft(lin.car, nopt, nenv); 1849 nenv.mmd.add(nopt.length); 1850 opt.concatLeftNode(nopt); 1851 } while ((lin = lin.cdr) != null); 1852 break; 1853 } 1854 1855 case NodeType.ALT: { 1856 NodeOptInfo nopt = new NodeOptInfo(); 1857 ConsAltNode aln = (ConsAltNode)node; 1858 do { 1859 optimizeNodeLeft(aln.car, nopt, oenv); 1860 if (aln == node) { 1861 opt.copy(nopt); 1862 } else { 1863 opt.altMerge(nopt, oenv); 1864 } 1865 } while ((aln = aln.cdr) != null); 1866 break; 1867 } 1868 1869 case NodeType.STR: { 1870 StringNode sn = (StringNode)node; 1871 1872 int slen = sn.length(); 1873 1874 if (!sn.isAmbig()) { 1875 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1876 1877 if (slen > 0) { 1878 opt.map.addChar(sn.chars[sn.p]); 1879 } 1880 1881 opt.length.set(slen, slen); 1882 } else { 1883 int max; 1884 if (sn.isDontGetOptInfo()) { 1885 max = sn.length(); 1886 } else { 1887 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1888 opt.exb.ignoreCase = true; 1889 1890 if (slen > 0) { 1891 opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); 1892 } 1893 1894 max = slen; 1895 } 1896 opt.length.set(slen, max); 1897 } 1898 1899 if (opt.exb.length == slen) { 1900 opt.exb.reachEnd = true; 1901 } 1902 break; 1903 } 1904 1905 case NodeType.CCLASS: { 1906 CClassNode cc = (CClassNode)node; 1907 /* no need to check ignore case. (setted in setup_tree()) */ 1908 if (cc.mbuf != null || cc.isNot()) { 1909 opt.length.set(1, 1); 1910 } else { 1911 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1912 boolean z = cc.bs.at(i); 1913 if ((z && !cc.isNot()) || (!z && cc.isNot())) { 1914 opt.map.addChar(i); 1915 } 1916 } 1917 opt.length.set(1, 1); 1918 } 1919 break; 1920 } 1921 1922 case NodeType.CTYPE: { 1923 int min; 1924 int max = 1; 1925 if (max == 1) { 1926 min = 1; 1927 CTypeNode cn = (CTypeNode)node; 1928 1929 switch (cn.ctype) { 1930 case CharacterType.WORD: 1931 if (cn.not) { 1932 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1933 if (!EncodingHelper.isWord(i)) { 1934 opt.map.addChar(i); 1935 } 1936 } 1937 } else { 1938 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1939 if (EncodingHelper.isWord(i)) { 1940 opt.map.addChar(i); 1941 } 1942 } 1943 } 1944 break; 1945 } // inner switch 1946 } else { 1947 min = 1; 1948 } 1949 opt.length.set(min, max); 1950 break; 1951 } 1952 1953 case NodeType.CANY: { 1954 opt.length.set(1, 1); 1955 break; 1956 } 1957 1958 case NodeType.ANCHOR: { 1959 AnchorNode an = (AnchorNode)node; 1960 switch (an.type) { 1961 case AnchorType.BEGIN_BUF: 1962 case AnchorType.BEGIN_POSITION: 1963 case AnchorType.BEGIN_LINE: 1964 case AnchorType.END_BUF: 1965 case AnchorType.SEMI_END_BUF: 1966 case AnchorType.END_LINE: 1967 opt.anchor.add(an.type); 1968 break; 1969 1970 case AnchorType.PREC_READ: 1971 NodeOptInfo nopt = new NodeOptInfo(); 1972 optimizeNodeLeft(an.target, nopt, oenv); 1973 if (nopt.exb.length > 0) { 1974 opt.expr.copy(nopt.exb); 1975 } else if (nopt.exm.length > 0) { 1976 opt.expr.copy(nopt.exm); 1977 } 1978 opt.expr.reachEnd = false; 1979 if (nopt.map.value > 0) opt.map.copy(nopt.map); 1980 break; 1981 1982 case AnchorType.PREC_READ_NOT: 1983 case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ 1984 case AnchorType.LOOK_BEHIND_NOT: 1985 break; 1986 1987 } // inner switch 1988 break; 1989 } 1990 1991 case NodeType.BREF: { 1992 BackRefNode br = (BackRefNode)node; 1993 1994 if (br.isRecursion()) { 1995 opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); 1996 break; 1997 } 1998 1999 Node[]nodes = oenv.scanEnv.memNodes; 2000 2001 int min = getMinMatchLength(nodes[br.back[0]]); 2002 int max = getMaxMatchLength(nodes[br.back[0]]); 2003 2004 for (int i=1; i<br.backNum; i++) { 2005 int tmin = getMinMatchLength(nodes[br.back[i]]); 2006 int tmax = getMaxMatchLength(nodes[br.back[i]]); 2007 if (min > tmin) min = tmin; 2008 if (max < tmax) max = tmax; 2009 } 2010 opt.length.set(min, max); 2011 break; 2012 } 2013 2014 case NodeType.CALL: { 2015 if (Config.USE_SUBEXP_CALL) { 2016 CallNode cn = (CallNode)node; 2017 if (cn.isRecursion()) { 2018 opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); 2019 } else { 2020 int safe = oenv.options; 2021 oenv.options = ((EncloseNode)cn.target).option; 2022 optimizeNodeLeft(cn.target, opt, oenv); 2023 oenv.options = safe; 2024 } 2025 } // USE_SUBEXP_CALL 2026 break; 2027 } 2028 2029 case NodeType.QTFR: { 2030 NodeOptInfo nopt = new NodeOptInfo(); 2031 QuantifierNode qn = (QuantifierNode)node; 2032 optimizeNodeLeft(qn.target, nopt, oenv); 2033 if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { 2034 if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { 2035 if (isMultiline(oenv.options)) { 2036 opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); 2037 } else { 2038 opt.anchor.add(AnchorType.ANYCHAR_STAR); 2039 } 2040 } 2041 } else { 2042 if (qn.lower > 0) { 2043 opt.copy(nopt); 2044 if (nopt.exb.length > 0) { 2045 if (nopt.exb.reachEnd) { 2046 int i; 2047 for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { 2048 opt.exb.concat(nopt.exb); 2049 } 2050 if (i < qn.lower) { 2051 opt.exb.reachEnd = false; 2052 } 2053 } 2054 } 2055 if (qn.lower != qn.upper) { 2056 opt.exb.reachEnd = false; 2057 opt.exm.reachEnd = false; 2058 } 2059 if (qn.lower > 1) { 2060 opt.exm.reachEnd = false; 2061 } 2062 2063 } 2064 } 2065 int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); 2066 int max; 2067 if (isRepeatInfinite(qn.upper)) { 2068 max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; 2069 } else { 2070 max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); 2071 } 2072 opt.length.set(min, max); 2073 break; 2074 } 2075 2076 case NodeType.ENCLOSE: { 2077 EncloseNode en = (EncloseNode)node; 2078 switch (en.type) { 2079 case EncloseType.OPTION: 2080 int save = oenv.options; 2081 oenv.options = en.option; 2082 optimizeNodeLeft(en.target, opt, oenv); 2083 oenv.options = save; 2084 break; 2085 2086 case EncloseType.MEMORY: 2087 if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { 2088 int min = 0; 2089 int max = MinMaxLen.INFINITE_DISTANCE; 2090 if (en.isMinFixed()) min = en.minLength; 2091 if (en.isMaxFixed()) max = en.maxLength; 2092 opt.length.set(min, max); 2093 } else { // USE_SUBEXP_CALL 2094 optimizeNodeLeft(en.target, opt, oenv); 2095 if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { 2096 if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { 2097 opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); 2098 } 2099 } 2100 } 2101 break; 2102 2103 case EncloseType.STOP_BACKTRACK: 2104 optimizeNodeLeft(en.target, opt, oenv); 2105 break; 2106 } // inner switch 2107 break; 2108 } 2109 2110 default: 2111 newInternalException(ERR_PARSER_BUG); 2112 } // switch 2113 } 2114 2115 protected final void setOptimizedInfoFromTree(Node node) { 2116 NodeOptInfo opt = new NodeOptInfo(); 2117 OptEnvironment oenv = new OptEnvironment(); 2118 2119 oenv.options = regex.options; 2120 oenv.caseFoldFlag = regex.caseFoldFlag; 2121 oenv.scanEnv = env; 2122 oenv.mmd.clear(); // ?? 2123 2124 optimizeNodeLeft(node, opt, oenv); 2125 2126 regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | 2127 AnchorType.BEGIN_POSITION | 2128 AnchorType.ANYCHAR_STAR | 2129 AnchorType.ANYCHAR_STAR_ML); 2130 2131 regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | 2132 AnchorType.SEMI_END_BUF); 2133 2134 if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { 2135 regex.anchorDmin = opt.length.min; 2136 regex.anchorDmax = opt.length.max; 2137 } 2138 2139 if (opt.exb.length > 0 || opt.exm.length > 0) { 2140 opt.exb.select(opt.exm); 2141 if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { 2142 // !goto set_map;! 2143 regex.setOptimizeMapInfo(opt.map); 2144 regex.setSubAnchor(opt.map.anchor); 2145 } else { 2146 regex.setExactInfo(opt.exb); 2147 regex.setSubAnchor(opt.exb.anchor); 2148 } 2149 } else if (opt.map.value > 0) { 2150 // !set_map:! 2151 regex.setOptimizeMapInfo(opt.map); 2152 regex.setSubAnchor(opt.map.anchor); 2153 } else { 2154 regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; 2155 if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; 2156 } 2157 2158 if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { 2159 Config.log.println(regex.optimizeInfoToString()); 2160 } 2161 } 2162 }