1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll; 23 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 24 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt; 25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; 28 import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode; 29 import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; 30 31 import java.util.HashSet; 32 33 import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 34 import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 35 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 36 import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 37 import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 38 import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 39 import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 40 import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 41 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 42 import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 43 import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 44 import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; 45 import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; 46 import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; 47 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 48 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 49 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 50 51 final class Analyser extends Parser { 52 53 protected Analyser(ScanEnvironment env, char[] chars, int p, int end) { 54 super(env, chars, p, end); 55 } 56 57 protected final void compile() { 58 if (Config.DEBUG) { 59 Config.log.println(new String(chars, getBegin(), getEnd())); 60 } 61 62 reset(); 63 64 regex.numMem = 0; 65 regex.numRepeat = 0; 66 regex.numNullCheck = 0; 67 //regex.repeatRangeAlloc = 0; 68 regex.repeatRangeLo = null; 69 regex.repeatRangeHi = null; 70 71 parse(); 72 73 if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { 74 Config.log.println("<RAW TREE>"); 75 Config.log.println(root + "\n"); 76 } 77 78 root = setupTree(root, 0); 79 if (Config.DEBUG_PARSE_TREE) { 80 if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println("<TREE>"); 81 root.verifyTree(new HashSet<Node>(), env.reg.warnings); 82 Config.log.println(root + "\n"); 83 } 84 85 regex.captureHistory = env.captureHistory; 86 regex.btMemStart = env.btMemStart; 87 regex.btMemEnd = env.btMemEnd; 88 89 if (isFindCondition(regex.options)) { 90 regex.btMemEnd = bsAll(); 91 } else { 92 regex.btMemEnd = env.btMemEnd; 93 regex.btMemEnd |= regex.captureHistory; 94 } 95 96 regex.clearOptimizeInfo(); 97 98 if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root); 99 100 env.memNodes = null; 101 102 if (regex.numRepeat != 0 || regex.btMemEnd != 0) { 103 regex.stackPopLevel = StackPopLevel.ALL; 104 } else { 105 if (regex.btMemStart != 0) { 106 regex.stackPopLevel = StackPopLevel.MEM_START; 107 } else { 108 regex.stackPopLevel = StackPopLevel.FREE; 109 } 110 } 111 112 if (Config.DEBUG_COMPILE) { 113 Config.log.println("stack used: " + regex.stackNeeded); 114 if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n"); 115 Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); 116 117 } // DEBUG_COMPILE 118 } 119 120 private void swap(Node a, Node b) { 121 a.swap(b); 122 123 if (root == b) { 124 root = a; 125 } else if (root == a) { 126 root = b; 127 } 128 } 129 130 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 131 private int quantifiersMemoryInfo(Node node) { 132 int info = 0; 133 134 switch(node.getType()) { 135 case NodeType.LIST: 136 case NodeType.ALT: 137 ConsAltNode can = (ConsAltNode)node; 138 do { 139 int v = quantifiersMemoryInfo(can.car); 140 if (v > info) info = v; 141 } while ((can = can.cdr) != null); 142 break; 143 144 case NodeType.QTFR: 145 QuantifierNode qn = (QuantifierNode)node; 146 if (qn.upper != 0) { 147 info = quantifiersMemoryInfo(qn.target); 148 } 149 break; 150 151 case NodeType.ENCLOSE: 152 EncloseNode en = (EncloseNode)node; 153 switch (en.type) { 154 case EncloseType.MEMORY: 155 return TargetInfo.IS_EMPTY_MEM; 156 157 case EncloseType.OPTION: 158 case EncloseNode.STOP_BACKTRACK: 159 info = quantifiersMemoryInfo(en.target); 160 break; 161 162 default: 163 break; 164 } // inner switch 165 break; 166 167 case NodeType.BREF: 168 case NodeType.STR: 169 case NodeType.CTYPE: 170 case NodeType.CCLASS: 171 case NodeType.CANY: 172 case NodeType.ANCHOR: 173 default: 174 break; 175 } // switch 176 177 return info; 178 } 179 180 private int getMinMatchLength(Node node) { 181 int min = 0; 182 183 switch (node.getType()) { 184 case NodeType.BREF: 185 BackRefNode br = (BackRefNode)node; 186 if (br.isRecursion()) break; 187 188 if (br.backRef > env.numMem) { 189 throw new ValueException(ERR_INVALID_BACKREF); 190 } 191 min = getMinMatchLength(env.memNodes[br.backRef]); 192 193 break; 194 195 case NodeType.LIST: 196 ConsAltNode can = (ConsAltNode)node; 197 do { 198 min += getMinMatchLength(can.car); 199 } while ((can = can.cdr) != null); 200 break; 201 202 case NodeType.ALT: 203 ConsAltNode y = (ConsAltNode)node; 204 do { 205 Node x = y.car; 206 int tmin = getMinMatchLength(x); 207 if (y == node) { 208 min = tmin; 209 } else if (min > tmin) { 210 min = tmin; 211 } 212 } while ((y = y.cdr) != null); 213 break; 214 215 case NodeType.STR: 216 min = ((StringNode)node).length(); 217 break; 218 219 case NodeType.CTYPE: 220 min = 1; 221 break; 222 223 case NodeType.CCLASS: 224 case NodeType.CANY: 225 min = 1; 226 break; 227 228 case NodeType.QTFR: 229 QuantifierNode qn = (QuantifierNode)node; 230 if (qn.lower > 0) { 231 min = getMinMatchLength(qn.target); 232 min = MinMaxLen.distanceMultiply(min, qn.lower); 233 } 234 break; 235 236 case NodeType.ENCLOSE: 237 EncloseNode en = (EncloseNode)node; 238 switch (en.type) { 239 case EncloseType.MEMORY: 240 if (en.isMinFixed()) { 241 min = en.minLength; 242 } else { 243 min = getMinMatchLength(en.target); 244 en.minLength = min; 245 en.setMinFixed(); 246 } 247 break; 248 249 case EncloseType.OPTION: 250 case EncloseType.STOP_BACKTRACK: 251 min = getMinMatchLength(en.target); 252 break; 253 } // inner switch 254 break; 255 256 case NodeType.ANCHOR: 257 default: 258 break; 259 } // switch 260 261 return min; 262 } 263 264 private int getMaxMatchLength(Node node) { 265 int max = 0; 266 267 switch (node.getType()) { 268 case NodeType.LIST: 269 ConsAltNode ln = (ConsAltNode)node; 270 do { 271 int tmax = getMaxMatchLength(ln.car); 272 max = MinMaxLen.distanceAdd(max, tmax); 273 } while ((ln = ln.cdr) != null); 274 break; 275 276 case NodeType.ALT: 277 ConsAltNode an = (ConsAltNode)node; 278 do { 279 int tmax = getMaxMatchLength(an.car); 280 if (max < tmax) max = tmax; 281 } while ((an = an.cdr) != null); 282 break; 283 284 case NodeType.STR: 285 max = ((StringNode)node).length(); 286 break; 287 288 case NodeType.CTYPE: 289 max = 1; 290 break; 291 292 case NodeType.CCLASS: 293 case NodeType.CANY: 294 max = 1; 295 break; 296 297 case NodeType.BREF: 298 BackRefNode br = (BackRefNode)node; 299 if (br.isRecursion()) { 300 max = MinMaxLen.INFINITE_DISTANCE; 301 break; 302 } 303 304 if (br.backRef > env.numMem) { 305 throw new ValueException(ERR_INVALID_BACKREF); 306 } 307 int tmax = getMaxMatchLength(env.memNodes[br.backRef]); 308 if (max < tmax) max = tmax; 309 break; 310 311 case NodeType.QTFR: 312 QuantifierNode qn = (QuantifierNode)node; 313 if (qn.upper != 0) { 314 max = getMaxMatchLength(qn.target); 315 if (max != 0) { 316 if (!isRepeatInfinite(qn.upper)) { 317 max = MinMaxLen.distanceMultiply(max, qn.upper); 318 } else { 319 max = MinMaxLen.INFINITE_DISTANCE; 320 } 321 } 322 } 323 break; 324 325 case NodeType.ENCLOSE: 326 EncloseNode en = (EncloseNode)node; 327 switch (en.type) { 328 case EncloseType.MEMORY: 329 if (en.isMaxFixed()) { 330 max = en.maxLength; 331 } else { 332 max = getMaxMatchLength(en.target); 333 en.maxLength = max; 334 en.setMaxFixed(); 335 } 336 break; 337 338 case EncloseType.OPTION: 339 case EncloseType.STOP_BACKTRACK: 340 max = getMaxMatchLength(en.target); 341 break; 342 } // inner switch 343 break; 344 345 case NodeType.ANCHOR: 346 default: 347 break; 348 } // switch 349 350 return max; 351 } 352 353 private static final int GET_CHAR_LEN_VARLEN = -1; 354 private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; 355 protected final int getCharLengthTree(Node node) { 356 return getCharLengthTree(node, 0); 357 } 358 359 private int getCharLengthTree(Node node, int level) { 360 level++; 361 362 int len = 0; 363 returnCode = 0; 364 365 switch(node.getType()) { 366 case NodeType.LIST: 367 ConsAltNode ln = (ConsAltNode)node; 368 do { 369 int tlen = getCharLengthTree(ln.car, level); 370 if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen); 371 } while (returnCode == 0 && (ln = ln.cdr) != null); 372 break; 373 374 case NodeType.ALT: 375 ConsAltNode an = (ConsAltNode)node; 376 boolean varLen = false; 377 378 int tlen = getCharLengthTree(an.car, level); 379 while (returnCode == 0 && (an = an.cdr) != null) { 380 int tlen2 = getCharLengthTree(an.car, level); 381 if (returnCode == 0) { 382 if (tlen != tlen2) varLen = true; 383 } 384 } 385 386 if (returnCode == 0) { 387 if (varLen) { 388 if (level == 1) { 389 returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; 390 } else { 391 returnCode = GET_CHAR_LEN_VARLEN; 392 } 393 } else { 394 len = tlen; 395 } 396 } 397 break; 398 399 case NodeType.STR: 400 StringNode sn = (StringNode)node; 401 len = sn.length(); 402 break; 403 404 case NodeType.QTFR: 405 QuantifierNode qn = (QuantifierNode)node; 406 if (qn.lower == qn.upper) { 407 tlen = getCharLengthTree(qn.target, level); 408 if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower); 409 } else { 410 returnCode = GET_CHAR_LEN_VARLEN; 411 } 412 break; 413 414 case NodeType.CTYPE: 415 case NodeType.CCLASS: 416 case NodeType.CANY: 417 len = 1; 418 break; 419 420 case NodeType.ENCLOSE: 421 EncloseNode en = (EncloseNode)node; 422 switch(en.type) { 423 case EncloseType.MEMORY: 424 if (en.isCLenFixed()) { 425 len = en.charLength; 426 } else { 427 len = getCharLengthTree(en.target, level); 428 if (returnCode == 0) { 429 en.charLength = len; 430 en.setCLenFixed(); 431 } 432 } 433 break; 434 435 case EncloseType.OPTION: 436 case EncloseType.STOP_BACKTRACK: 437 len = getCharLengthTree(en.target, level); 438 break; 439 } // inner switch 440 break; 441 442 case NodeType.ANCHOR: 443 break; 444 445 default: 446 returnCode = GET_CHAR_LEN_VARLEN; 447 } // switch 448 return len; 449 } 450 451 /* x is not included y ==> 1 : 0 */ 452 private boolean isNotIncluded(Node x, Node y) { 453 Node tmp; 454 455 // !retry:! 456 retry: while(true) { 457 458 int yType = y.getType(); 459 460 switch(x.getType()) { 461 case NodeType.CTYPE: 462 switch(yType) { 463 464 case NodeType.CCLASS: 465 // !swap:! 466 tmp = x; 467 x = y; 468 y = tmp; 469 // !goto retry;! 470 continue retry; 471 472 case NodeType.STR: 473 // !goto swap;! 474 tmp = x; 475 x = y; 476 y = tmp; 477 continue retry; 478 479 default: 480 break; 481 } // inner switch 482 break; 483 484 case NodeType.CCLASS: 485 CClassNode xc = (CClassNode)x; 486 487 switch(yType) { 488 489 case NodeType.CCLASS: 490 CClassNode yc = (CClassNode)y; 491 492 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 493 boolean v = xc.bs.at(i); 494 if ((v && !xc.isNot()) || (!v && xc.isNot())) { 495 v = yc.bs.at(i); 496 if ((v && !yc.isNot()) || (!v && yc.isNot())) return false; 497 } 498 } 499 if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true; 500 return false; 501 // break; not reached 502 503 case NodeType.STR: 504 // !goto swap;! 505 tmp = x; 506 x = y; 507 y = tmp; 508 continue retry; 509 510 default: 511 break; 512 513 } // inner switch 514 break; // case NodeType.CCLASS 515 516 case NodeType.STR: 517 StringNode xs = (StringNode)x; 518 if (xs.length() == 0) break; 519 520 switch (yType) { 521 522 case NodeType.CCLASS: 523 CClassNode cc = (CClassNode)y; 524 int code = xs.chars[xs.p]; 525 return !cc.isCodeInCC(code); 526 527 case NodeType.STR: 528 StringNode ys = (StringNode)y; 529 int len = xs.length(); 530 if (len > ys.length()) len = ys.length(); 531 if (xs.isAmbig() || ys.isAmbig()) { 532 /* tiny version */ 533 return false; 534 } else { 535 for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) { 536 if (ys.chars[p] != xs.chars[q]) return true; 537 } 538 } 539 break; 540 541 default: 542 break; 543 } // inner switch 544 545 break; // case NodeType.STR 546 547 } // switch 548 549 break; 550 } // retry: while 551 return false; 552 } 553 554 private Node getHeadValueNode(Node node, boolean exact) { 555 Node n = null; 556 557 switch(node.getType()) { 558 case NodeType.BREF: 559 case NodeType.ALT: 560 case NodeType.CANY: 561 break; 562 563 case NodeType.CTYPE: 564 case NodeType.CCLASS: 565 if (!exact) n = node; 566 break; 567 568 case NodeType.LIST: 569 n = getHeadValueNode(((ConsAltNode)node).car, exact); 570 break; 571 572 case NodeType.STR: 573 StringNode sn = (StringNode)node; 574 if (sn.end <= sn.p) break; // ??? 575 576 if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ 577 // nothing 578 } else { 579 n = node; 580 } 581 break; 582 583 case NodeType.QTFR: 584 QuantifierNode qn = (QuantifierNode)node; 585 if (qn.lower > 0) { 586 if (qn.headExact != null) { 587 n = qn.headExact; 588 } else { 589 n = getHeadValueNode(qn.target, exact); 590 } 591 } 592 break; 593 594 case NodeType.ENCLOSE: 595 EncloseNode en = (EncloseNode)node; 596 597 switch (en.type) { 598 case EncloseType.OPTION: 599 int options = regex.options; 600 regex.options = en.option; 601 n = getHeadValueNode(en.target, exact); 602 regex.options = options; 603 break; 604 605 case EncloseType.MEMORY: 606 case EncloseType.STOP_BACKTRACK: 607 n = getHeadValueNode(en.target, exact); 608 break; 609 } // inner switch 610 break; 611 612 case NodeType.ANCHOR: 613 AnchorNode an = (AnchorNode)node; 614 if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact); 615 break; 616 617 default: 618 break; 619 } // switch 620 621 return n; 622 } 623 624 // true: invalid 625 private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) { 626 if ((node.getType2Bit() & typeMask) == 0) return true; 627 628 boolean invalid = false; 629 630 switch(node.getType()) { 631 case NodeType.LIST: 632 case NodeType.ALT: 633 ConsAltNode can = (ConsAltNode)node; 634 do { 635 invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); 636 } while (!invalid && (can = can.cdr) != null); 637 break; 638 639 case NodeType.QTFR: 640 invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); 641 break; 642 643 case NodeType.ENCLOSE: 644 EncloseNode en = (EncloseNode)node; 645 if ((en.type & encloseMask) == 0) return true; 646 invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); 647 break; 648 649 case NodeType.ANCHOR: 650 AnchorNode an = (AnchorNode)node; 651 if ((an.type & anchorMask) == 0) return true; 652 653 if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); 654 break; 655 656 default: 657 break; 658 659 } // switch 660 661 return invalid; 662 } 663 664 /* divide different length alternatives in look-behind. 665 (?<=A|B) ==> (?<=A)|(?<=B) 666 (?<!A|B) ==> (?<!A)(?<!B) 667 */ 668 private Node divideLookBehindAlternatives(Node node) { 669 AnchorNode an = (AnchorNode)node; 670 int anchorType = an.type; 671 Node head = an.target; 672 Node np = ((ConsAltNode)head).car; 673 674 swap(node, head); 675 676 Node tmp = node; 677 node = head; 678 head = tmp; 679 680 ((ConsAltNode)node).setCar(head); 681 ((AnchorNode)head).setTarget(np); 682 np = node; 683 684 while ((np = ((ConsAltNode)np).cdr) != null) { 685 AnchorNode insert = new AnchorNode(anchorType); 686 insert.setTarget(((ConsAltNode)np).car); 687 ((ConsAltNode)np).setCar(insert); 688 } 689 690 if (anchorType == AnchorType.LOOK_BEHIND_NOT) { 691 np = node; 692 do { 693 ((ConsAltNode)np).toListNode(); /* alt -> list */ 694 } while ((np = ((ConsAltNode)np).cdr) != null); 695 } 696 697 return node; 698 } 699 700 private Node setupLookBehind(Node node) { 701 AnchorNode an = (AnchorNode)node; 702 int len = getCharLengthTree(an.target); 703 switch(returnCode) { 704 case 0: 705 an.charLength = len; 706 break; 707 case GET_CHAR_LEN_VARLEN: 708 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 709 case GET_CHAR_LEN_TOP_ALT_VARLEN: 710 if (syntax.differentLengthAltLookBehind()) { 711 return divideLookBehindAlternatives(node); 712 } else { 713 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 714 } 715 } 716 return node; 717 } 718 719 private void nextSetup(Node node, Node nextNode) { 720 // retry: 721 retry: while(true) { 722 723 int type = node.getType(); 724 if (type == NodeType.QTFR) { 725 QuantifierNode qn = (QuantifierNode)node; 726 if (qn.greedy && isRepeatInfinite(qn.upper)) { 727 if (Config.USE_QTFR_PEEK_NEXT) { 728 StringNode n = (StringNode)getHeadValueNode(nextNode, true); 729 /* '\0': for UTF-16BE etc... */ 730 if (n != null && n.chars[n.p] != 0) { // ????????? 731 qn.nextHeadExact = n; 732 } 733 } // USE_QTFR_PEEK_NEXT 734 /* automatic posseivation a*b ==> (?>a*)b */ 735 if (qn.lower <= 1) { 736 if (qn.target.isSimple()) { 737 Node x = getHeadValueNode(qn.target, false); 738 if (x != null) { 739 Node y = getHeadValueNode(nextNode, false); 740 if (y != null && isNotIncluded(x, y)) { 741 EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose 742 en.setStopBtSimpleRepeat(); 743 //en.setTarget(qn.target); // optimize it ?? 744 swap(node, en); 745 746 en.setTarget(node); 747 } 748 } 749 } 750 } 751 } 752 } else if (type == NodeType.ENCLOSE) { 753 EncloseNode en = (EncloseNode)node; 754 if (en.isMemory()) { 755 node = en.target; 756 // !goto retry;! 757 continue retry; 758 } 759 } 760 761 break; 762 } // while 763 } 764 765 private void updateStringNodeCaseFoldMultiByte(StringNode sn) { 766 char[] chars = sn.chars; 767 int end = sn.end; 768 value = sn.p; 769 int sp = 0; 770 char buf; 771 772 while (value < end) { 773 int ovalue = value; 774 buf = Character.toLowerCase(chars[value++]); 775 776 if (chars[ovalue] != buf) { 777 778 char[] sbuf = new char[sn.length() << 1]; 779 System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p); 780 value = ovalue; 781 while (value < end) { 782 buf = Character.toLowerCase(chars[value++]); 783 if (sp >= sbuf.length) { 784 char[]tmp = new char[sbuf.length << 1]; 785 System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); 786 sbuf = tmp; 787 } 788 sbuf[sp++] = buf; 789 } 790 sn.set(sbuf, 0, sp); 791 return; 792 } 793 sp++; 794 } 795 } 796 797 private void updateStringNodeCaseFold(Node node) { 798 StringNode sn = (StringNode)node; 799 updateStringNodeCaseFoldMultiByte(sn); 800 } 801 802 private Node expandCaseFoldMakeRemString(char[] chars, int p, int end) { 803 StringNode node = new StringNode(chars, p, end); 804 805 updateStringNodeCaseFold(node); 806 node.setAmbig(); 807 node.setDontGetOptInfo(); 808 return node; 809 } 810 811 private boolean expandCaseFoldStringAlt(int itemNum, char[] items, 812 char[] chars, int p, int slen, int end, ObjPtr<Node> node) { 813 814 ConsAltNode altNode; 815 node.p = altNode = newAltNode(null, null); 816 817 StringNode snode = new StringNode(chars, p, p + slen); 818 altNode.setCar(snode); 819 820 for (int i=0; i<itemNum; i++) { 821 snode = new StringNode(); 822 823 snode.catCode(items[i]); 824 825 ConsAltNode an = newAltNode(null, null); 826 an.setCar(snode); 827 altNode.setCdr(an); 828 altNode = an; 829 } 830 return false; 831 } 832 833 private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; 834 private Node expandCaseFoldString(Node node) { 835 StringNode sn = (StringNode)node; 836 837 if (sn.isAmbig() || sn.length() <= 0) return node; 838 839 char[] chars = sn.chars; 840 int p = sn.p; 841 int end = sn.end; 842 int altNum = 1; 843 844 ConsAltNode topRoot = null, root = null; 845 ObjPtr<Node> prevNode = new ObjPtr<Node>(); 846 StringNode stringNode = null; 847 848 while (p < end) { 849 char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars[p]); 850 851 if (items.length == 0) { 852 if (stringNode == null) { 853 if (root == null && prevNode.p != null) { 854 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 855 } 856 857 prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); 858 859 if (root != null) ConsAltNode.listAdd(root, stringNode); 860 861 } 862 863 stringNode.cat(chars, p, p + 1); 864 } else { 865 altNum *= (items.length + 1); 866 if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 867 868 if (root == null && prevNode.p != null) { 869 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 870 } 871 872 expandCaseFoldStringAlt(items.length, items, chars, p, 1, end, prevNode); 873 if (root != null) ConsAltNode.listAdd(root, prevNode.p); 874 stringNode = null; 875 } 876 p++; 877 } 878 879 if (p < end) { 880 Node srem = expandCaseFoldMakeRemString(chars, p, end); 881 882 if (prevNode.p != null && root == null) { 883 topRoot = root = ConsAltNode.listAdd(null, prevNode.p); 884 } 885 886 if (root == null) { 887 prevNode.p = srem; 888 } else { 889 ConsAltNode.listAdd(root, srem); 890 } 891 } 892 /* ending */ 893 Node xnode = topRoot != null ? topRoot : prevNode.p; 894 895 swap(node, xnode); 896 return xnode; 897 } 898 899 private static final int IN_ALT = (1<<0); 900 private static final int IN_NOT = (1<<1); 901 private static final int IN_REPEAT = (1<<2); 902 private static final int IN_VAR_REPEAT = (1<<3); 903 private static final int EXPAND_STRING_MAX_LENGTH = 100; 904 905 /* setup_tree does the following work. 906 1. check empty loop. (set qn->target_empty_info) 907 2. expand ignore-case in char class. 908 3. set memory status bit flags. (reg->mem_stats) 909 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 910 5. find invalid patterns in look-behind. 911 6. expand repeated string. 912 */ 913 protected final Node setupTree(Node node, int state) { 914 restart: while (true) { 915 switch (node.getType()) { 916 case NodeType.LIST: 917 ConsAltNode lin = (ConsAltNode)node; 918 Node prev = null; 919 do { 920 setupTree(lin.car, state); 921 if (prev != null) { 922 nextSetup(prev, lin.car); 923 } 924 prev = lin.car; 925 } while ((lin = lin.cdr) != null); 926 break; 927 928 case NodeType.ALT: 929 ConsAltNode aln = (ConsAltNode)node; 930 do { 931 setupTree(aln.car, (state | IN_ALT)); 932 } while ((aln = aln.cdr) != null); 933 break; 934 935 case NodeType.CCLASS: 936 break; 937 938 case NodeType.STR: 939 if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { 940 node = expandCaseFoldString(node); 941 } 942 break; 943 944 case NodeType.CTYPE: 945 case NodeType.CANY: 946 break; 947 948 case NodeType.BREF: 949 BackRefNode br = (BackRefNode)node; 950 if (br.backRef > env.numMem) { 951 throw new ValueException(ERR_INVALID_BACKREF); 952 } 953 env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef); 954 env.btMemStart = bsOnAt(env.btMemStart, br.backRef); 955 ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed(); 956 break; 957 958 case NodeType.QTFR: 959 QuantifierNode qn = (QuantifierNode)node; 960 Node target = qn.target; 961 962 if ((state & IN_REPEAT) != 0) qn.setInRepeat(); 963 964 if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { 965 int d = getMinMatchLength(target); 966 if (d == 0) { 967 qn.targetEmptyInfo = TargetInfo.IS_EMPTY; 968 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 969 int info = quantifiersMemoryInfo(target); 970 if (info > 0) qn.targetEmptyInfo = info; 971 } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 972 // strange stuff here (turned off) 973 } 974 } 975 976 state |= IN_REPEAT; 977 if (qn.lower != qn.upper) state |= IN_VAR_REPEAT; 978 979 target = setupTree(target, state); 980 981 /* expand string */ 982 if (target.getType() == NodeType.STR) { 983 if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && 984 qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { 985 StringNode sn = (StringNode)target; 986 int len = sn.length(); 987 988 if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { 989 StringNode str = qn.convertToString(sn.flag); 990 int n = qn.lower; 991 for (int i = 0; i < n; i++) { 992 str.cat(sn.chars, sn.p, sn.end); 993 } 994 break; /* break case NT_QTFR: */ 995 } 996 997 } 998 } 999 if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { 1000 if (qn.greedy && qn.targetEmptyInfo != 0) { 1001 if (target.getType() == NodeType.QTFR) { 1002 QuantifierNode tqn = (QuantifierNode)target; 1003 if (tqn.headExact != null) { 1004 qn.headExact = tqn.headExact; 1005 tqn.headExact = null; 1006 } 1007 } else { 1008 qn.headExact = getHeadValueNode(qn.target, true); 1009 } 1010 } 1011 } // USE_OP_PUSH_OR_JUMP_EXACT 1012 break; 1013 1014 case NodeType.ENCLOSE: 1015 EncloseNode en = (EncloseNode)node; 1016 switch (en.type) { 1017 case EncloseType.OPTION: 1018 int options = regex.options; 1019 regex.options = en.option; 1020 setupTree(en.target, state); 1021 regex.options = options; 1022 break; 1023 1024 case EncloseType.MEMORY: 1025 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 1026 env.btMemStart = bsOnAt(env.btMemStart, en.regNum); 1027 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 1028 1029 } 1030 setupTree(en.target, state); 1031 break; 1032 1033 case EncloseType.STOP_BACKTRACK: 1034 setupTree(en.target, state); 1035 if (en.target.getType() == NodeType.QTFR) { 1036 QuantifierNode tqn = (QuantifierNode)en.target; 1037 if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { 1038 /* (?>a*), a*+ etc... */ 1039 if (tqn.target.isSimple()) en.setStopBtSimpleRepeat(); 1040 } 1041 } 1042 break; 1043 1044 } // inner switch 1045 break; 1046 1047 case NodeType.ANCHOR: 1048 AnchorNode an = (AnchorNode)node; 1049 switch (an.type) { 1050 case AnchorType.PREC_READ: 1051 setupTree(an.target, state); 1052 break; 1053 1054 case AnchorType.PREC_READ_NOT: 1055 setupTree(an.target, (state | IN_NOT)); 1056 break; 1057 1058 case AnchorType.LOOK_BEHIND: 1059 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1060 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1061 } 1062 node = setupLookBehind(node); 1063 if (node.getType() != NodeType.ANCHOR) continue restart; 1064 setupTree(((AnchorNode)node).target, state); 1065 break; 1066 1067 case AnchorType.LOOK_BEHIND_NOT: 1068 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1069 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1070 } 1071 node = setupLookBehind(node); 1072 if (node.getType() != NodeType.ANCHOR) continue restart; 1073 setupTree(((AnchorNode)node).target, (state | IN_NOT)); 1074 break; 1075 1076 } // inner switch 1077 break; 1078 } // switch 1079 return node; 1080 } // restart: while 1081 } 1082 1083 private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; 1084 private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd 1085 opt.clear(); 1086 opt.setBoundNode(oenv.mmd); 1087 1088 switch (node.getType()) { 1089 case NodeType.LIST: { 1090 OptEnvironment nenv = new OptEnvironment(); 1091 NodeOptInfo nopt = new NodeOptInfo(); 1092 nenv.copy(oenv); 1093 ConsAltNode lin = (ConsAltNode)node; 1094 do { 1095 optimizeNodeLeft(lin.car, nopt, nenv); 1096 nenv.mmd.add(nopt.length); 1097 opt.concatLeftNode(nopt); 1098 } while ((lin = lin.cdr) != null); 1099 break; 1100 } 1101 1102 case NodeType.ALT: { 1103 NodeOptInfo nopt = new NodeOptInfo(); 1104 ConsAltNode aln = (ConsAltNode)node; 1105 do { 1106 optimizeNodeLeft(aln.car, nopt, oenv); 1107 if (aln == node) { 1108 opt.copy(nopt); 1109 } else { 1110 opt.altMerge(nopt, oenv); 1111 } 1112 } while ((aln = aln.cdr) != null); 1113 break; 1114 } 1115 1116 case NodeType.STR: { 1117 StringNode sn = (StringNode)node; 1118 1119 int slen = sn.length(); 1120 1121 if (!sn.isAmbig()) { 1122 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1123 1124 if (slen > 0) { 1125 opt.map.addChar(sn.chars[sn.p]); 1126 } 1127 1128 opt.length.set(slen, slen); 1129 } else { 1130 int max; 1131 if (sn.isDontGetOptInfo()) { 1132 max = sn.length(); 1133 } else { 1134 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1135 opt.exb.ignoreCase = true; 1136 1137 if (slen > 0) { 1138 opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); 1139 } 1140 1141 max = slen; 1142 } 1143 opt.length.set(slen, max); 1144 } 1145 1146 if (opt.exb.length == slen) { 1147 opt.exb.reachEnd = true; 1148 } 1149 break; 1150 } 1151 1152 case NodeType.CCLASS: { 1153 CClassNode cc = (CClassNode)node; 1154 /* no need to check ignore case. (setted in setup_tree()) */ 1155 if (cc.mbuf != null || cc.isNot()) { 1156 opt.length.set(1, 1); 1157 } else { 1158 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1159 boolean z = cc.bs.at(i); 1160 if ((z && !cc.isNot()) || (!z && cc.isNot())) { 1161 opt.map.addChar(i); 1162 } 1163 } 1164 opt.length.set(1, 1); 1165 } 1166 break; 1167 } 1168 1169 case NodeType.CANY: { 1170 opt.length.set(1, 1); 1171 break; 1172 } 1173 1174 case NodeType.ANCHOR: { 1175 AnchorNode an = (AnchorNode)node; 1176 switch (an.type) { 1177 case AnchorType.BEGIN_BUF: 1178 case AnchorType.BEGIN_POSITION: 1179 case AnchorType.BEGIN_LINE: 1180 case AnchorType.END_BUF: 1181 case AnchorType.SEMI_END_BUF: 1182 case AnchorType.END_LINE: 1183 opt.anchor.add(an.type); 1184 break; 1185 1186 case AnchorType.PREC_READ: 1187 NodeOptInfo nopt = new NodeOptInfo(); 1188 optimizeNodeLeft(an.target, nopt, oenv); 1189 if (nopt.exb.length > 0) { 1190 opt.expr.copy(nopt.exb); 1191 } else if (nopt.exm.length > 0) { 1192 opt.expr.copy(nopt.exm); 1193 } 1194 opt.expr.reachEnd = false; 1195 if (nopt.map.value > 0) opt.map.copy(nopt.map); 1196 break; 1197 1198 case AnchorType.PREC_READ_NOT: 1199 case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ 1200 case AnchorType.LOOK_BEHIND_NOT: 1201 break; 1202 1203 } // inner switch 1204 break; 1205 } 1206 1207 case NodeType.BREF: { 1208 BackRefNode br = (BackRefNode)node; 1209 1210 if (br.isRecursion()) { 1211 opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); 1212 break; 1213 } 1214 1215 Node[]nodes = oenv.scanEnv.memNodes; 1216 1217 int min = getMinMatchLength(nodes[br.backRef]); 1218 int max = getMaxMatchLength(nodes[br.backRef]); 1219 1220 opt.length.set(min, max); 1221 break; 1222 } 1223 1224 1225 case NodeType.QTFR: { 1226 NodeOptInfo nopt = new NodeOptInfo(); 1227 QuantifierNode qn = (QuantifierNode)node; 1228 optimizeNodeLeft(qn.target, nopt, oenv); 1229 if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { 1230 if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { 1231 if (isMultiline(oenv.options)) { 1232 opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); 1233 } else { 1234 opt.anchor.add(AnchorType.ANYCHAR_STAR); 1235 } 1236 } 1237 } else { 1238 if (qn.lower > 0) { 1239 opt.copy(nopt); 1240 if (nopt.exb.length > 0) { 1241 if (nopt.exb.reachEnd) { 1242 int i; 1243 for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { 1244 opt.exb.concat(nopt.exb); 1245 } 1246 if (i < qn.lower) { 1247 opt.exb.reachEnd = false; 1248 } 1249 } 1250 } 1251 if (qn.lower != qn.upper) { 1252 opt.exb.reachEnd = false; 1253 opt.exm.reachEnd = false; 1254 } 1255 if (qn.lower > 1) { 1256 opt.exm.reachEnd = false; 1257 } 1258 1259 } 1260 } 1261 int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); 1262 int max; 1263 if (isRepeatInfinite(qn.upper)) { 1264 max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; 1265 } else { 1266 max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); 1267 } 1268 opt.length.set(min, max); 1269 break; 1270 } 1271 1272 case NodeType.ENCLOSE: { 1273 EncloseNode en = (EncloseNode)node; 1274 switch (en.type) { 1275 case EncloseType.OPTION: 1276 int save = oenv.options; 1277 oenv.options = en.option; 1278 optimizeNodeLeft(en.target, opt, oenv); 1279 oenv.options = save; 1280 break; 1281 1282 case EncloseType.MEMORY: 1283 if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { 1284 int min = 0; 1285 int max = MinMaxLen.INFINITE_DISTANCE; 1286 if (en.isMinFixed()) min = en.minLength; 1287 if (en.isMaxFixed()) max = en.maxLength; 1288 opt.length.set(min, max); 1289 } else { // USE_SUBEXP_CALL 1290 optimizeNodeLeft(en.target, opt, oenv); 1291 if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { 1292 if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { 1293 opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); 1294 } 1295 } 1296 } 1297 break; 1298 1299 case EncloseType.STOP_BACKTRACK: 1300 optimizeNodeLeft(en.target, opt, oenv); 1301 break; 1302 } // inner switch 1303 break; 1304 } 1305 1306 default: 1307 throw new InternalException(ERR_PARSER_BUG); 1308 } // switch 1309 } 1310 1311 protected final void setOptimizedInfoFromTree(Node node) { 1312 NodeOptInfo opt = new NodeOptInfo(); 1313 OptEnvironment oenv = new OptEnvironment(); 1314 1315 oenv.options = regex.options; 1316 oenv.caseFoldFlag = regex.caseFoldFlag; 1317 oenv.scanEnv = env; 1318 oenv.mmd.clear(); // ?? 1319 1320 optimizeNodeLeft(node, opt, oenv); 1321 1322 regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | 1323 AnchorType.BEGIN_POSITION | 1324 AnchorType.ANYCHAR_STAR | 1325 AnchorType.ANYCHAR_STAR_ML); 1326 1327 regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | 1328 AnchorType.SEMI_END_BUF); 1329 1330 if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { 1331 regex.anchorDmin = opt.length.min; 1332 regex.anchorDmax = opt.length.max; 1333 } 1334 1335 if (opt.exb.length > 0 || opt.exm.length > 0) { 1336 opt.exb.select(opt.exm); 1337 if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { 1338 // !goto set_map;! 1339 regex.setOptimizeMapInfo(opt.map); 1340 regex.setSubAnchor(opt.map.anchor); 1341 } else { 1342 regex.setExactInfo(opt.exb); 1343 regex.setSubAnchor(opt.exb.anchor); 1344 } 1345 } else if (opt.map.value > 0) { 1346 // !set_map:! 1347 regex.setOptimizeMapInfo(opt.map); 1348 regex.setSubAnchor(opt.map.anchor); 1349 } else { 1350 regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; 1351 if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; 1352 } 1353 1354 if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { 1355 Config.log.println(regex.optimizeInfoToString()); 1356 } 1357 } 1358 } --- EOF ---