1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; 27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 29 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 30 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 31 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 32 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 33 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 34 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 35 36 class ByteCodeMachine extends StackMachine { 37 private int bestLen; // return value 38 private int s = 0; // current char 39 40 private int range; // right range 41 private int sprev; 42 private int sstart; 43 private int sbegin; 44 45 private final int[] code; // byte code 46 private int ip; // instruction pointer 47 48 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) { 49 super(regex, chars, p, end); 50 this.code = regex.code; 51 } 52 53 private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) { 54 int s1 = s1p; 55 int s2 = ps2.value; 56 final int end1 = s1 + mbLen; 57 58 while (s1 < end1) { 59 final char c1 = EncodingHelper.toLowerCase(chars[s1++]); 60 final char c2 = EncodingHelper.toLowerCase(chars[s2++]); 61 62 if (c1 != c2) { 63 return false; 64 } 65 } 66 ps2.value = s2; 67 return true; 68 } 69 70 private void debugMatchBegin() { 71 Config.log.println("match_at: " + 72 "str: " + str + 73 ", end: " + end + 74 ", start: " + this.sstart + 75 ", sprev: " + this.sprev); 76 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); 77 } 78 79 private void debugMatchLoop() { 80 if (Config.DEBUG_MATCH) { 81 Config.log.printf("%4d", (s - str)).print("> \""); 82 int q, i; 83 for (i=0, q=s; i<7 && q<end && s>=0; i++) { 84 if (q < end) { 85 Config.log.print(new String(new char[]{chars[q++]})); 86 } 87 } 88 final String string = q < end ? "...\"" : "\""; 89 q += string.length(); 90 Config.log.print(string); 91 for (i=0; i<20-(q-s);i++) { 92 Config.log.print(" "); 93 } 94 final StringBuilder sb = new StringBuilder(); 95 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); 96 Config.log.println(sb.toString()); 97 } 98 } 99 100 @Override 101 protected final int matchAt(final int r, final int ss, final int sp) { 102 this.range = r; 103 this.sstart = ss; 104 this.sprev = sp; 105 106 stk = 0; 107 ip = 0; 108 109 if (Config.DEBUG_MATCH) { 110 debugMatchBegin(); 111 } 112 113 init(); 114 115 bestLen = -1; 116 s = ss; 117 118 final int[] c = this.code; 119 while (true) { 120 if (Config.DEBUG_MATCH) { 121 debugMatchLoop(); 122 } 123 124 sbegin = s; 125 switch (c[ip++]) { 126 case OPCode.END: if (opEnd()) { 127 return finish(); 128 } break; 129 case OPCode.EXACT1: opExact1(); break; 130 case OPCode.EXACT2: opExact2(); continue; 131 case OPCode.EXACT3: opExact3(); continue; 132 case OPCode.EXACT4: opExact4(); continue; 133 case OPCode.EXACT5: opExact5(); continue; 134 case OPCode.EXACTN: opExactN(); continue; 135 136 case OPCode.EXACT1_IC: opExact1IC(); break; 137 case OPCode.EXACTN_IC: opExactNIC(); continue; 138 139 case OPCode.CCLASS: opCClass(); break; 140 case OPCode.CCLASS_MB: opCClassMB(); break; 141 case OPCode.CCLASS_MIX: opCClassMIX(); break; 142 case OPCode.CCLASS_NOT: opCClassNot(); break; 143 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; 144 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; 145 case OPCode.CCLASS_NODE: opCClassNode(); break; 146 147 case OPCode.ANYCHAR: opAnyChar(); break; 148 case OPCode.ANYCHAR_ML: opAnyCharML(); break; 149 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; 150 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; 151 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; 152 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; 153 154 case OPCode.WORD: opWord(); break; 155 case OPCode.NOT_WORD: opNotWord(); break; 156 case OPCode.WORD_BOUND: opWordBound(); continue; 157 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; 158 case OPCode.WORD_BEGIN: opWordBegin(); continue; 159 case OPCode.WORD_END: opWordEnd(); continue; 160 161 case OPCode.BEGIN_BUF: opBeginBuf(); continue; 162 case OPCode.END_BUF: opEndBuf(); continue; 163 case OPCode.BEGIN_LINE: opBeginLine(); continue; 164 case OPCode.END_LINE: opEndLine(); continue; 165 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; 166 case OPCode.BEGIN_POSITION: opBeginPosition(); continue; 167 168 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; 169 case OPCode.MEMORY_START: opMemoryStart(); continue; 170 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; 171 case OPCode.MEMORY_END: opMemoryEnd(); continue; 172 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; 173 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; 174 175 case OPCode.BACKREF1: opBackRef1(); continue; 176 case OPCode.BACKREF2: opBackRef2(); continue; 177 case OPCode.BACKREFN: opBackRefN(); continue; 178 case OPCode.BACKREFN_IC: opBackRefNIC(); continue; 179 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; 180 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; 181 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; 182 183 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; 184 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; 185 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; 186 187 case OPCode.JUMP: opJump(); continue; 188 case OPCode.PUSH: opPush(); continue; 189 190 case OPCode.POP: opPop(); continue; 191 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; 192 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; 193 194 case OPCode.REPEAT: opRepeat(); continue; 195 case OPCode.REPEAT_NG: opRepeatNG(); continue; 196 case OPCode.REPEAT_INC: opRepeatInc(); continue; 197 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; 198 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; 199 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; 200 201 case OPCode.PUSH_POS: opPushPos(); continue; 202 case OPCode.POP_POS: opPopPos(); continue; 203 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; 204 case OPCode.FAIL_POS: opFailPos(); continue; 205 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; 206 case OPCode.POP_STOP_BT: opPopStopBT(); continue; 207 208 case OPCode.LOOK_BEHIND: opLookBehind(); continue; 209 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; 210 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; 211 212 case OPCode.FINISH: 213 return finish(); 214 215 case OPCode.FAIL: opFail(); continue; 216 217 default: 218 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); 219 220 } // main switch 221 } // main while 222 } 223 224 private boolean opEnd() { 225 final int n = s - sstart; 226 227 if (n > bestLen) { 228 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 229 if (isFindLongest(regex.options)) { 230 if (n > msaBestLen) { 231 msaBestLen = n; 232 msaBestS = sstart; 233 } else { 234 // goto end_best_len; 235 return endBestLength(); 236 } 237 } 238 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 239 240 bestLen = n; 241 final Region region = msaRegion; 242 if (region != null) { 243 // USE_POSIX_REGION_OPTION ... else ... 244 region.beg[0] = msaBegin = sstart - str; 245 region.end[0] = msaEnd = s - str; 246 for (int i = 1; i <= regex.numMem; i++) { 247 // opt! 248 if (repeatStk[memEndStk + i] != INVALID_INDEX) { 249 region.beg[i] = bsAt(regex.btMemStart, i) ? 250 stack[repeatStk[memStartStk + i]].getMemPStr() - str : 251 repeatStk[memStartStk + i] - str; 252 253 254 region.end[i] = bsAt(regex.btMemEnd, i) ? 255 stack[repeatStk[memEndStk + i]].getMemPStr() : 256 repeatStk[memEndStk + i] - str; 257 258 } else { 259 region.beg[i] = region.end[i] = Region.REGION_NOTPOS; 260 } 261 262 } 263 264 } else { 265 msaBegin = sstart - str; 266 msaEnd = s - str; 267 } 268 } else { 269 final Region region = msaRegion; 270 if (Config.USE_POSIX_API_REGION_OPTION) { 271 if (!isPosixRegion(regex.options)) { 272 if (region != null) { 273 region.clear(); 274 } else { 275 msaBegin = msaEnd = 0; 276 } 277 } 278 } else { 279 if (region != null) { 280 region.clear(); 281 } else { 282 msaBegin = msaEnd = 0; 283 } 284 } // USE_POSIX_REGION_OPTION 285 } 286 // end_best_len: 287 /* default behavior: return first-matching result. */ 288 return endBestLength(); 289 } 290 291 private boolean endBestLength() { 292 if (isFindCondition(regex.options)) { 293 if (isFindNotEmpty(regex.options) && s == sstart) { 294 bestLen = -1; 295 {opFail(); return false;} /* for retry */ 296 } 297 if (isFindLongest(regex.options) && s < range) { 298 {opFail(); return false;} /* for retry */ 299 } 300 } 301 // goto finish; 302 return true; 303 } 304 305 private void opExact1() { 306 if (s >= range || code[ip] != chars[s++]) {opFail(); return;} 307 //if (s > range) {opFail(); return;} 308 ip++; 309 sprev = sbegin; // break; 310 } 311 312 private void opExact2() { 313 if (s + 2 > range) {opFail(); return;} 314 if (code[ip] != chars[s]) {opFail(); return;} 315 ip++; s++; 316 if (code[ip] != chars[s]) {opFail(); return;} 317 sprev = s; 318 ip++; s++; 319 } 320 321 private void opExact3() { 322 if (s + 3 > range) {opFail(); return;} 323 if (code[ip] != chars[s]) {opFail(); return;} 324 ip++; s++; 325 if (code[ip] != chars[s]) {opFail(); return;} 326 ip++; s++; 327 if (code[ip] != chars[s]) {opFail(); return;} 328 sprev = s; 329 ip++; s++; 330 } 331 332 private void opExact4() { 333 if (s + 4 > range) {opFail(); return;} 334 if (code[ip] != chars[s]) {opFail(); return;} 335 ip++; s++; 336 if (code[ip] != chars[s]) {opFail(); return;} 337 ip++; s++; 338 if (code[ip] != chars[s]) {opFail(); return;} 339 ip++; s++; 340 if (code[ip] != chars[s]) {opFail(); return;} 341 sprev = s; 342 ip++; s++; 343 } 344 345 private void opExact5() { 346 if (s + 5 > range) {opFail(); return;} 347 if (code[ip] != chars[s]) {opFail(); return;} 348 ip++; s++; 349 if (code[ip] != chars[s]) {opFail(); return;} 350 ip++; s++; 351 if (code[ip] != chars[s]) {opFail(); return;} 352 ip++; s++; 353 if (code[ip] != chars[s]) {opFail(); return;} 354 ip++; s++; 355 if (code[ip] != chars[s]) {opFail(); return;} 356 sprev = s; 357 ip++; s++; 358 } 359 360 private void opExactN() { 361 int tlen = code[ip++]; 362 if (s + tlen > range) {opFail(); return;} 363 364 if (Config.USE_STRING_TEMPLATES) { 365 final char[] bs = regex.templates[code[ip++]]; 366 int ps = code[ip++]; 367 368 while (tlen-- > 0) { 369 if (bs[ps++] != chars[s++]) {opFail(); return;} 370 } 371 372 } else { 373 while (tlen-- > 0) { 374 if (code[ip++] != chars[s++]) {opFail(); return;} 375 } 376 } 377 sprev = s - 1; 378 } 379 380 private void opExact1IC() { 381 if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 382 ip++; 383 sprev = sbegin; // break; 384 } 385 386 private void opExactNIC() { 387 int tlen = code[ip++]; 388 if (s + tlen > range) {opFail(); return;} 389 390 if (Config.USE_STRING_TEMPLATES) { 391 final char[] bs = regex.templates[code[ip++]]; 392 int ps = code[ip++]; 393 394 while (tlen-- > 0) { 395 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 396 } 397 } else { 398 399 while (tlen-- > 0) { 400 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 401 } 402 } 403 sprev = s - 1; 404 } 405 406 private boolean isInBitSet() { 407 final int c = chars[s]; 408 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); 409 } 410 411 private void opCClass() { 412 if (s >= range || !isInBitSet()) {opFail(); return;} 413 ip += BitSet.BITSET_SIZE; 414 s++; 415 sprev = sbegin; // break; 416 } 417 418 private boolean isInClassMB() { 419 final int tlen = code[ip++]; 420 if (s >= range) { 421 return false; 422 } 423 final int ss = s; 424 s++; 425 final int c = chars[ss]; 426 if (!EncodingHelper.isInCodeRange(code, ip, c)) { 427 return false; 428 } 429 ip += tlen; 430 return true; 431 } 432 433 private void opCClassMB() { 434 // beyond string check 435 if (s >= range || chars[s] <= 0xff) {opFail(); return;} 436 if (!isInClassMB()) {opFail(); return;} // not!!! 437 sprev = sbegin; // break; 438 } 439 440 private void opCClassMIX() { 441 if (s >= range) {opFail(); return;} 442 if (chars[s] > 0xff) { 443 ip += BitSet.BITSET_SIZE; 444 if (!isInClassMB()) {opFail(); return;} 445 } else { 446 if (!isInBitSet()) {opFail(); return;} 447 ip += BitSet.BITSET_SIZE; 448 final int tlen = code[ip++]; // by code range length 449 ip += tlen; 450 s++; 451 } 452 sprev = sbegin; // break; 453 } 454 455 private void opCClassNot() { 456 if (s >= range || isInBitSet()) {opFail(); return;} 457 ip += BitSet.BITSET_SIZE; 458 s++; 459 sprev = sbegin; // break; 460 } 461 462 private boolean isNotInClassMB() { 463 final int tlen = code[ip++]; 464 465 if (!(s + 1 <= range)) { 466 if (s >= range) { 467 return false; 468 } 469 s = end; 470 ip += tlen; 471 return true; 472 } 473 474 final int ss = s; 475 s++; 476 final int c = chars[ss]; 477 478 if (EncodingHelper.isInCodeRange(code, ip, c)) { 479 return false; 480 } 481 ip += tlen; 482 return true; 483 } 484 485 private void opCClassMBNot() { 486 if (s >= range) {opFail(); return;} 487 if (chars[s] <= 0xff) { 488 s++; 489 final int tlen = code[ip++]; 490 ip += tlen; 491 sprev = sbegin; // break; 492 return; 493 } 494 if (!isNotInClassMB()) {opFail(); return;} 495 sprev = sbegin; // break; 496 } 497 498 private void opCClassMIXNot() { 499 if (s >= range) {opFail(); return;} 500 if (chars[s] > 0xff) { 501 ip += BitSet.BITSET_SIZE; 502 if (!isNotInClassMB()) {opFail(); return;} 503 } else { 504 if (isInBitSet()) {opFail(); return;} 505 ip += BitSet.BITSET_SIZE; 506 final int tlen = code[ip++]; 507 ip += tlen; 508 s++; 509 } 510 sprev = sbegin; // break; 511 } 512 513 private void opCClassNode() { 514 if (s >= range) {opFail(); return;} 515 final CClassNode cc = (CClassNode)regex.operands[code[ip++]]; 516 final int ss = s; 517 s++; 518 final int c = chars[ss]; 519 if (!cc.isCodeInCCLength(c)) {opFail(); return;} 520 sprev = sbegin; // break; 521 } 522 523 private void opAnyChar() { 524 if (s >= range) {opFail(); return;} 525 if (isNewLine(chars[s])) {opFail(); return;} 526 s++; 527 sprev = sbegin; // break; 528 } 529 530 private void opAnyCharML() { 531 if (s >= range) {opFail(); return;} 532 s++; 533 sprev = sbegin; // break; 534 } 535 536 private void opAnyCharStar() { 537 final char[] ch = this.chars; 538 while (s < range) { 539 pushAlt(ip, s, sprev); 540 if (isNewLine(ch, s, end)) {opFail(); return;} 541 sprev = s; 542 s++; 543 } 544 } 545 546 private void opAnyCharMLStar() { 547 while (s < range) { 548 pushAlt(ip, s, sprev); 549 sprev = s; 550 s++; 551 } 552 } 553 554 private void opAnyCharStarPeekNext() { 555 final char c = (char)code[ip]; 556 final char[] ch = this.chars; 557 558 while (s < range) { 559 final char b = ch[s]; 560 if (c == b) { 561 pushAlt(ip + 1, s, sprev); 562 } 563 if (isNewLine(b)) {opFail(); return;} 564 sprev = s; 565 s++; 566 } 567 ip++; 568 sprev = sbegin; // break; 569 } 570 571 private void opAnyCharMLStarPeekNext() { 572 final char c = (char)code[ip]; 573 final char[] ch = this.chars; 574 575 while (s < range) { 576 if (c == ch[s]) { 577 pushAlt(ip + 1, s, sprev); 578 } 579 sprev = s; 580 s++; 581 } 582 ip++; 583 sprev = sbegin; // break; 584 } 585 586 private void opWord() { 587 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 588 s++; 589 sprev = sbegin; // break; 590 } 591 592 private void opNotWord() { 593 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} 594 s++; 595 sprev = sbegin; // break; 596 } 597 598 private void opWordBound() { 599 if (s == str) { 600 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 601 } else if (s == end) { 602 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 603 } else { 604 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 605 } 606 } 607 608 private void opNotWordBound() { 609 if (s == str) { 610 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} 611 } else if (s == end) { 612 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 613 } else { 614 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 615 } 616 } 617 618 private void opWordBegin() { 619 if (s < range && EncodingHelper.isWord(chars[s])) { 620 if (s == str || !EncodingHelper.isWord(chars[sprev])) { 621 return; 622 } 623 } 624 opFail(); 625 } 626 627 private void opWordEnd() { 628 if (s != str && EncodingHelper.isWord(chars[sprev])) { 629 if (s == end || !EncodingHelper.isWord(chars[s])) { 630 return; 631 } 632 } 633 opFail(); 634 } 635 636 private void opBeginBuf() { 637 if (s != str) { 638 opFail(); 639 } 640 } 641 642 private void opEndBuf() { 643 if (s != end) { 644 opFail(); 645 } 646 } 647 648 private void opBeginLine() { 649 if (s == str) { 650 if (isNotBol(msaOptions)) { 651 opFail(); 652 } 653 return; 654 } else if (isNewLine(chars, sprev, end) && s != end) { 655 return; 656 } 657 opFail(); 658 } 659 660 private void opEndLine() { 661 if (s == end) { 662 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 663 if (str == end || !isNewLine(chars, sprev, end)) { 664 if (isNotEol(msaOptions)) { 665 opFail(); 666 } 667 } 668 return; 669 } 670 if (isNotEol(msaOptions)) { 671 opFail(); 672 } 673 return; 674 } else if (isNewLine(chars, s, end)) { 675 return; 676 } 677 opFail(); 678 } 679 680 private void opSemiEndBuf() { 681 if (s == end) { 682 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 683 if (str == end || !isNewLine(chars, sprev, end)) { 684 if (isNotEol(msaOptions)) { 685 opFail(); 686 } 687 } 688 return; 689 } 690 if (isNotEol(msaOptions)) { 691 opFail(); 692 } 693 return; 694 } else if (isNewLine(chars, s, end) && s + 1 == end) { 695 return; 696 } 697 opFail(); 698 } 699 700 private void opBeginPosition() { 701 if (s != msaStart) { 702 opFail(); 703 } 704 } 705 706 private void opMemoryStartPush() { 707 final int mem = code[ip++]; 708 pushMemStart(mem, s); 709 } 710 711 private void opMemoryStart() { 712 final int mem = code[ip++]; 713 repeatStk[memStartStk + mem] = s; 714 } 715 716 private void opMemoryEndPush() { 717 final int mem = code[ip++]; 718 pushMemEnd(mem, s); 719 } 720 721 private void opMemoryEnd() { 722 final int mem = code[ip++]; 723 repeatStk[memEndStk + mem] = s; 724 } 725 726 private void opMemoryEndPushRec() { 727 final int mem = code[ip++]; 728 final int stkp = getMemStart(mem); /* should be before push mem-end. */ 729 pushMemEnd(mem, s); 730 repeatStk[memStartStk + mem] = stkp; 731 } 732 733 private void opMemoryEndRec() { 734 final int mem = code[ip++]; 735 repeatStk[memEndStk + mem] = s; 736 final int stkp = getMemStart(mem); 737 738 if (BitStatus.bsAt(regex.btMemStart, mem)) { 739 repeatStk[memStartStk + mem] = stkp; 740 } else { 741 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); 742 } 743 744 pushMemEndMark(mem); 745 } 746 747 private boolean backrefInvalid(final int mem) { 748 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; 749 } 750 751 private int backrefStart(final int mem) { 752 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; 753 } 754 755 private int backrefEnd(final int mem) { 756 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; 757 } 758 759 private void backref(final int mem) { 760 /* if you want to remove following line, 761 you should check in parse and compile time. (numMem) */ 762 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 763 764 int pstart = backrefStart(mem); 765 final int pend = backrefEnd(mem); 766 767 int n = pend - pstart; 768 if (s + n > range) {opFail(); return;} 769 sprev = s; 770 771 // STRING_CMP 772 while(n-- > 0) { 773 if (chars[pstart++] != chars[s++]) {opFail(); return;} 774 } 775 776 // beyond string check 777 if (sprev < range) { 778 while (sprev + 1 < s) { 779 sprev++; 780 } 781 } 782 } 783 784 private void opBackRef1() { 785 backref(1); 786 } 787 788 private void opBackRef2() { 789 backref(2); 790 } 791 792 private void opBackRefN() { 793 backref(code[ip++]); 794 } 795 796 private void opBackRefNIC() { 797 final int mem = code[ip++]; 798 /* if you want to remove following line, 799 you should check in parse and compile time. (numMem) */ 800 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 801 802 final int pstart = backrefStart(mem); 803 final int pend = backrefEnd(mem); 804 805 final int n = pend - pstart; 806 if (s + n > range) {opFail(); return;} 807 sprev = s; 808 809 value = s; 810 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} 811 s = value; 812 813 // if (sprev < chars.length) 814 while (sprev + 1 < s) { 815 sprev++; 816 } 817 } 818 819 private void opBackRefMulti() { 820 final int tlen = code[ip++]; 821 822 int i; 823 loop:for (i=0; i<tlen; i++) { 824 final int mem = code[ip++]; 825 if (backrefInvalid(mem)) { 826 continue; 827 } 828 829 int pstart = backrefStart(mem); 830 final int pend = backrefEnd(mem); 831 832 int n = pend - pstart; 833 if (s + n > range) {opFail(); return;} 834 835 sprev = s; 836 int swork = s; 837 838 while (n-- > 0) { 839 if (chars[pstart++] != chars[swork++]) { 840 continue loop; 841 } 842 } 843 844 s = swork; 845 846 // beyond string check 847 if (sprev < range) { 848 while (sprev + 1 < s) { 849 sprev++; 850 } 851 } 852 853 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 854 break; /* success */ 855 } 856 if (i == tlen) {opFail(); return;} 857 } 858 859 private void opBackRefMultiIC() { 860 final int tlen = code[ip++]; 861 862 int i; 863 loop:for (i=0; i<tlen; i++) { 864 final int mem = code[ip++]; 865 if (backrefInvalid(mem)) { 866 continue; 867 } 868 869 final int pstart = backrefStart(mem); 870 final int pend = backrefEnd(mem); 871 872 final int n = pend - pstart; 873 if (s + n > range) {opFail(); return;} 874 875 sprev = s; 876 877 value = s; 878 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) 879 { 880 continue loop; // STRING_CMP_VALUE_IC 881 } 882 s = value; 883 884 // if (sprev < chars.length) 885 while (sprev + 1 < s) { 886 sprev++; 887 } 888 889 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 890 break; /* success */ 891 } 892 if (i == tlen) {opFail(); return;} 893 } 894 895 private boolean memIsInMemp(final int mem, final int num, final int mempp) { 896 for (int i=0, memp = mempp; i<num; i++) { 897 final int m = code[memp++]; 898 if (mem == m) { 899 return true; 900 } 901 } 902 return false; 903 } 904 905 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit 906 private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, 907 final int nest, final int memNum, final int memp) { 908 int pend = -1; 909 int level = 0; 910 int k = stk - 1; 911 912 while (k >= 0) { 913 final StackEntry e = stack[k]; 914 915 if (e.type == CALL_FRAME) { 916 level--; 917 } else if (e.type == RETURN) { 918 level++; 919 } else if (level == nest) { 920 if (e.type == MEM_START) { 921 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 922 final int pstart = e.getMemPStr(); 923 if (pend != -1) { 924 if (pend - pstart > end - s) { 925 return false; /* or goto next_mem; */ 926 } 927 int p = pstart; 928 929 value = s; 930 if (ignoreCase) { 931 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { 932 return false; /* or goto next_mem; */ 933 } 934 } else { 935 while (p < pend) { 936 if (chars[p++] != chars[value++]) { 937 return false; /* or goto next_mem; */ 938 } 939 } 940 } 941 s = value; 942 943 return true; 944 } 945 } 946 } else if (e.type == MEM_END) { 947 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 948 pend = e.getMemPStr(); 949 } 950 } 951 } 952 k--; 953 } 954 return false; 955 } 956 957 private void opBackRefAtLevel() { 958 final int ic = code[ip++]; 959 final int level = code[ip++]; 960 final int tlen = code[ip++]; 961 962 sprev = s; 963 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit 964 while (sprev + 1 < s) { 965 sprev++; 966 } 967 ip += tlen; // * SIZE_MEMNUM 968 } else { 969 {opFail(); return;} 970 } 971 } 972 973 private void opNullCheckStart() { 974 final int mem = code[ip++]; 975 pushNullCheckStart(mem, s); 976 } 977 978 private void nullCheckFound() { 979 // null_check_found: 980 /* empty loop founded, skip next instruction */ 981 switch(code[ip++]) { 982 case OPCode.JUMP: 983 case OPCode.PUSH: 984 ip++; // p += SIZE_RELADDR; 985 break; 986 case OPCode.REPEAT_INC: 987 case OPCode.REPEAT_INC_NG: 988 case OPCode.REPEAT_INC_SG: 989 case OPCode.REPEAT_INC_NG_SG: 990 ip++; // p += SIZE_MEMNUM; 991 break; 992 default: 993 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); 994 } // switch 995 } 996 997 private void opNullCheckEnd() { 998 final int mem = code[ip++]; 999 final int isNull = nullCheck(mem, s); /* mem: null check id */ 1000 1001 if (isNull != 0) { 1002 if (Config.DEBUG_MATCH) { 1003 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); 1004 } 1005 1006 nullCheckFound(); 1007 } 1008 } 1009 1010 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1011 private void opNullCheckEndMemST() { 1012 final int mem = code[ip++]; /* mem: null check id */ 1013 final int isNull = nullCheckMemSt(mem, s); 1014 1015 if (isNull != 0) { 1016 if (Config.DEBUG_MATCH) { 1017 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); 1018 } 1019 1020 if (isNull == -1) {opFail(); return;} 1021 nullCheckFound(); 1022 } 1023 } 1024 1025 private void opJump() { 1026 ip += code[ip] + 1; 1027 } 1028 1029 private void opPush() { 1030 final int addr = code[ip++]; 1031 pushAlt(ip + addr, s, sprev); 1032 } 1033 1034 private void opPop() { 1035 popOne(); 1036 } 1037 1038 private void opPushOrJumpExact1() { 1039 final int addr = code[ip++]; 1040 // beyond string check 1041 if (s < range && code[ip] == chars[s]) { 1042 ip++; 1043 pushAlt(ip + addr, s, sprev); 1044 return; 1045 } 1046 ip += addr + 1; 1047 } 1048 1049 private void opPushIfPeekNext() { 1050 final int addr = code[ip++]; 1051 // beyond string check 1052 if (s < range && code[ip] == chars[s]) { 1053 ip++; 1054 pushAlt(ip + addr, s, sprev); 1055 return; 1056 } 1057 ip++; 1058 } 1059 1060 private void opRepeat() { 1061 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1062 final int addr= code[ip++]; 1063 1064 // ensure1(); 1065 repeatStk[mem] = stk; 1066 pushRepeat(mem, ip); 1067 1068 if (regex.repeatRangeLo[mem] == 0) { // lower 1069 pushAlt(ip + addr, s, sprev); 1070 } 1071 } 1072 1073 private void opRepeatNG() { 1074 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1075 final int addr= code[ip++]; 1076 1077 // ensure1(); 1078 repeatStk[mem] = stk; 1079 pushRepeat(mem, ip); 1080 1081 if (regex.repeatRangeLo[mem] == 0) { 1082 pushAlt(ip, s, sprev); 1083 ip += addr; 1084 } 1085 } 1086 1087 private void repeatInc(final int mem, final int si) { 1088 final StackEntry e = stack[si]; 1089 1090 e.increaseRepeatCount(); 1091 1092 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { 1093 /* end of repeat. Nothing to do. */ 1094 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1095 pushAlt(ip, s, sprev); 1096 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ 1097 } else { 1098 ip = e.getRepeatPCode(); 1099 } 1100 pushRepeatInc(si); 1101 } 1102 1103 private void opRepeatInc() { 1104 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1105 final int si = repeatStk[mem]; 1106 repeatInc(mem, si); 1107 } 1108 1109 private void opRepeatIncSG() { 1110 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1111 final int si = getRepeat(mem); 1112 repeatInc(mem, si); 1113 } 1114 1115 private void repeatIncNG(final int mem, final int si) { 1116 final StackEntry e = stack[si]; 1117 1118 e.increaseRepeatCount(); 1119 1120 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { 1121 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1122 final int pcode = e.getRepeatPCode(); 1123 pushRepeatInc(si); 1124 pushAlt(pcode, s, sprev); 1125 } else { 1126 ip = e.getRepeatPCode(); 1127 pushRepeatInc(si); 1128 } 1129 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { 1130 pushRepeatInc(si); 1131 } 1132 } 1133 1134 private void opRepeatIncNG() { 1135 final int mem = code[ip++]; 1136 final int si = repeatStk[mem]; 1137 repeatIncNG(mem, si); 1138 } 1139 1140 private void opRepeatIncNGSG() { 1141 final int mem = code[ip++]; 1142 final int si = getRepeat(mem); 1143 repeatIncNG(mem, si); 1144 } 1145 1146 private void opPushPos() { 1147 pushPos(s, sprev); 1148 } 1149 1150 private void opPopPos() { 1151 final StackEntry e = stack[posEnd()]; 1152 s = e.getStatePStr(); 1153 sprev= e.getStatePStrPrev(); 1154 } 1155 1156 private void opPushPosNot() { 1157 final int addr = code[ip++]; 1158 pushPosNot(ip + addr, s, sprev); 1159 } 1160 1161 private void opFailPos() { 1162 popTilPosNot(); 1163 opFail(); 1164 } 1165 1166 private void opPushStopBT() { 1167 pushStopBT(); 1168 } 1169 1170 private void opPopStopBT() { 1171 stopBtEnd(); 1172 } 1173 1174 private void opLookBehind() { 1175 final int tlen = code[ip++]; 1176 s = EncodingHelper.stepBack(str, s, tlen); 1177 if (s == -1) {opFail(); return;} 1178 sprev = EncodingHelper.prevCharHead(str, s); 1179 } 1180 1181 private void opPushLookBehindNot() { 1182 final int addr = code[ip++]; 1183 final int tlen = code[ip++]; 1184 final int q = EncodingHelper.stepBack(str, s, tlen); 1185 if (q == -1) { 1186 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 1187 If you want to change to fail, replace following line. */ 1188 ip += addr; 1189 // return FAIL; 1190 } else { 1191 pushLookBehindNot(ip + addr, s, sprev); 1192 s = q; 1193 sprev = EncodingHelper.prevCharHead(str, s); 1194 } 1195 } 1196 1197 private void opFailLookBehindNot() { 1198 popTilLookBehindNot(); 1199 opFail(); 1200 } 1201 1202 private void opFail() { 1203 if (stack == null) { 1204 ip = regex.codeLength - 1; 1205 return; 1206 } 1207 1208 1209 final StackEntry e = pop(); 1210 ip = e.getStatePCode(); 1211 s = e.getStatePStr(); 1212 sprev = e.getStatePStrPrev(); 1213 } 1214 1215 private int finish() { 1216 return bestLen; 1217 } 1218 }