1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.runtime.regexp.joni; 21 22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; 26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 29 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 30 31 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 32 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 33 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; 34 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 35 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 36 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 37 38 class ByteCodeMachine extends StackMachine { 39 private int bestLen; // return value 40 private int s = 0; // current char 41 42 private int range; // right range 43 private int sprev; 44 private int sstart; 45 private int sbegin; 46 47 private final int[] code; // byte code 48 private int ip; // instruction pointer 49 50 ByteCodeMachine(Regex regex, char[] chars, int p, int end) { 51 super(regex, chars, p, end); 52 this.code = regex.code; 53 } 54 55 private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) { 56 57 int s2 = ps2.value; 58 int end1 = s1 + mbLen; 59 60 while (s1 < end1) { 61 char c1 = Character.toLowerCase(chars[s1++]); 62 char c2 = Character.toLowerCase(chars[s2++]); 63 64 if (c1 != c2) { 65 return false; 66 } 67 } 68 ps2.value = s2; 69 return true; 70 } 71 72 private void debugMatchBegin() { 73 Config.log.println("match_at: " + 74 "str: " + str + 75 ", end: " + end + 76 ", start: " + this.sstart + 77 ", sprev: " + this.sprev); 78 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); 79 } 80 81 private void debugMatchLoop() { 82 if (Config.DEBUG_MATCH) { 83 Config.log.printf("%4d", (s - str)).print("> \""); 84 int q, i; 85 for (i=0, q=s; i<7 && q<end && s>=0; i++) { 86 if (q < end) Config.log.print(new String(new char[]{chars[q++]})); 87 } 88 String str = q < end ? "...\"" : "\""; 89 q += str.length(); 90 Config.log.print(str); 91 for (i=0; i<20-(q-s);i++) Config.log.print(" "); 92 StringBuilder sb = new StringBuilder(); 93 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); 94 Config.log.println(sb.toString()); 95 } 96 } 97 98 @Override 99 protected final int matchAt(int range, int sstart, int sprev) { 100 this.range = range; 101 this.sstart = sstart; 102 this.sprev = sprev; 103 104 stk = 0; 105 ip = 0; 106 107 if (Config.DEBUG_MATCH) debugMatchBegin(); 108 109 init(); 110 111 bestLen = -1; 112 s = sstart; 113 114 final int[]code = this.code; 115 while (true) { 116 if (Config.DEBUG_MATCH) debugMatchLoop(); 117 118 sbegin = s; 119 switch (code[ip++]) { 120 case OPCode.END: if (opEnd()) return finish(); break; 121 case OPCode.EXACT1: opExact1(); break; 122 case OPCode.EXACT2: opExact2(); continue; 123 case OPCode.EXACT3: opExact3(); continue; 124 case OPCode.EXACT4: opExact4(); continue; 125 case OPCode.EXACT5: opExact5(); continue; 126 case OPCode.EXACTN: opExactN(); continue; 127 128 case OPCode.EXACT1_IC: opExact1IC(); break; 129 case OPCode.EXACTN_IC: opExactNIC(); continue; 130 131 case OPCode.CCLASS: opCClass(); break; 132 case OPCode.CCLASS_MB: opCClassMB(); break; 133 case OPCode.CCLASS_MIX: opCClassMIX(); break; 134 case OPCode.CCLASS_NOT: opCClassNot(); break; 135 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; 136 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; 137 case OPCode.CCLASS_NODE: opCClassNode(); break; 138 139 case OPCode.ANYCHAR: opAnyChar(); break; 140 case OPCode.ANYCHAR_ML: opAnyCharML(); break; 141 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; 142 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; 143 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; 144 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; 145 146 case OPCode.WORD: opWord(); break; 147 case OPCode.NOT_WORD: opNotWord(); break; 148 case OPCode.WORD_BOUND: opWordBound(); continue; 149 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; 150 case OPCode.WORD_BEGIN: opWordBegin(); continue; 151 case OPCode.WORD_END: opWordEnd(); continue; 152 153 case OPCode.BEGIN_BUF: opBeginBuf(); continue; 154 case OPCode.END_BUF: opEndBuf(); continue; 155 case OPCode.BEGIN_LINE: opBeginLine(); continue; 156 case OPCode.END_LINE: opEndLine(); continue; 157 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; 158 case OPCode.BEGIN_POSITION: opBeginPosition(); continue; 159 160 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; 161 case OPCode.MEMORY_START: opMemoryStart(); continue; 162 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; 163 case OPCode.MEMORY_END: opMemoryEnd(); continue; 164 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; 165 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; 166 167 case OPCode.BACKREF1: opBackRef1(); continue; 168 case OPCode.BACKREF2: opBackRef2(); continue; 169 case OPCode.BACKREFN: opBackRefN(); continue; 170 case OPCode.BACKREFN_IC: opBackRefNIC(); continue; 171 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; 172 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; 173 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; 174 175 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; 176 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; 177 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; 178 case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue; 179 180 case OPCode.JUMP: opJump(); continue; 181 case OPCode.PUSH: opPush(); continue; 182 183 case OPCode.POP: opPop(); continue; 184 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; 185 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; 186 187 case OPCode.REPEAT: opRepeat(); continue; 188 case OPCode.REPEAT_NG: opRepeatNG(); continue; 189 case OPCode.REPEAT_INC: opRepeatInc(); continue; 190 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; 191 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; 192 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; 193 194 case OPCode.PUSH_POS: opPushPos(); continue; 195 case OPCode.POP_POS: opPopPos(); continue; 196 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; 197 case OPCode.FAIL_POS: opFailPos(); continue; 198 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; 199 case OPCode.POP_STOP_BT: opPopStopBT(); continue; 200 201 case OPCode.LOOK_BEHIND: opLookBehind(); continue; 202 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; 203 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; 204 205 case OPCode.FINISH: 206 return finish(); 207 208 case OPCode.FAIL: opFail(); continue; 209 210 default: 211 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); 212 213 } // main switch 214 } // main while 215 } 216 217 private boolean opEnd() { 218 int n = s - sstart; 219 220 if (n > bestLen) { 221 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 222 if (isFindLongest(regex.options)) { 223 if (n > msaBestLen) { 224 msaBestLen = n; 225 msaBestS = sstart; 226 } else { 227 // goto end_best_len; 228 return endBestLength(); 229 } 230 } 231 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 232 233 bestLen = n; 234 final Region region = msaRegion; 235 if (region != null) { 236 // USE_POSIX_REGION_OPTION ... else ... 237 region.beg[0] = msaBegin = sstart - str; 238 region.end[0] = msaEnd = s - str; 239 for (int i = 1; i <= regex.numMem; i++) { 240 // opt! 241 if (repeatStk[memEndStk + i] != INVALID_INDEX) { 242 region.beg[i] = bsAt(regex.btMemStart, i) ? 243 stack[repeatStk[memStartStk + i]].getMemPStr() - str : 244 repeatStk[memStartStk + i] - str; 245 246 247 region.end[i] = bsAt(regex.btMemEnd, i) ? 248 stack[repeatStk[memEndStk + i]].getMemPStr() : 249 repeatStk[memEndStk + i] - str; 250 251 } else { 252 region.beg[i] = region.end[i] = Region.REGION_NOTPOS; 253 } 254 255 } 256 257 } else { 258 msaBegin = sstart - str; 259 msaEnd = s - str; 260 } 261 } else { 262 Region region = msaRegion; 263 if (Config.USE_POSIX_API_REGION_OPTION) { 264 if (!isPosixRegion(regex.options)) { 265 if (region != null) { 266 region.clear(); 267 } else { 268 msaBegin = msaEnd = 0; 269 } 270 } 271 } else { 272 if (region != null) { 273 region.clear(); 274 } else { 275 msaBegin = msaEnd = 0; 276 } 277 } // USE_POSIX_REGION_OPTION 278 } 279 // end_best_len: 280 /* default behavior: return first-matching result. */ 281 return endBestLength(); 282 } 283 284 private boolean endBestLength() { 285 if (isFindCondition(regex.options)) { 286 if (isFindNotEmpty(regex.options) && s == sstart) { 287 bestLen = -1; 288 {opFail(); return false;} /* for retry */ 289 } 290 if (isFindLongest(regex.options) && s < range) { 291 {opFail(); return false;} /* for retry */ 292 } 293 } 294 // goto finish; 295 return true; 296 } 297 298 private void opExact1() { 299 if (s >= range || code[ip] != chars[s++]) {opFail(); return;} 300 //if (s > range) {opFail(); return;} 301 ip++; 302 sprev = sbegin; // break; 303 } 304 305 private void opExact2() { 306 if (s + 2 > range) {opFail(); return;} 307 if (code[ip] != chars[s]) {opFail(); return;} 308 ip++; s++; 309 if (code[ip] != chars[s]) {opFail(); return;} 310 sprev = s; 311 ip++; s++; 312 } 313 314 private void opExact3() { 315 if (s + 3 > range) {opFail(); return;} 316 if (code[ip] != chars[s]) {opFail(); return;} 317 ip++; s++; 318 if (code[ip] != chars[s]) {opFail(); return;} 319 ip++; s++; 320 if (code[ip] != chars[s]) {opFail(); return;} 321 sprev = s; 322 ip++; s++; 323 } 324 325 private void opExact4() { 326 if (s + 4 > range) {opFail(); return;} 327 if (code[ip] != chars[s]) {opFail(); return;} 328 ip++; s++; 329 if (code[ip] != chars[s]) {opFail(); return;} 330 ip++; s++; 331 if (code[ip] != chars[s]) {opFail(); return;} 332 ip++; s++; 333 if (code[ip] != chars[s]) {opFail(); return;} 334 sprev = s; 335 ip++; s++; 336 } 337 338 private void opExact5() { 339 if (s + 5 > range) {opFail(); return;} 340 if (code[ip] != chars[s]) {opFail(); return;} 341 ip++; s++; 342 if (code[ip] != chars[s]) {opFail(); return;} 343 ip++; s++; 344 if (code[ip] != chars[s]) {opFail(); return;} 345 ip++; s++; 346 if (code[ip] != chars[s]) {opFail(); return;} 347 ip++; s++; 348 if (code[ip] != chars[s]) {opFail(); return;} 349 sprev = s; 350 ip++; s++; 351 } 352 353 private void opExactN() { 354 int tlen = code[ip++]; 355 if (s + tlen > range) {opFail(); return;} 356 357 if (Config.USE_STRING_TEMPLATES) { 358 char[] bs = regex.templates[code[ip++]]; 359 int ps = code[ip++]; 360 361 while (tlen-- > 0) if (bs[ps++] != chars[s++]) {opFail(); return;} 362 363 } else { 364 while (tlen-- > 0) if (code[ip++] != chars[s++]) {opFail(); return;} 365 } 366 sprev = s - 1; 367 } 368 369 private void opExact1IC() { 370 if (s >= range || code[ip] != Character.toLowerCase(chars[s++])) {opFail(); return;} 371 ip++; 372 sprev = sbegin; // break; 373 } 374 375 private void opExactNIC() { 376 int tlen = code[ip++]; 377 if (s + tlen > range) {opFail(); return;} 378 379 if (Config.USE_STRING_TEMPLATES) { 380 char[] bs = regex.templates[code[ip++]]; 381 int ps = code[ip++]; 382 383 while (tlen-- > 0) if (bs[ps++] != Character.toLowerCase(chars[s++])) {opFail(); return;} 384 } else { 385 386 while (tlen-- > 0) if (code[ip++] != Character.toLowerCase(chars[s++])) {opFail(); return;} 387 } 388 sprev = s - 1; 389 } 390 391 private boolean isInBitSet() { 392 int c = chars[s]; 393 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); 394 } 395 396 private void opCClass() { 397 if (s >= range || !isInBitSet()) {opFail(); return;} 398 ip += BitSet.BITSET_SIZE; 399 s++; 400 sprev = sbegin; // break; 401 } 402 403 private boolean isInClassMB() { 404 int tlen = code[ip++]; 405 if (s >= range) return false; 406 int ss = s; 407 s++; 408 int c = chars[ss]; 409 if (!EncodingHelper.isInCodeRange(code, ip, c)) return false; 410 ip += tlen; 411 return true; 412 } 413 414 private void opCClassMB() { 415 // beyond string check 416 if (s >= range || chars[s] <= 0xff) {opFail(); return;} 417 if (!isInClassMB()) {opFail(); return;} // not!!! 418 sprev = sbegin; // break; 419 } 420 421 private void opCClassMIX() { 422 if (s >= range) {opFail(); return;} 423 if (chars[s] > 0xff) { 424 ip += BitSet.BITSET_SIZE; 425 if (!isInClassMB()) {opFail(); return;} 426 } else { 427 if (!isInBitSet()) {opFail(); return;} 428 ip += BitSet.BITSET_SIZE; 429 int tlen = code[ip++]; // by code range length 430 ip += tlen; 431 s++; 432 } 433 sprev = sbegin; // break; 434 } 435 436 private void opCClassNot() { 437 if (s >= range || isInBitSet()) {opFail(); return;} 438 ip += BitSet.BITSET_SIZE; 439 s++; 440 sprev = sbegin; // break; 441 } 442 443 private boolean isNotInClassMB() { 444 int tlen = code[ip++]; 445 446 if (!(s + 1 <= range)) { 447 if (s >= range) return false; 448 s = end; 449 ip += tlen; 450 return true; 451 } 452 453 int ss = s; 454 s++; 455 int c = chars[ss]; 456 457 if (EncodingHelper.isInCodeRange(code, ip, c)) return false; 458 ip += tlen; 459 return true; 460 } 461 462 private void opCClassMBNot() { 463 if (s >= range) {opFail(); return;} 464 if (chars[s] <= 0xff) { 465 s++; 466 int tlen = code[ip++]; 467 ip += tlen; 468 sprev = sbegin; // break; 469 return; 470 } 471 if (!isNotInClassMB()) {opFail(); return;} 472 sprev = sbegin; // break; 473 } 474 475 private void opCClassMIXNot() { 476 if (s >= range) {opFail(); return;} 477 if (chars[s] > 0xff) { 478 ip += BitSet.BITSET_SIZE; 479 if (!isNotInClassMB()) {opFail(); return;} 480 } else { 481 if (isInBitSet()) {opFail(); return;} 482 ip += BitSet.BITSET_SIZE; 483 int tlen = code[ip++]; 484 ip += tlen; 485 s++; 486 } 487 sprev = sbegin; // break; 488 } 489 490 private void opCClassNode() { 491 if (s >= range) {opFail(); return;} 492 CClassNode cc = (CClassNode)regex.operands[code[ip++]]; 493 int ss = s; 494 s++; 495 int c = chars[ss]; 496 if (!cc.isCodeInCCLength(c)) {opFail(); return;} 497 sprev = sbegin; // break; 498 } 499 500 private void opAnyChar() { 501 if (s >= range) {opFail(); return;} 502 if (isNewLine(chars[s])) {opFail(); return;} 503 s++; 504 sprev = sbegin; // break; 505 } 506 507 private void opAnyCharML() { 508 if (s >= range) {opFail(); return;} 509 s++; 510 sprev = sbegin; // break; 511 } 512 513 private void opAnyCharStar() { 514 final char[] chars = this.chars; 515 while (s < range) { 516 pushAlt(ip, s, sprev); 517 if (isNewLine(chars, s, end)) {opFail(); return;} 518 sprev = s; 519 s++; 520 } 521 sprev = sbegin; // break; 522 } 523 524 private void opAnyCharMLStar() { 525 while (s < range) { 526 pushAlt(ip, s, sprev); 527 sprev = s; 528 s++; 529 } 530 sprev = sbegin; // break; 531 } 532 533 private void opAnyCharStarPeekNext() { 534 final char c = (char)code[ip]; 535 final char[] chars = this.chars; 536 537 while (s < range) { 538 char b = chars[s]; 539 if (c == b) pushAlt(ip + 1, s, sprev); 540 if (isNewLine(b)) {opFail(); return;} 541 sprev = s; 542 s++; 543 } 544 ip++; 545 sprev = sbegin; // break; 546 } 547 548 private void opAnyCharMLStarPeekNext() { 549 final char c = (char)code[ip]; 550 final char[] chars = this.chars; 551 552 while (s < range) { 553 if (c == chars[s]) pushAlt(ip + 1, s, sprev); 554 sprev = s; 555 s++; 556 } 557 ip++; 558 sprev = sbegin; // break; 559 } 560 561 private void opWord() { 562 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 563 s++; 564 sprev = sbegin; // break; 565 } 566 567 private void opNotWord() { 568 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} 569 s++; 570 sprev = sbegin; // break; 571 } 572 573 private void opWordBound() { 574 if (s == str) { 575 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 576 } else if (s == end) { 577 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 578 } else { 579 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 580 } 581 } 582 583 private void opNotWordBound() { 584 if (s == str) { 585 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} 586 } else if (s == end) { 587 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 588 } else { 589 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 590 } 591 } 592 593 private void opWordBegin() { 594 if (s < range && EncodingHelper.isWord(chars[s])) { 595 if (s == str || !EncodingHelper.isWord(chars[sprev])) return; 596 } 597 opFail(); 598 } 599 600 private void opWordEnd() { 601 if (s != str && EncodingHelper.isWord(chars[sprev])) { 602 if (s == end || !EncodingHelper.isWord(chars[s])) return; 603 } 604 opFail(); 605 } 606 607 private void opBeginBuf() { 608 if (s != str) opFail(); 609 } 610 611 private void opEndBuf() { 612 if (s != end) opFail(); 613 } 614 615 private void opBeginLine() { 616 if (s == str) { 617 if (isNotBol(msaOptions)) opFail(); 618 return; 619 } else if (isNewLine(chars, sprev, end) && s != end) { 620 return; 621 } 622 opFail(); 623 } 624 625 private void opEndLine() { 626 if (s == end) { 627 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 628 if (str == end || !isNewLine(chars, sprev, end)) { 629 if (isNotEol(msaOptions)) opFail(); 630 } 631 return; 632 } else { 633 if (isNotEol(msaOptions)) opFail(); 634 return; 635 } 636 } else if (isNewLine(chars, s, end)) { 637 return; 638 } 639 opFail(); 640 } 641 642 private void opSemiEndBuf() { 643 if (s == end) { 644 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 645 if (str == end || !isNewLine(chars, sprev, end)) { 646 if (isNotEol(msaOptions)) opFail(); 647 } 648 return; 649 } else { 650 if (isNotEol(msaOptions)) opFail(); 651 return; 652 } 653 } else if (isNewLine(chars, s, end) && s + 1 == end) { 654 return; 655 } 656 opFail(); 657 } 658 659 private void opBeginPosition() { 660 if (s != msaStart) opFail(); 661 } 662 663 private void opMemoryStartPush() { 664 int mem = code[ip++]; 665 pushMemStart(mem, s); 666 } 667 668 private void opMemoryStart() { 669 int mem = code[ip++]; 670 repeatStk[memStartStk + mem] = s; 671 } 672 673 private void opMemoryEndPush() { 674 int mem = code[ip++]; 675 pushMemEnd(mem, s); 676 } 677 678 private void opMemoryEnd() { 679 int mem = code[ip++]; 680 repeatStk[memEndStk + mem] = s; 681 } 682 683 private void opMemoryEndPushRec() { 684 int mem = code[ip++]; 685 int stkp = getMemStart(mem); /* should be before push mem-end. */ 686 pushMemEnd(mem, s); 687 repeatStk[memStartStk + mem] = stkp; 688 } 689 690 private void opMemoryEndRec() { 691 int mem = code[ip++]; 692 repeatStk[memEndStk + mem] = s; 693 int stkp = getMemStart(mem); 694 695 if (BitStatus.bsAt(regex.btMemStart, mem)) { 696 repeatStk[memStartStk + mem] = stkp; 697 } else { 698 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); 699 } 700 701 pushMemEndMark(mem); 702 } 703 704 private boolean backrefInvalid(int mem) { 705 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; 706 } 707 708 private int backrefStart(int mem) { 709 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; 710 } 711 712 private int backrefEnd(int mem) { 713 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; 714 } 715 716 private void backref(int mem) { 717 /* if you want to remove following line, 718 you should check in parse and compile time. (numMem) */ 719 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 720 721 int pstart = backrefStart(mem); 722 int pend = backrefEnd(mem); 723 724 int n = pend - pstart; 725 if (s + n > range) {opFail(); return;} 726 sprev = s; 727 728 // STRING_CMP 729 while(n-- > 0) if (chars[pstart++] != chars[s++]) {opFail(); return;} 730 731 // beyond string check 732 if (sprev < range) { 733 while (sprev + 1 < s) sprev++; 734 } 735 } 736 737 private void opBackRef1() { 738 backref(1); 739 } 740 741 private void opBackRef2() { 742 backref(2); 743 } 744 745 private void opBackRefN() { 746 backref(code[ip++]); 747 } 748 749 private void opBackRefNIC() { 750 int mem = code[ip++]; 751 /* if you want to remove following line, 752 you should check in parse and compile time. (numMem) */ 753 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 754 755 int pstart = backrefStart(mem); 756 int pend = backrefEnd(mem); 757 758 int n = pend - pstart; 759 if (s + n > range) {opFail(); return;} 760 sprev = s; 761 762 value = s; 763 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} 764 s = value; 765 766 // if (sprev < chars.length) 767 while (sprev + 1 < s) sprev++; 768 } 769 770 private void opBackRefMulti() { 771 int tlen = code[ip++]; 772 773 int i; 774 loop:for (i=0; i<tlen; i++) { 775 int mem = code[ip++]; 776 if (backrefInvalid(mem)) continue; 777 778 int pstart = backrefStart(mem); 779 int pend = backrefEnd(mem); 780 781 int n = pend - pstart; 782 if (s + n > range) {opFail(); return;} 783 784 sprev = s; 785 int swork = s; 786 787 while (n-- > 0) { 788 if (chars[pstart++] != chars[swork++]) continue loop; 789 } 790 791 s = swork; 792 793 // beyond string check 794 if (sprev < range) { 795 while (sprev + 1 < s) sprev++; 796 } 797 798 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 799 break; /* success */ 800 } 801 if (i == tlen) {opFail(); return;} 802 } 803 804 private void opBackRefMultiIC() { 805 int tlen = code[ip++]; 806 807 int i; 808 loop:for (i=0; i<tlen; i++) { 809 int mem = code[ip++]; 810 if (backrefInvalid(mem)) continue; 811 812 int pstart = backrefStart(mem); 813 int pend = backrefEnd(mem); 814 815 int n = pend - pstart; 816 if (s + n > range) {opFail(); return;} 817 818 sprev = s; 819 820 value = s; 821 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC 822 s = value; 823 824 // if (sprev < chars.length) 825 while (sprev + 1 < s) sprev++; 826 827 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 828 break; /* success */ 829 } 830 if (i == tlen) {opFail(); return;} 831 } 832 833 private boolean memIsInMemp(int mem, int num, int memp) { 834 for (int i=0; i<num; i++) { 835 int m = code[memp++]; 836 if (mem == m) return true; 837 } 838 return false; 839 } 840 841 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit 842 private boolean backrefMatchAtNestedLevel(boolean ignoreCase, int caseFoldFlag, 843 int nest, int memNum, int memp) { 844 int pend = -1; 845 int level = 0; 846 int k = stk - 1; 847 848 while (k >= 0) { 849 StackEntry e = stack[k]; 850 851 if (e.type == CALL_FRAME) { 852 level--; 853 } else if (e.type == RETURN) { 854 level++; 855 } else if (level == nest) { 856 if (e.type == MEM_START) { 857 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 858 int pstart = e.getMemPStr(); 859 if (pend != -1) { 860 if (pend - pstart > end - s) return false; /* or goto next_mem; */ 861 int p = pstart; 862 863 value = s; 864 if (ignoreCase) { 865 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { 866 return false; /* or goto next_mem; */ 867 } 868 } else { 869 while (p < pend) { 870 if (chars[p++] != chars[value++]) return false; /* or goto next_mem; */ 871 } 872 } 873 s = value; 874 875 return true; 876 } 877 } 878 } else if (e.type == MEM_END) { 879 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 880 pend = e.getMemPStr(); 881 } 882 } 883 } 884 k--; 885 } 886 return false; 887 } 888 889 private void opBackRefAtLevel() { 890 int ic = code[ip++]; 891 int level = code[ip++]; 892 int tlen = code[ip++]; 893 894 sprev = s; 895 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit 896 while (sprev + 1 < s) sprev++; 897 ip += tlen; // * SIZE_MEMNUM 898 } else { 899 {opFail(); return;} 900 } 901 } 902 903 /* no need: IS_DYNAMIC_OPTION() == 0 */ 904 private void opSetOptionPush() { 905 // option = code[ip++]; // final for now 906 pushAlt(ip, s, sprev); 907 ip += OPSize.SET_OPTION + OPSize.FAIL; 908 } 909 910 private void opSetOption() { 911 // option = code[ip++]; // final for now 912 } 913 914 private void opNullCheckStart() { 915 int mem = code[ip++]; 916 pushNullCheckStart(mem, s); 917 } 918 919 private void nullCheckFound() { 920 // null_check_found: 921 /* empty loop founded, skip next instruction */ 922 switch(code[ip++]) { 923 case OPCode.JUMP: 924 case OPCode.PUSH: 925 ip++; // p += SIZE_RELADDR; 926 break; 927 case OPCode.REPEAT_INC: 928 case OPCode.REPEAT_INC_NG: 929 case OPCode.REPEAT_INC_SG: 930 case OPCode.REPEAT_INC_NG_SG: 931 ip++; // p += SIZE_MEMNUM; 932 break; 933 default: 934 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); 935 } // switch 936 } 937 938 private void opNullCheckEnd() { 939 int mem = code[ip++]; 940 int isNull = nullCheck(mem, s); /* mem: null check id */ 941 942 if (isNull != 0) { 943 if (Config.DEBUG_MATCH) { 944 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); 945 } 946 947 nullCheckFound(); 948 } 949 } 950 951 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 952 private void opNullCheckEndMemST() { 953 int mem = code[ip++]; /* mem: null check id */ 954 int isNull = nullCheckMemSt(mem, s); 955 956 if (isNull != 0) { 957 if (Config.DEBUG_MATCH) { 958 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); 959 } 960 961 if (isNull == -1) {opFail(); return;} 962 nullCheckFound(); 963 } 964 } 965 966 // USE_SUBEXP_CALL 967 private void opNullCheckEndMemSTPush() { 968 int mem = code[ip++]; /* mem: null check id */ 969 970 int isNull; 971 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 972 isNull = nullCheckMemStRec(mem, s); 973 } else { 974 isNull = nullCheckRec(mem, s); 975 } 976 977 if (isNull != 0) { 978 if (Config.DEBUG_MATCH) { 979 Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s); 980 } 981 982 if (isNull == -1) {opFail(); return;} 983 nullCheckFound(); 984 } else { 985 pushNullCheckEnd(mem); 986 } 987 } 988 989 private void opJump() { 990 ip += code[ip] + 1; 991 } 992 993 private void opPush() { 994 int addr = code[ip++]; 995 pushAlt(ip + addr, s, sprev); 996 } 997 998 private void opPop() { 999 popOne(); 1000 } 1001 1002 private void opPushOrJumpExact1() { 1003 int addr = code[ip++]; 1004 // beyond string check 1005 if (s < range && code[ip] == chars[s]) { 1006 ip++; 1007 pushAlt(ip + addr, s, sprev); 1008 return; 1009 } 1010 ip += addr + 1; 1011 } 1012 1013 private void opPushIfPeekNext() { 1014 int addr = code[ip++]; 1015 // beyond string check 1016 if (s < range && code[ip] == chars[s]) { 1017 ip++; 1018 pushAlt(ip + addr, s, sprev); 1019 return; 1020 } 1021 ip++; 1022 } 1023 1024 private void opRepeat() { 1025 int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1026 int addr= code[ip++]; 1027 1028 // ensure1(); 1029 repeatStk[mem] = stk; 1030 pushRepeat(mem, ip); 1031 1032 if (regex.repeatRangeLo[mem] == 0) { // lower 1033 pushAlt(ip + addr, s, sprev); 1034 } 1035 } 1036 1037 private void opRepeatNG() { 1038 int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1039 int addr= code[ip++]; 1040 1041 // ensure1(); 1042 repeatStk[mem] = stk; 1043 pushRepeat(mem, ip); 1044 1045 if (regex.repeatRangeLo[mem] == 0) { 1046 pushAlt(ip, s, sprev); 1047 ip += addr; 1048 } 1049 } 1050 1051 private void repeatInc(int mem, int si) { 1052 StackEntry e = stack[si]; 1053 1054 e.increaseRepeatCount(); 1055 1056 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { 1057 /* end of repeat. Nothing to do. */ 1058 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1059 pushAlt(ip, s, sprev); 1060 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ 1061 } else { 1062 ip = e.getRepeatPCode(); 1063 } 1064 pushRepeatInc(si); 1065 } 1066 1067 private void opRepeatInc() { 1068 int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1069 int si = repeatStk[mem]; 1070 repeatInc(mem, si); 1071 } 1072 1073 private void opRepeatIncSG() { 1074 int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1075 int si = getRepeat(mem); 1076 repeatInc(mem, si); 1077 } 1078 1079 private void repeatIncNG(int mem, int si) { 1080 StackEntry e = stack[si]; 1081 1082 e.increaseRepeatCount(); 1083 1084 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { 1085 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1086 int pcode = e.getRepeatPCode(); 1087 pushRepeatInc(si); 1088 pushAlt(pcode, s, sprev); 1089 } else { 1090 ip = e.getRepeatPCode(); 1091 pushRepeatInc(si); 1092 } 1093 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { 1094 pushRepeatInc(si); 1095 } 1096 } 1097 1098 private void opRepeatIncNG() { 1099 int mem = code[ip++]; 1100 int si = repeatStk[mem]; 1101 repeatIncNG(mem, si); 1102 } 1103 1104 private void opRepeatIncNGSG() { 1105 int mem = code[ip++]; 1106 int si = getRepeat(mem); 1107 repeatIncNG(mem, si); 1108 } 1109 1110 private void opPushPos() { 1111 pushPos(s, sprev); 1112 } 1113 1114 private void opPopPos() { 1115 StackEntry e = stack[posEnd()]; 1116 s = e.getStatePStr(); 1117 sprev= e.getStatePStrPrev(); 1118 } 1119 1120 private void opPushPosNot() { 1121 int addr = code[ip++]; 1122 pushPosNot(ip + addr, s, sprev); 1123 } 1124 1125 private void opFailPos() { 1126 popTilPosNot(); 1127 opFail(); 1128 } 1129 1130 private void opPushStopBT() { 1131 pushStopBT(); 1132 } 1133 1134 private void opPopStopBT() { 1135 stopBtEnd(); 1136 } 1137 1138 private void opLookBehind() { 1139 int tlen = code[ip++]; 1140 s = EncodingHelper.stepBack(str, s, tlen); 1141 if (s == -1) {opFail(); return;} 1142 sprev = EncodingHelper.prevCharHead(str, s); 1143 } 1144 1145 private void opLookBehindSb() { 1146 int tlen = code[ip++]; 1147 s -= tlen; 1148 if (s < str) {opFail(); return;} 1149 sprev = s == str ? -1 : s - 1; 1150 } 1151 1152 private void opPushLookBehindNot() { 1153 int addr = code[ip++]; 1154 int tlen = code[ip++]; 1155 int q = EncodingHelper.stepBack(str, s, tlen); 1156 if (q == -1) { 1157 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 1158 If you want to change to fail, replace following line. */ 1159 ip += addr; 1160 // return FAIL; 1161 } else { 1162 pushLookBehindNot(ip + addr, s, sprev); 1163 s = q; 1164 sprev = EncodingHelper.prevCharHead(str, s); 1165 } 1166 } 1167 1168 private void opFailLookBehindNot() { 1169 popTilLookBehindNot(); 1170 opFail(); 1171 } 1172 1173 private void opFail() { 1174 if (stack == null) { 1175 ip = regex.codeLength - 1; 1176 return; 1177 } 1178 1179 1180 StackEntry e = pop(); 1181 ip = e.getStatePCode(); 1182 s = e.getStatePStr(); 1183 sprev = e.getStatePStrPrev(); 1184 } 1185 1186 private int finish() { 1187 return bestLen; 1188 } 1189 } --- EOF ---