1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 21 package jdk.nashorn.internal.runtime.regexp.joni; 22 23 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 24 25 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 26 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 27 28 public abstract class Matcher extends IntHolder { 29 protected final Regex regex; 30 31 protected final char[] chars; 32 protected final int str; 33 protected final int end; 34 35 protected int msaStart; 36 protected int msaOptions; 37 protected final Region msaRegion; 38 protected int msaBestLen; 39 protected int msaBestS; 40 41 protected int msaBegin; 42 protected int msaEnd; 43 44 public Matcher(Regex regex, char[] chars) { 45 this(regex, chars, 0, chars.length); 46 } 47 48 public Matcher(Regex regex, char[] chars, int p, int end) { 49 this.regex = regex; 50 51 this.chars = chars; 52 this.str = p; 53 this.end = end; 54 55 this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); 56 } 57 58 // main matching method 59 protected abstract int matchAt(int range, int sstart, int sprev); 60 61 protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum); 62 protected abstract void stateCheckBuffClear(); 63 64 public final Region getRegion() { 65 return msaRegion; 66 } 67 68 public final Region getEagerRegion() { 69 return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd); 70 } 71 72 public final int getBegin() { 73 return msaBegin; 74 } 75 76 public final int getEnd() { 77 return msaEnd; 78 } 79 80 protected final void msaInit(int option, int start) { 81 msaOptions = option; 82 msaStart = start; 83 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; 84 } 85 86 public final int match(int at, int range, int option) { 87 msaInit(option, at); 88 89 if (Config.USE_COMBINATION_EXPLOSION_CHECK) { 90 int offset = at = str; 91 stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction? 92 } // USE_COMBINATION_EXPLOSION_CHECK 93 94 int prev = EncodingHelper.prevCharHead(str, at); 95 96 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 97 return matchAt(end /*range*/, at, prev); 98 } else { 99 return matchAt(range /*range*/, at, prev); 100 } 101 } 102 103 int low, high; // these are the return values 104 private boolean forwardSearchRange(char[] chars, int str, int end, int s, int range, IntHolder lowPrev) { 105 int pprev = -1; 106 int p = s; 107 108 if (Config.DEBUG_SEARCH) { 109 Config.log.println("forward_search_range: "+ 110 "str: " + str + 111 ", end: " + end + 112 ", s: " + s + 113 ", range: " + range); 114 } 115 116 if (regex.dMin > 0) { 117 p += regex.dMin; 118 } 119 120 retry:while (true) { 121 p = regex.searchAlgorithm.search(regex, chars, p, end, range); 122 123 if (p != -1 && p < range) { 124 if (p - regex.dMin < s) { 125 // retry_gate: 126 pprev = p; 127 p++; 128 continue retry; 129 } 130 131 if (regex.subAnchor != 0) { 132 switch (regex.subAnchor) { 133 case AnchorType.BEGIN_LINE: 134 if (p != str) { 135 int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 136 if (!EncodingHelper.isNewLine(chars, prev, end)) { 137 // goto retry_gate; 138 pprev = p; 139 p++; 140 continue retry; 141 } 142 } 143 break; 144 145 case AnchorType.END_LINE: 146 if (p == end) { 147 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 148 int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 149 if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) { 150 // goto retry_gate; 151 pprev = p; 152 p++; 153 continue retry; 154 } 155 } 156 } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { 157 //if () break; 158 // goto retry_gate; 159 pprev = p; 160 p++; 161 continue retry; 162 } 163 break; 164 } // switch 165 } 166 167 if (regex.dMax == 0) { 168 low = p; 169 if (lowPrev != null) { // ??? // remove null checks 170 if (low > s) { 171 lowPrev.value = EncodingHelper.prevCharHead(s, p); 172 } else { 173 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 174 } 175 } 176 } else { 177 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 178 low = p - regex.dMax; 179 180 if (low > s) { 181 low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); 182 if (lowPrev != null && lowPrev.value == -1) { 183 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); 184 } 185 } else { 186 if (lowPrev != null) { 187 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low); 188 } 189 } 190 } 191 } 192 /* no needs to adjust *high, *high is used as range check only */ 193 high = p - regex.dMin; 194 195 if (Config.DEBUG_SEARCH) { 196 Config.log.println("forward_search_range success: "+ 197 "low: " + (low - str) + 198 ", high: " + (high - str) + 199 ", dmin: " + regex.dMin + 200 ", dmax: " + regex.dMax); 201 } 202 203 return true; /* success */ 204 } 205 206 return false; /* fail */ 207 } //while 208 } 209 210 // low, high 211 private boolean backwardSearchRange(char[] chars, int str, int end, int s, int range, int adjrange) { 212 range += regex.dMin; 213 int p = s; 214 215 retry:while (true) { 216 p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range); 217 218 if (p != -1) { 219 if (regex.subAnchor != 0) { 220 switch (regex.subAnchor) { 221 case AnchorType.BEGIN_LINE: 222 if (p != str) { 223 int prev = EncodingHelper.prevCharHead(str, p); 224 if (!EncodingHelper.isNewLine(chars, prev, end)) { 225 p = prev; 226 continue retry; 227 } 228 } 229 break; 230 231 case AnchorType.END_LINE: 232 if (p == end) { 233 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 234 int prev = EncodingHelper.prevCharHead(adjrange, p); 235 if (prev == -1) return false; 236 if (EncodingHelper.isNewLine(chars, prev, end)) { 237 p = prev; 238 continue retry; 239 } 240 } 241 } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { 242 p = EncodingHelper.prevCharHead(adjrange, p); 243 if (p == -1) return false; 244 continue retry; 245 } 246 break; 247 } // switch 248 } 249 250 /* no needs to adjust *high, *high is used as range check only */ 251 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 252 low = p - regex.dMax; 253 high = p - regex.dMin; 254 } 255 256 if (Config.DEBUG_SEARCH) { 257 Config.log.println("backward_search_range: "+ 258 "low: " + (low - str) + 259 ", high: " + (high - str)); 260 } 261 262 return true; 263 } 264 265 if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail."); 266 return false; 267 } // while 268 } 269 270 // MATCH_AND_RETURN_CHECK 271 private boolean matchCheck(int upperRange, int s, int prev) { 272 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 273 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 274 //range = upperRange; 275 if (matchAt(upperRange, s, prev) != -1) { 276 if (!isFindLongest(regex.options)) return true; 277 } 278 } else { 279 //range = upperRange; 280 if (matchAt(upperRange, s, prev) != -1) return true; 281 } 282 } else { 283 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 284 if (matchAt(end, s, prev) != -1) { 285 //range = upperRange; 286 if (!isFindLongest(regex.options)) return true; 287 } 288 } else { 289 //range = upperRange; 290 if (matchAt(end, s, prev) != -1) return true; 291 } 292 } 293 return false; 294 } 295 296 public final int search(int start, int range, int option) { 297 int s, prev; 298 int origStart = start; 299 int origRange = range; 300 301 if (Config.DEBUG_SEARCH) { 302 Config.log.println("onig_search (entry point): "+ 303 "str: " + str + 304 ", end: " + (end - str) + 305 ", start: " + (start - str) + 306 ", range " + (range - str)); 307 } 308 309 if (start > end || start < str) return -1; 310 311 /* anchor optimize: resume search range */ 312 if (regex.anchor != 0 && str < end) { 313 int minSemiEnd, maxSemiEnd; 314 315 if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { 316 /* search start-position only */ 317 // !begin_position:! 318 if (range > start) { 319 range = start + 1; 320 } else { 321 range = start; 322 } 323 } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { 324 /* search str-position only */ 325 if (range > start) { 326 if (start != str) return -1; // mismatch_no_msa; 327 range = str + 1; 328 } else { 329 if (range <= str) { 330 start = str; 331 range = str; 332 } else { 333 return -1; // mismatch_no_msa; 334 } 335 } 336 } else if ((regex.anchor & AnchorType.END_BUF) != 0) { 337 minSemiEnd = maxSemiEnd = end; 338 // !end_buf:! 339 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 340 } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { 341 int preEnd = EncodingHelper.stepBack(str, end, 1); 342 maxSemiEnd = end; 343 if (EncodingHelper.isNewLine(chars, preEnd, end)) { 344 minSemiEnd = preEnd; 345 if (Config.USE_CRNL_AS_LINE_TERMINATOR) { 346 preEnd = EncodingHelper.stepBack(str, preEnd, 1); 347 if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) { 348 minSemiEnd = preEnd; 349 } 350 } 351 if (minSemiEnd > str && start <= minSemiEnd) { 352 // !goto end_buf;! 353 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 354 } 355 } else { 356 minSemiEnd = end; 357 // !goto end_buf;! 358 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 359 } 360 } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { 361 // goto !begin_position;! 362 if (range > start) { 363 range = start + 1; 364 } else { 365 range = start; 366 } 367 } 368 369 } else if (str == end) { /* empty string */ 370 // empty address ? 371 if (Config.DEBUG_SEARCH) { 372 Config.log.println("onig_search: empty string."); 373 } 374 375 if (regex.thresholdLength == 0) { 376 s = start = str; 377 prev = -1; 378 msaInit(option, start); 379 380 if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear(); 381 382 if (matchCheck(end, s, prev)) return match(s); 383 return mismatch(); 384 } 385 return -1; // goto mismatch_no_msa; 386 } 387 388 if (Config.DEBUG_SEARCH) { 389 Config.log.println("onig_search(apply anchor): " + 390 "end: " + (end - str) + 391 ", start " + (start - str) + 392 ", range " + (range - str)); 393 } 394 395 msaInit(option, origStart); 396 if (Config.USE_COMBINATION_EXPLOSION_CHECK) { 397 int offset = Math.min(start, range) - str; 398 stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); 399 } 400 401 s = start; 402 if (range > start) { /* forward search */ 403 if (s > str) { 404 prev = EncodingHelper.prevCharHead(str, s); 405 } else { 406 prev = 0; // -1 407 } 408 409 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 410 int schRange = range; 411 if (regex.dMax != 0) { 412 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 413 schRange = end; 414 } else { 415 schRange += regex.dMax; 416 if (schRange > end) schRange = end; 417 } 418 } 419 if ((end - start) < regex.thresholdLength) return mismatch(); 420 421 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 422 do { 423 if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev 424 if (s < low) { 425 s = low; 426 prev = value; 427 } 428 while (s <= high) { 429 if (matchCheck(origRange, s, prev)) return match(s); // ??? 430 prev = s; 431 s++; 432 } 433 } while (s < range); 434 return mismatch(); 435 436 } else { /* check only. */ 437 if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch(); 438 439 if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { 440 do { 441 if (matchCheck(origRange, s, prev)) return match(s); 442 prev = s; 443 s++; 444 } while (s < range); 445 return mismatch(); 446 } 447 448 } 449 } 450 451 do { 452 if (matchCheck(origRange, s, prev)) return match(s); 453 prev = s; 454 s++; 455 } while (s < range); 456 457 if (s == range) { /* because empty match with /$/. */ 458 if (matchCheck(origRange, s, prev)) return match(s); 459 } 460 } else { /* backward search */ 461 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 462 if (origStart < end) { 463 origStart++; // /* is upper range */ 464 } 465 } 466 467 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 468 int adjrange; 469 if (range < end) { 470 adjrange = range; 471 } else { 472 adjrange = end; 473 } 474 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { 475 do { 476 int schStart = s + regex.dMax; 477 if (schStart > end) schStart = end; 478 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high 479 if (s > high) s = high; 480 while (s != -1 && s >= low) { 481 prev = EncodingHelper.prevCharHead(str, s); 482 if (matchCheck(origStart, s, prev)) return match(s); 483 s = prev; 484 } 485 } while (s >= range); 486 return mismatch(); 487 } else { /* check only. */ 488 if ((end - range) < regex.thresholdLength) return mismatch(); 489 490 int schStart = s; 491 if (regex.dMax != 0) { 492 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 493 schStart = end; 494 } else { 495 schStart += regex.dMax; 496 if (schStart > end) { 497 schStart = end; 498 } 499 } 500 } 501 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); 502 } 503 } 504 505 do { 506 prev = EncodingHelper.prevCharHead(str, s); 507 if (matchCheck(origStart, s, prev)) return match(s); 508 s = prev; 509 } while (s >= range); 510 511 } 512 return mismatch(); 513 } 514 515 private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) { 516 if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa; 517 518 if (range > start) { 519 if ((minSemiEnd - start) > regex.anchorDmax) { 520 start = minSemiEnd - regex.anchorDmax; 521 if (start >= end) { 522 /* match with empty at end */ 523 start = EncodingHelper.prevCharHead(str, end); 524 } 525 } 526 if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { 527 range = maxSemiEnd - regex.anchorDmin + 1; 528 } 529 if (start >= range) return true; // mismatch_no_msa; 530 } else { 531 if ((minSemiEnd - range) > regex.anchorDmax) { 532 range = minSemiEnd - regex.anchorDmax; 533 } 534 if ((maxSemiEnd - start) < regex.anchorDmin) { 535 start = maxSemiEnd - regex.anchorDmin; 536 } 537 if (range > start) return true; // mismatch_no_msa; 538 } 539 return false; 540 } 541 542 private int match(int s) { 543 return s - str; // sstart ??? 544 } 545 546 private int mismatch() { 547 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 548 if (msaBestLen >= 0) { 549 int s = msaBestS; 550 return match(s); 551 } 552 } 553 // falls through finish: 554 return -1; 555 } 556 }