1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 
  21 package jdk.nashorn.internal.joni;
  22 
  23 import static jdk.nashorn.internal.joni.Option.isFindLongest;
  24 
  25 import jdk.nashorn.internal.joni.constants.AnchorType;
  26 import jdk.nashorn.internal.joni.encoding.IntHolder;
  27 
  28 public abstract class Matcher extends IntHolder {
  29     protected final Regex regex;
  30 
  31     protected final char[] chars;
  32     protected final int str;
  33     protected final int end;
  34 
  35     protected int msaStart;
  36     protected int msaOptions;
  37     protected final Region msaRegion;
  38     protected int msaBestLen;
  39     protected int msaBestS;
  40 
  41     protected int msaBegin;
  42     protected int msaEnd;
  43 
  44     public Matcher(Regex regex, char[] chars) {
  45         this(regex, chars, 0, chars.length);
  46     }
  47 
  48     public Matcher(Regex regex, char[] chars, int p, int end) {
  49         this.regex = regex;
  50 
  51         this.chars = chars;
  52         this.str = p;
  53         this.end = end;
  54 
  55         this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
  56     }
  57 
  58     // main matching method
  59     protected abstract int matchAt(int range, int sstart, int sprev);
  60 
  61     protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum);
  62     protected abstract void stateCheckBuffClear();
  63 
  64     public final Region getRegion() {
  65         return msaRegion;
  66     }
  67 
  68     public final Region getEagerRegion() {
  69         return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd);
  70     }
  71 
  72     public final int getBegin() {
  73         return msaBegin;
  74     }
  75 
  76     public final int getEnd() {
  77         return msaEnd;
  78     }
  79 
  80     protected final void msaInit(int option, int start) {
  81         msaOptions = option;
  82         msaStart = start;
  83         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1;
  84     }
  85 
  86     public final int match(int at, int range, int option) {
  87         msaInit(option, at);
  88 
  89         if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
  90             int offset = at = str;
  91             stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction?
  92         } // USE_COMBINATION_EXPLOSION_CHECK
  93 
  94         int prev = EncodingHelper.prevCharHead(str, at);
  95 
  96         if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
  97             return matchAt(end /*range*/, at, prev);
  98         } else {
  99             return matchAt(range /*range*/, at, prev);
 100         }
 101     }
 102 
 103     int low, high; // these are the return values
 104     private boolean forwardSearchRange(char[] chars, int str, int end, int s, int range, IntHolder lowPrev) {
 105         int pprev = -1;
 106         int p = s;
 107 
 108         if (Config.DEBUG_SEARCH) {
 109             Config.log.println("forward_search_range: "+
 110                                 "str: " + str +
 111                                 ", end: " + end +
 112                                 ", s: " + s +
 113                                 ", range: " + range);
 114         }
 115 
 116         if (regex.dMin > 0) {
 117             p += regex.dMin;
 118         }
 119 
 120         retry:while (true) {
 121             p = regex.searchAlgorithm.search(regex, chars, p, end, range);
 122 
 123             if (p != -1 && p < range) {
 124                 if (p - regex.dMin < s) {
 125                     // retry_gate:
 126                     pprev = p;
 127                     p++;
 128                     continue retry;
 129                 }
 130 
 131                 if (regex.subAnchor != 0) {
 132                     switch (regex.subAnchor) {
 133                     case AnchorType.BEGIN_LINE:
 134                         if (p != str) {
 135                             int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
 136                             if (!EncodingHelper.isNewLine(chars, prev, end)) {
 137                                 // goto retry_gate;
 138                                 pprev = p;
 139                                 p++;
 140                                 continue retry;
 141                             }
 142                         }
 143                         break;
 144 
 145                     case AnchorType.END_LINE:
 146                         if (p == end) {
 147                             if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
 148                                 int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
 149                                 if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) {
 150                                     // goto retry_gate;
 151                                     pprev = p;
 152                                     p++;
 153                                     continue retry;
 154                                 }
 155                             }
 156                         } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
 157                             //if () break;
 158                             // goto retry_gate;
 159                             pprev = p;
 160                             p++;
 161                             continue retry;
 162                         }
 163                         break;
 164                     } // switch
 165                 }
 166 
 167                 if (regex.dMax == 0) {
 168                     low = p;
 169                     if (lowPrev != null) { // ??? // remove null checks
 170                         if (low > s) {
 171                             lowPrev.value = EncodingHelper.prevCharHead(s, p);
 172                         } else {
 173                             lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
 174                         }
 175                     }
 176                 } else {
 177                     if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 178                         low = p - regex.dMax;
 179 
 180                         if (low > s) {
 181                             low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
 182                             if (lowPrev != null && lowPrev.value == -1) {
 183                                 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
 184                             }
 185                         } else {
 186                             if (lowPrev != null) {
 187                                 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low);
 188                             }
 189                         }
 190                     }
 191                 }
 192                 /* no needs to adjust *high, *high is used as range check only */
 193                 high = p - regex.dMin;
 194 
 195                 if (Config.DEBUG_SEARCH) {
 196                     Config.log.println("forward_search_range success: "+
 197                                         "low: " + (low - str) +
 198                                         ", high: " + (high - str) +
 199                                         ", dmin: " + regex.dMin +
 200                                         ", dmax: " + regex.dMax);
 201                 }
 202 
 203                 return true;    /* success */
 204             }
 205 
 206             return false;   /* fail */
 207         } //while
 208     }
 209 
 210     // low, high
 211     private boolean backwardSearchRange(char[] chars, int str, int end, int s, int range, int adjrange) {
 212         range += regex.dMin;
 213         int p = s;
 214 
 215         retry:while (true) {
 216             p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range);
 217 
 218             if (p != -1) {
 219                 if (regex.subAnchor != 0) {
 220                     switch (regex.subAnchor) {
 221                     case AnchorType.BEGIN_LINE:
 222                         if (p != str) {
 223                             int prev = EncodingHelper.prevCharHead(str, p);
 224                             if (!EncodingHelper.isNewLine(chars, prev, end)) {
 225                                 p = prev;
 226                                 continue retry;
 227                             }
 228                         }
 229                         break;
 230 
 231                     case AnchorType.END_LINE:
 232                         if (p == end) {
 233                             if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
 234                                 int prev = EncodingHelper.prevCharHead(adjrange, p);
 235                                 if (prev == -1) return false;
 236                                 if (EncodingHelper.isNewLine(chars, prev, end)) {
 237                                     p = prev;
 238                                     continue retry;
 239                                 }
 240                             }
 241                         } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
 242                             p = EncodingHelper.prevCharHead(adjrange, p);
 243                             if (p == -1) return false;
 244                             continue retry;
 245                         }
 246                         break;
 247                     } // switch
 248                 }
 249 
 250                 /* no needs to adjust *high, *high is used as range check only */
 251                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 252                     low = p - regex.dMax;
 253                     high = p - regex.dMin;
 254                 }
 255 
 256                 if (Config.DEBUG_SEARCH) {
 257                     Config.log.println("backward_search_range: "+
 258                                         "low: " + (low - str) +
 259                                         ", high: " + (high - str));
 260                 }
 261 
 262                 return true;
 263             }
 264 
 265             if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
 266             return false;
 267         } // while
 268     }
 269 
 270     // MATCH_AND_RETURN_CHECK
 271     private boolean matchCheck(int upperRange, int s, int prev) {
 272         if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
 273             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 274                 //range = upperRange;
 275                 if (matchAt(upperRange, s, prev) != -1) {
 276                     if (!isFindLongest(regex.options)) return true;
 277                 }
 278             } else {
 279                 //range = upperRange;
 280                 if (matchAt(upperRange, s, prev) != -1) return true;
 281             }
 282         } else {
 283             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 284                 if (matchAt(end, s, prev) != -1) {
 285                     //range = upperRange;
 286                     if (!isFindLongest(regex.options)) return true;
 287                 }
 288             } else {
 289                 //range = upperRange;
 290                 if (matchAt(end, s, prev) != -1) return true;
 291             }
 292         }
 293         return false;
 294     }
 295 
 296     public final int search(int start, int range, int option) {
 297         int s, prev;
 298         int origStart = start;
 299         int origRange = range;
 300 
 301         if (Config.DEBUG_SEARCH) {
 302             Config.log.println("onig_search (entry point): "+
 303                         "str: " + str +
 304                     ", end: " + (end - str) +
 305                     ", start: " + (start - str) +
 306                     ", range " + (range - str));
 307         }
 308 
 309         if (start > end || start < str) return -1;
 310 
 311         /* anchor optimize: resume search range */
 312         if (regex.anchor != 0 && str < end) {
 313             int minSemiEnd, maxSemiEnd;
 314 
 315             if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
 316                 /* search start-position only */
 317                 // !begin_position:!
 318                 if (range > start) {
 319                     range = start + 1;
 320                 } else {
 321                     range = start;
 322                 }
 323             } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
 324                 /* search str-position only */
 325                 if (range > start) {
 326                     if (start != str) return -1; // mismatch_no_msa;
 327                     range = str + 1;
 328                 } else {
 329                     if (range <= str) {
 330                         start = str;
 331                         range = str;
 332                     } else {
 333                         return -1; // mismatch_no_msa;
 334                     }
 335                 }
 336             } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
 337                 minSemiEnd = maxSemiEnd = end;
 338                 // !end_buf:!
 339                 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
 340             } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
 341                 int preEnd = EncodingHelper.stepBack(str, end, 1);
 342                 maxSemiEnd = end;
 343                 if (EncodingHelper.isNewLine(chars, preEnd, end)) {
 344                     minSemiEnd = preEnd;
 345                     if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
 346                         preEnd = EncodingHelper.stepBack(str, preEnd, 1);
 347                         if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
 348                             minSemiEnd = preEnd;
 349                         }
 350                     }
 351                     if (minSemiEnd > str && start <= minSemiEnd) {
 352                         // !goto end_buf;!
 353                         if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
 354                     }
 355                 } else {
 356                     minSemiEnd = end;
 357                     // !goto end_buf;!
 358                     if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
 359                 }
 360             } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
 361                 // goto !begin_position;!
 362                 if (range > start) {
 363                     range = start + 1;
 364                 } else {
 365                     range = start;
 366                 }
 367             }
 368 
 369         } else if (str == end) { /* empty string */
 370             // empty address ?
 371             if (Config.DEBUG_SEARCH) {
 372                 Config.log.println("onig_search: empty string.");
 373             }
 374 
 375             if (regex.thresholdLength == 0) {
 376                 s = start = str;
 377                 prev = -1;
 378                 msaInit(option, start);
 379 
 380                 if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear();
 381 
 382                 if (matchCheck(end, s, prev)) return match(s);
 383                 return mismatch();
 384             }
 385             return -1; // goto mismatch_no_msa;
 386         }
 387 
 388         if (Config.DEBUG_SEARCH) {
 389             Config.log.println("onig_search(apply anchor): " +
 390                                 "end: " + (end - str) +
 391                                 ", start " + (start - str) +
 392                                 ", range " + (range - str));
 393         }
 394 
 395         msaInit(option, origStart);
 396         if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
 397             int offset = Math.min(start, range) - str;
 398             stateCheckBuffInit(end - str, offset, regex.numCombExpCheck);
 399         }
 400 
 401         s = start;
 402         if (range > start) {    /* forward search */
 403             if (s > str) {
 404                 prev = EncodingHelper.prevCharHead(str, s);
 405             } else {
 406                 prev = 0; // -1
 407             }
 408 
 409             if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
 410                 int schRange = range;
 411                 if (regex.dMax != 0) {
 412                     if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
 413                         schRange = end;
 414                     } else {
 415                         schRange += regex.dMax;
 416                         if (schRange > end) schRange = end;
 417                     }
 418                 }
 419                 if ((end - start) < regex.thresholdLength) return mismatch();
 420 
 421                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 422                     do {
 423                         if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
 424                         if (s < low) {
 425                             s = low;
 426                             prev = value;
 427                         }
 428                         while (s <= high) {
 429                             if (matchCheck(origRange, s, prev)) return match(s); // ???
 430                             prev = s;
 431                             s++;
 432                         }
 433                     } while (s < range);
 434                     return mismatch();
 435 
 436                 } else { /* check only. */
 437                     if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch();
 438 
 439                     if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
 440                         do {
 441                             if (matchCheck(origRange, s, prev)) return match(s);
 442                             prev = s;
 443                             s++;
 444                         } while (s < range);
 445                         return mismatch();
 446                     }
 447 
 448                 }
 449             }
 450 
 451             do {
 452                 if (matchCheck(origRange, s, prev)) return match(s);
 453                 prev = s;
 454                 s++;
 455             } while (s < range);
 456 
 457             if (s == range) { /* because empty match with /$/. */
 458                 if (matchCheck(origRange, s, prev)) return match(s);
 459             }
 460         } else { /* backward search */
 461             if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
 462                 if (origStart < end) {
 463                     origStart++; // /* is upper range */
 464                 }
 465             }
 466 
 467             if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
 468                 int adjrange;
 469                 if (range < end) {
 470                     adjrange = range;
 471                 } else {
 472                     adjrange = end;
 473                 }
 474                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
 475                     do {
 476                         int schStart = s + regex.dMax;
 477                         if (schStart > end) schStart = end;
 478                         if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high
 479                         if (s > high) s = high;
 480                         while (s != -1 && s >= low) {
 481                             prev = EncodingHelper.prevCharHead(str, s);
 482                             if (matchCheck(origStart, s, prev)) return match(s);
 483                             s = prev;
 484                         }
 485                     } while (s >= range);
 486                     return mismatch();
 487                 } else { /* check only. */
 488                     if ((end - range) < regex.thresholdLength) return mismatch();
 489 
 490                     int schStart = s;
 491                     if (regex.dMax != 0) {
 492                         if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
 493                             schStart = end;
 494                         } else {
 495                             schStart += regex.dMax;
 496                             if (schStart > end) {
 497                                 schStart = end;
 498                             }
 499                         }
 500                     }
 501                     if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch();
 502                 }
 503             }
 504 
 505             do {
 506                 prev = EncodingHelper.prevCharHead(str, s);
 507                 if (matchCheck(origStart, s, prev)) return match(s);
 508                 s = prev;
 509             } while (s >= range);
 510 
 511         }
 512         return mismatch();
 513     }
 514 
 515     private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) {
 516         if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
 517 
 518         if (range > start) {
 519             if ((minSemiEnd - start) > regex.anchorDmax) {
 520                 start = minSemiEnd - regex.anchorDmax;
 521                 if (start >= end) {
 522                     /* match with empty at end */
 523                     start = EncodingHelper.prevCharHead(str, end);
 524                 }
 525             }
 526             if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
 527                 range = maxSemiEnd - regex.anchorDmin + 1;
 528             }
 529             if (start >= range) return true; // mismatch_no_msa;
 530         } else {
 531             if ((minSemiEnd - range) > regex.anchorDmax) {
 532                 range = minSemiEnd - regex.anchorDmax;
 533             }
 534             if ((maxSemiEnd - start) < regex.anchorDmin) {
 535                 start = maxSemiEnd - regex.anchorDmin;
 536             }
 537             if (range > start) return true; // mismatch_no_msa;
 538         }
 539         return false;
 540     }
 541 
 542     private int match(int s) {
 543         return s - str; // sstart ???
 544     }
 545 
 546     private int mismatch() {
 547         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 548             if (msaBestLen >= 0) {
 549                 int s = msaBestS;
 550                 return match(s);
 551             }
 552         }
 553         // falls through finish:
 554         return -1;
 555     }
 556 }