1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 
  21 package jdk.nashorn.internal.runtime.regexp.joni;
  22 
  23 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
  24 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
  25 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
  26 
  27 @SuppressWarnings("javadoc")
  28 public abstract class Matcher extends IntHolder {
  29     protected final Regex regex;
  30 
  31     protected final char[] chars;
  32     protected final int str;
  33     protected final int end;
  34 
  35     protected int msaStart;
  36     protected int msaOptions;
  37     protected final Region msaRegion;
  38     protected int msaBestLen;
  39     protected int msaBestS;
  40 
  41     protected int msaBegin;
  42     protected int msaEnd;
  43 
  44     public Matcher(final Regex regex, final char[] chars) {
  45         this(regex, chars, 0, chars.length);
  46     }
  47 
  48     public Matcher(final Regex regex, final char[] chars, final int p, final int end) {
  49         this.regex = regex;
  50 
  51         this.chars = chars;
  52         this.str = p;
  53         this.end = end;
  54 
  55         this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
  56     }
  57 
  58     // main matching method
  59     protected abstract int matchAt(int range, int sstart, int sprev);
  60 
  61     public final Region getRegion() {
  62         return msaRegion;
  63     }
  64 
  65     public final int getBegin() {
  66         return msaBegin;
  67     }
  68 
  69     public final int getEnd() {
  70         return msaEnd;
  71     }
  72 
  73     protected final void msaInit(final int option, final int start) {
  74         msaOptions = option;
  75         msaStart = start;
  76         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
  77             msaBestLen = -1;
  78         }
  79     }
  80 
  81     public final int match(final int at, final int range, final int option) {
  82         msaInit(option, at);
  83 
  84         final int prev = EncodingHelper.prevCharHead(str, at);
  85 
  86         if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
  87             return matchAt(end /*range*/, at, prev);
  88         }
  89         return matchAt(range /*range*/, at, prev);
  90     }
  91 
  92     int low, high; // these are the return values
  93     private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) {
  94         int pprev = -1;
  95         int p = s;
  96 
  97         if (Config.DEBUG_SEARCH) {
  98             Config.log.println("forward_search_range: "+
  99                                 "str: " + string +
 100                                 ", end: " + e +
 101                                 ", s: " + s +
 102                                 ", range: " + range);
 103         }
 104 
 105         if (regex.dMin > 0) {
 106             p += regex.dMin;
 107         }
 108 
 109         retry:while (true) {
 110             p = regex.searchAlgorithm.search(regex, ch, p, e, range);
 111 
 112             if (p != -1 && p < range) {
 113                 if (p - regex.dMin < s) {
 114                     // retry_gate:
 115                     pprev = p;
 116                     p++;
 117                     continue retry;
 118                 }
 119 
 120                 if (regex.subAnchor != 0) {
 121                     switch (regex.subAnchor) {
 122                     case AnchorType.BEGIN_LINE:
 123                         if (p != string) {
 124                             final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
 125                             if (!EncodingHelper.isNewLine(ch, prev, e)) {
 126                                 // goto retry_gate;
 127                                 pprev = p;
 128                                 p++;
 129                                 continue retry;
 130                             }
 131                         }
 132                         break;
 133 
 134                     case AnchorType.END_LINE:
 135                         if (p == e) {
 136                             if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
 137                                 final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
 138                                 if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) {
 139                                     // goto retry_gate;
 140                                     pprev = p;
 141                                     p++;
 142                                     continue retry;
 143                                 }
 144                             }
 145                         } else if (!EncodingHelper.isNewLine(ch, p, e)) {
 146                             //if () break;
 147                             // goto retry_gate;
 148                             pprev = p;
 149                             p++;
 150                             continue retry;
 151                         }
 152                         break;
 153 
 154                     default:
 155                         break;
 156                     } // switch
 157                 }
 158 
 159                 if (regex.dMax == 0) {
 160                     low = p;
 161                     if (lowPrev != null) { // ??? // remove null checks
 162                         if (low > s) {
 163                             lowPrev.value = EncodingHelper.prevCharHead(s, p);
 164                         } else {
 165                             lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p);
 166                         }
 167                     }
 168                 } else {
 169                     if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 170                         low = p - regex.dMax;
 171 
 172                         if (low > s) {
 173                             low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
 174                             if (lowPrev != null && lowPrev.value == -1) {
 175                                 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
 176                             }
 177                         } else {
 178                             if (lowPrev != null) {
 179                                 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low);
 180                             }
 181                         }
 182                     }
 183                 }
 184                 /* no needs to adjust *high, *high is used as range check only */
 185                 high = p - regex.dMin;
 186 
 187                 if (Config.DEBUG_SEARCH) {
 188                     Config.log.println("forward_search_range success: "+
 189                                         "low: " + (low - string) +
 190                                         ", high: " + (high - string) +
 191                                         ", dmin: " + regex.dMin +
 192                                         ", dmax: " + regex.dMax);
 193                 }
 194 
 195                 return true;    /* success */
 196             }
 197 
 198             return false;   /* fail */
 199         } //while
 200     }
 201 
 202     // low, high
 203     private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) {
 204         int r = range;
 205         r += regex.dMin;
 206         int p = s;
 207 
 208         retry:while (true) {
 209             p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r);
 210 
 211             if (p != -1) {
 212                 if (regex.subAnchor != 0) {
 213                     switch (regex.subAnchor) {
 214                     case AnchorType.BEGIN_LINE:
 215                         if (p != string) {
 216                             final int prev = EncodingHelper.prevCharHead(string, p);
 217                             if (!EncodingHelper.isNewLine(ch, prev, e)) {
 218                                 p = prev;
 219                                 continue retry;
 220                             }
 221                         }
 222                         break;
 223 
 224                     case AnchorType.END_LINE:
 225                         if (p == e) {
 226                             if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
 227                                 final int prev = EncodingHelper.prevCharHead(adjrange, p);
 228                                 if (prev == -1) {
 229                                     return false;
 230                                 }
 231                                 if (EncodingHelper.isNewLine(ch, prev, e)) {
 232                                     p = prev;
 233                                     continue retry;
 234                                 }
 235                             }
 236                         } else if (!EncodingHelper.isNewLine(ch, p, e)) {
 237                             p = EncodingHelper.prevCharHead(adjrange, p);
 238                             if (p == -1) {
 239                                 return false;
 240                             }
 241                             continue retry;
 242                         }
 243                         break;
 244 
 245                     default:
 246                         break;
 247                     } // switch
 248                 }
 249 
 250                 /* no needs to adjust *high, *high is used as range check only */
 251                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 252                     low = p - regex.dMax;
 253                     high = p - regex.dMin;
 254                 }
 255 
 256                 if (Config.DEBUG_SEARCH) {
 257                     Config.log.println("backward_search_range: "+
 258                                         "low: " + (low - string) +
 259                                         ", high: " + (high - string));
 260                 }
 261 
 262                 return true;
 263             }
 264 
 265             if (Config.DEBUG_SEARCH) {
 266                 Config.log.println("backward_search_range: fail.");
 267             }
 268             return false;
 269         } // while
 270     }
 271 
 272     // MATCH_AND_RETURN_CHECK
 273     private boolean matchCheck(final int upperRange, final int s, final int prev) {
 274         if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
 275             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 276                 //range = upperRange;
 277                 if (matchAt(upperRange, s, prev) != -1) {
 278                     if (!isFindLongest(regex.options)) {
 279                         return true;
 280                     }
 281                 }
 282             } else {
 283                 //range = upperRange;
 284                 if (matchAt(upperRange, s, prev) != -1) {
 285                     return true;
 286                 }
 287             }
 288         } else {
 289             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 290                 if (matchAt(end, s, prev) != -1) {
 291                     //range = upperRange;
 292                     if (!isFindLongest(regex.options)) {
 293                         return true;
 294                     }
 295                 }
 296             } else {
 297                 //range = upperRange;
 298                 if (matchAt(end, s, prev) != -1) {
 299                     return true;
 300                 }
 301             }
 302         }
 303         return false;
 304     }
 305 
 306     public final int search(final int startp, final int rangep, final int option) {
 307         int start = startp, range = rangep;
 308         int s, prev;
 309         int origStart = start;
 310         final int origRange = range;
 311 
 312         if (Config.DEBUG_SEARCH) {
 313             Config.log.println("onig_search (entry point): "+
 314                     "str: " + str +
 315                     ", end: " + (end - str) +
 316                     ", start: " + (start - str) +
 317                     ", range " + (range - str));
 318         }
 319 
 320         if (start > end || start < str) {
 321             return -1;
 322         }
 323 
 324         /* anchor optimize: resume search range */
 325         if (regex.anchor != 0 && str < end) {
 326             int minSemiEnd, maxSemiEnd;
 327 
 328             if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
 329                 /* search start-position only */
 330                 // !begin_position:!
 331                 if (range > start) {
 332                     range = start + 1;
 333                 } else {
 334                     range = start;
 335                 }
 336             } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
 337                 /* search str-position only */
 338                 if (range > start) {
 339                     if (start != str)
 340                      {
 341                         return -1; // mismatch_no_msa;
 342                     }
 343                     range = str + 1;
 344                 } else {
 345                     if (range <= str) {
 346                         start = str;
 347                         range = str;
 348                     } else {
 349                         return -1; // mismatch_no_msa;
 350                     }
 351                 }
 352             } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
 353                 minSemiEnd = maxSemiEnd = end;
 354                 // !end_buf:!
 355                 if (endBuf(start, range, minSemiEnd, maxSemiEnd))
 356                  {
 357                     return -1; // mismatch_no_msa;
 358                 }
 359             } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
 360                 final int preEnd = EncodingHelper.stepBack(str, end, 1);
 361                 maxSemiEnd = end;
 362                 if (EncodingHelper.isNewLine(chars, preEnd, end)) {
 363                     minSemiEnd = preEnd;
 364                     if (minSemiEnd > str && start <= minSemiEnd) {
 365                         // !goto end_buf;!
 366                         if (endBuf(start, range, minSemiEnd, maxSemiEnd))
 367                          {
 368                             return -1; // mismatch_no_msa;
 369                         }
 370                     }
 371                 } else {
 372                     minSemiEnd = end;
 373                     // !goto end_buf;!
 374                     if (endBuf(start, range, minSemiEnd, maxSemiEnd))
 375                      {
 376                         return -1; // mismatch_no_msa;
 377                     }
 378                 }
 379             } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
 380                 // goto !begin_position;!
 381                 if (range > start) {
 382                     range = start + 1;
 383                 } else {
 384                     range = start;
 385                 }
 386             }
 387 
 388         } else if (str == end) { /* empty string */
 389             // empty address ?
 390             if (Config.DEBUG_SEARCH) {
 391                 Config.log.println("onig_search: empty string.");
 392             }
 393 
 394             if (regex.thresholdLength == 0) {
 395                 s = start = str;
 396                 prev = -1;
 397                 msaInit(option, start);
 398 
 399                 if (matchCheck(end, s, prev)) {
 400                     return match(s);
 401                 }
 402                 return mismatch();
 403             }
 404             return -1; // goto mismatch_no_msa;
 405         }
 406 
 407         if (Config.DEBUG_SEARCH) {
 408             Config.log.println("onig_search(apply anchor): " +
 409                                 "end: " + (end - str) +
 410                                 ", start " + (start - str) +
 411                                 ", range " + (range - str));
 412         }
 413 
 414         msaInit(option, origStart);
 415 
 416         s = start;
 417         if (range > start) {    /* forward search */
 418             if (s > str) {
 419                 prev = EncodingHelper.prevCharHead(str, s);
 420             } else {
 421                 prev = 0; // -1
 422             }
 423 
 424             if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
 425                 int schRange = range;
 426                 if (regex.dMax != 0) {
 427                     if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
 428                         schRange = end;
 429                     } else {
 430                         schRange += regex.dMax;
 431                         if (schRange > end) {
 432                             schRange = end;
 433                         }
 434                     }
 435                 }
 436                 if ((end - start) < regex.thresholdLength) {
 437                     return mismatch();
 438                 }
 439 
 440                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
 441                     do {
 442                         if (!forwardSearchRange(chars, str, end, s, schRange, this)) {
 443                             return mismatch(); // low, high, lowPrev
 444                         }
 445                         if (s < low) {
 446                             s = low;
 447                             prev = value;
 448                         }
 449                         while (s <= high) {
 450                             if (matchCheck(origRange, s, prev)) {
 451                                 return match(s); // ???
 452                             }
 453                             prev = s;
 454                             s++;
 455                         }
 456                     } while (s < range);
 457                 }
 458                 /* check only. */
 459                 if (!forwardSearchRange(chars, str, end, s, schRange, null)) {
 460                     return mismatch();
 461                 }
 462 
 463                 if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
 464                     do {
 465                         if (matchCheck(origRange, s, prev)) {
 466                             return match(s);
 467                         }
 468                         prev = s;
 469                         s++;
 470                     } while (s < range);
 471                     return mismatch();
 472                 }
 473             }
 474 
 475             do {
 476                 if (matchCheck(origRange, s, prev)) {
 477                     return match(s);
 478                 }
 479                 prev = s;
 480                 s++;
 481             } while (s < range);
 482 
 483             if (s == range) { /* because empty match with /$/. */
 484                 if (matchCheck(origRange, s, prev)) {
 485                     return match(s);
 486                 }
 487             }
 488         } else { /* backward search */
 489             if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
 490                 if (origStart < end) {
 491                     origStart++; // /* is upper range */
 492                 }
 493             }
 494 
 495             if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
 496                 int adjrange;
 497                 if (range < end) {
 498                     adjrange = range;
 499                 } else {
 500                     adjrange = end;
 501                 }
 502                 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
 503                     do {
 504                         int schStart = s + regex.dMax;
 505                         if (schStart > end) {
 506                             schStart = end;
 507                         }
 508                         if (!backwardSearchRange(chars, str, end, schStart, range, adjrange))
 509                          {
 510                             return mismatch(); // low, high
 511                         }
 512                         if (s > high) {
 513                             s = high;
 514                         }
 515                         while (s != -1 && s >= low) {
 516                             prev = EncodingHelper.prevCharHead(str, s);
 517                             if (matchCheck(origStart, s, prev)) {
 518                                 return match(s);
 519                             }
 520                             s = prev;
 521                         }
 522                     } while (s >= range);
 523                     return mismatch();
 524                 }
 525                 if ((end - range) < regex.thresholdLength) {
 526                     return mismatch();
 527                 }
 528 
 529                 int schStart = s;
 530                 if (regex.dMax != 0) {
 531                     if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
 532                         schStart = end;
 533                     } else {
 534                         schStart += regex.dMax;
 535                         if (schStart > end) {
 536                             schStart = end;
 537                         }
 538                     }
 539                 }
 540                 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) {
 541                     return mismatch();
 542                 }
 543             }
 544 
 545             do {
 546                 prev = EncodingHelper.prevCharHead(str, s);
 547                 if (matchCheck(origStart, s, prev)) {
 548                     return match(s);
 549                 }
 550                 s = prev;
 551             } while (s >= range);
 552 
 553         }
 554         return mismatch();
 555     }
 556 
 557     private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) {
 558         int start = startp;
 559         int range = rangep;
 560 
 561         if ((maxSemiEnd - str) < regex.anchorDmin) {
 562             return true; // mismatch_no_msa;
 563         }
 564 
 565         if (range > start) {
 566             if ((minSemiEnd - start) > regex.anchorDmax) {
 567                 start = minSemiEnd - regex.anchorDmax;
 568                 if (start >= end) {
 569                     /* match with empty at end */
 570                     start = EncodingHelper.prevCharHead(str, end);
 571                 }
 572             }
 573             if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
 574                 range = maxSemiEnd - regex.anchorDmin + 1;
 575             }
 576             if (start >= range)
 577              {
 578                 return true; // mismatch_no_msa;
 579             }
 580         } else {
 581             if ((minSemiEnd - range) > regex.anchorDmax) {
 582                 range = minSemiEnd - regex.anchorDmax;
 583             }
 584             if ((maxSemiEnd - start) < regex.anchorDmin) {
 585                 start = maxSemiEnd - regex.anchorDmin;
 586             }
 587             if (range > start)
 588              {
 589                 return true; // mismatch_no_msa;
 590             }
 591         }
 592         return false;
 593     }
 594 
 595     private int match(final int s) {
 596         return s - str; // sstart ???
 597     }
 598 
 599     private int mismatch() {
 600         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
 601             if (msaBestLen >= 0) {
 602                 final int s = msaBestS;
 603                 return match(s);
 604             }
 605         }
 606         // falls through finish:
 607         return -1;
 608     }
 609 }