--- old/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java 2020-04-15 18:51:34.000000000 +0530 +++ /dev/null 2020-04-15 18:51:34.000000000 +0530 @@ -1,609 +0,0 @@ -/* - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package jdk.nashorn.internal.runtime.regexp.joni; - -import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; -import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; -import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; - -@SuppressWarnings("javadoc") -public abstract class Matcher extends IntHolder { - protected final Regex regex; - - protected final char[] chars; - protected final int str; - protected final int end; - - protected int msaStart; - protected int msaOptions; - protected final Region msaRegion; - protected int msaBestLen; - protected int msaBestS; - - protected int msaBegin; - protected int msaEnd; - - public Matcher(final Regex regex, final char[] chars) { - this(regex, chars, 0, chars.length); - } - - public Matcher(final Regex regex, final char[] chars, final int p, final int end) { - this.regex = regex; - - this.chars = chars; - this.str = p; - this.end = end; - - this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); - } - - // main matching method - protected abstract int matchAt(int range, int sstart, int sprev); - - public final Region getRegion() { - return msaRegion; - } - - public final int getBegin() { - return msaBegin; - } - - public final int getEnd() { - return msaEnd; - } - - protected final void msaInit(final int option, final int start) { - msaOptions = option; - msaStart = start; - if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { - msaBestLen = -1; - } - } - - public final int match(final int at, final int range, final int option) { - msaInit(option, at); - - final int prev = EncodingHelper.prevCharHead(str, at); - - if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { - return matchAt(end /*range*/, at, prev); - } - return matchAt(range /*range*/, at, prev); - } - - int low, high; // these are the return values - private boolean forwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final IntHolder lowPrev) { - int pprev = -1; - int p = s; - - if (Config.DEBUG_SEARCH) { - Config.log.println("forward_search_range: "+ - "str: " + string + - ", end: " + e + - ", s: " + s + - ", range: " + range); - } - - if (regex.dMin > 0) { - p += regex.dMin; - } - - retry:while (true) { - p = regex.searchAlgorithm.search(regex, ch, p, e, range); - - if (p != -1 && p < range) { - if (p - regex.dMin < s) { - // retry_gate: - pprev = p; - p++; - continue retry; - } - - if (regex.subAnchor != 0) { - switch (regex.subAnchor) { - case AnchorType.BEGIN_LINE: - if (p != string) { - final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); - if (!EncodingHelper.isNewLine(ch, prev, e)) { - // goto retry_gate; - pprev = p; - p++; - continue retry; - } - } - break; - - case AnchorType.END_LINE: - if (p == e) { - if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { - final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); - if (prev != -1 && EncodingHelper.isNewLine(ch, prev, e)) { - // goto retry_gate; - pprev = p; - p++; - continue retry; - } - } - } else if (!EncodingHelper.isNewLine(ch, p, e)) { - //if () break; - // goto retry_gate; - pprev = p; - p++; - continue retry; - } - break; - - default: - break; - } // switch - } - - if (regex.dMax == 0) { - low = p; - if (lowPrev != null) { // ??? // remove null checks - if (low > s) { - lowPrev.value = EncodingHelper.prevCharHead(s, p); - } else { - lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, p); - } - } - } else { - if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { - low = p - regex.dMax; - - if (low > s) { - low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); - if (lowPrev != null && lowPrev.value == -1) { - lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); - } - } else { - if (lowPrev != null) { - lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : string, low); - } - } - } - } - /* no needs to adjust *high, *high is used as range check only */ - high = p - regex.dMin; - - if (Config.DEBUG_SEARCH) { - Config.log.println("forward_search_range success: "+ - "low: " + (low - string) + - ", high: " + (high - string) + - ", dmin: " + regex.dMin + - ", dmax: " + regex.dMax); - } - - return true; /* success */ - } - - return false; /* fail */ - } //while - } - - // low, high - private boolean backwardSearchRange(final char[] ch, final int string, final int e, final int s, final int range, final int adjrange) { - int r = range; - r += regex.dMin; - int p = s; - - retry:while (true) { - p = regex.searchAlgorithm.searchBackward(regex, ch, r, adjrange, e, p, s, r); - - if (p != -1) { - if (regex.subAnchor != 0) { - switch (regex.subAnchor) { - case AnchorType.BEGIN_LINE: - if (p != string) { - final int prev = EncodingHelper.prevCharHead(string, p); - if (!EncodingHelper.isNewLine(ch, prev, e)) { - p = prev; - continue retry; - } - } - break; - - case AnchorType.END_LINE: - if (p == e) { - if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { - final int prev = EncodingHelper.prevCharHead(adjrange, p); - if (prev == -1) { - return false; - } - if (EncodingHelper.isNewLine(ch, prev, e)) { - p = prev; - continue retry; - } - } - } else if (!EncodingHelper.isNewLine(ch, p, e)) { - p = EncodingHelper.prevCharHead(adjrange, p); - if (p == -1) { - return false; - } - continue retry; - } - break; - - default: - break; - } // switch - } - - /* no needs to adjust *high, *high is used as range check only */ - if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { - low = p - regex.dMax; - high = p - regex.dMin; - } - - if (Config.DEBUG_SEARCH) { - Config.log.println("backward_search_range: "+ - "low: " + (low - string) + - ", high: " + (high - string)); - } - - return true; - } - - if (Config.DEBUG_SEARCH) { - Config.log.println("backward_search_range: fail."); - } - return false; - } // while - } - - // MATCH_AND_RETURN_CHECK - private boolean matchCheck(final int upperRange, final int s, final int prev) { - if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { - if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { - //range = upperRange; - if (matchAt(upperRange, s, prev) != -1) { - if (!isFindLongest(regex.options)) { - return true; - } - } - } else { - //range = upperRange; - if (matchAt(upperRange, s, prev) != -1) { - return true; - } - } - } else { - if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { - if (matchAt(end, s, prev) != -1) { - //range = upperRange; - if (!isFindLongest(regex.options)) { - return true; - } - } - } else { - //range = upperRange; - if (matchAt(end, s, prev) != -1) { - return true; - } - } - } - return false; - } - - public final int search(final int startp, final int rangep, final int option) { - int start = startp, range = rangep; - int s, prev; - int origStart = start; - final int origRange = range; - - if (Config.DEBUG_SEARCH) { - Config.log.println("onig_search (entry point): "+ - "str: " + str + - ", end: " + (end - str) + - ", start: " + (start - str) + - ", range " + (range - str)); - } - - if (start > end || start < str) { - return -1; - } - - /* anchor optimize: resume search range */ - if (regex.anchor != 0 && str < end) { - int minSemiEnd, maxSemiEnd; - - if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { - /* search start-position only */ - // !begin_position:! - if (range > start) { - range = start + 1; - } else { - range = start; - } - } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { - /* search str-position only */ - if (range > start) { - if (start != str) - { - return -1; // mismatch_no_msa; - } - range = str + 1; - } else { - if (range <= str) { - start = str; - range = str; - } else { - return -1; // mismatch_no_msa; - } - } - } else if ((regex.anchor & AnchorType.END_BUF) != 0) { - minSemiEnd = maxSemiEnd = end; - // !end_buf:! - if (endBuf(start, range, minSemiEnd, maxSemiEnd)) - { - return -1; // mismatch_no_msa; - } - } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { - final int preEnd = EncodingHelper.stepBack(str, end, 1); - maxSemiEnd = end; - if (EncodingHelper.isNewLine(chars, preEnd, end)) { - minSemiEnd = preEnd; - if (minSemiEnd > str && start <= minSemiEnd) { - // !goto end_buf;! - if (endBuf(start, range, minSemiEnd, maxSemiEnd)) - { - return -1; // mismatch_no_msa; - } - } - } else { - minSemiEnd = end; - // !goto end_buf;! - if (endBuf(start, range, minSemiEnd, maxSemiEnd)) - { - return -1; // mismatch_no_msa; - } - } - } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { - // goto !begin_position;! - if (range > start) { - range = start + 1; - } else { - range = start; - } - } - - } else if (str == end) { /* empty string */ - // empty address ? - if (Config.DEBUG_SEARCH) { - Config.log.println("onig_search: empty string."); - } - - if (regex.thresholdLength == 0) { - s = start = str; - prev = -1; - msaInit(option, start); - - if (matchCheck(end, s, prev)) { - return match(s); - } - return mismatch(); - } - return -1; // goto mismatch_no_msa; - } - - if (Config.DEBUG_SEARCH) { - Config.log.println("onig_search(apply anchor): " + - "end: " + (end - str) + - ", start " + (start - str) + - ", range " + (range - str)); - } - - msaInit(option, origStart); - - s = start; - if (range > start) { /* forward search */ - if (s > str) { - prev = EncodingHelper.prevCharHead(str, s); - } else { - prev = 0; // -1 - } - - if (regex.searchAlgorithm != SearchAlgorithm.NONE) { - int schRange = range; - if (regex.dMax != 0) { - if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { - schRange = end; - } else { - schRange += regex.dMax; - if (schRange > end) { - schRange = end; - } - } - } - if ((end - start) < regex.thresholdLength) { - return mismatch(); - } - - if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { - do { - if (!forwardSearchRange(chars, str, end, s, schRange, this)) { - return mismatch(); // low, high, lowPrev - } - if (s < low) { - s = low; - prev = value; - } - while (s <= high) { - if (matchCheck(origRange, s, prev)) { - return match(s); // ??? - } - prev = s; - s++; - } - } while (s < range); - } - /* check only. */ - if (!forwardSearchRange(chars, str, end, s, schRange, null)) { - return mismatch(); - } - - if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { - do { - if (matchCheck(origRange, s, prev)) { - return match(s); - } - prev = s; - s++; - } while (s < range); - return mismatch(); - } - } - - do { - if (matchCheck(origRange, s, prev)) { - return match(s); - } - prev = s; - s++; - } while (s < range); - - if (s == range) { /* because empty match with /$/. */ - if (matchCheck(origRange, s, prev)) { - return match(s); - } - } - } else { /* backward search */ - if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { - if (origStart < end) { - origStart++; // /* is upper range */ - } - } - - if (regex.searchAlgorithm != SearchAlgorithm.NONE) { - int adjrange; - if (range < end) { - adjrange = range; - } else { - adjrange = end; - } - if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { - do { - int schStart = s + regex.dMax; - if (schStart > end) { - schStart = end; - } - if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) - { - return mismatch(); // low, high - } - if (s > high) { - s = high; - } - while (s != -1 && s >= low) { - prev = EncodingHelper.prevCharHead(str, s); - if (matchCheck(origStart, s, prev)) { - return match(s); - } - s = prev; - } - } while (s >= range); - return mismatch(); - } - if ((end - range) < regex.thresholdLength) { - return mismatch(); - } - - int schStart = s; - if (regex.dMax != 0) { - if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { - schStart = end; - } else { - schStart += regex.dMax; - if (schStart > end) { - schStart = end; - } - } - } - if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) { - return mismatch(); - } - } - - do { - prev = EncodingHelper.prevCharHead(str, s); - if (matchCheck(origStart, s, prev)) { - return match(s); - } - s = prev; - } while (s >= range); - - } - return mismatch(); - } - - private boolean endBuf(final int startp, final int rangep, final int minSemiEnd, final int maxSemiEnd) { - int start = startp; - int range = rangep; - - if ((maxSemiEnd - str) < regex.anchorDmin) { - return true; // mismatch_no_msa; - } - - if (range > start) { - if ((minSemiEnd - start) > regex.anchorDmax) { - start = minSemiEnd - regex.anchorDmax; - if (start >= end) { - /* match with empty at end */ - start = EncodingHelper.prevCharHead(str, end); - } - } - if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { - range = maxSemiEnd - regex.anchorDmin + 1; - } - if (start >= range) - { - return true; // mismatch_no_msa; - } - } else { - if ((minSemiEnd - range) > regex.anchorDmax) { - range = minSemiEnd - regex.anchorDmax; - } - if ((maxSemiEnd - start) < regex.anchorDmin) { - start = maxSemiEnd - regex.anchorDmin; - } - if (range > start) - { - return true; // mismatch_no_msa; - } - } - return false; - } - - private int match(final int s) { - return s - str; // sstart ??? - } - - private int mismatch() { - if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { - if (msaBestLen >= 0) { - final int s = msaBestS; - return match(s); - } - } - // falls through finish: - return -1; - } -}