src/jdk/nashorn/internal/objects/NativeRegExp.java

Print this page

        

*** 29,40 **** import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED; import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import java.util.regex.Matcher; - import java.util.regex.Pattern; import jdk.nashorn.internal.objects.annotations.Attribute; import jdk.nashorn.internal.objects.annotations.Constructor; import jdk.nashorn.internal.objects.annotations.Function; import jdk.nashorn.internal.objects.annotations.Getter; import jdk.nashorn.internal.objects.annotations.Property; --- 29,38 ----
*** 47,79 **** import jdk.nashorn.internal.runtime.RegExpMatch; import jdk.nashorn.internal.runtime.ScriptFunction; import jdk.nashorn.internal.runtime.ScriptObject; import jdk.nashorn.internal.runtime.ScriptRuntime; /** * ECMA 15.10 RegExp Objects. */ @ScriptClass("RegExp") public final class NativeRegExp extends ScriptObject { /** ECMA 15.10.7.5 lastIndex property */ @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE) public Object lastIndex; /** Pattern string. */ private String input; /** Global search flag for this regexp. */ private boolean global; /** Case insensitive flag for this regexp */ private boolean ignoreCase; /** Multi-line flag for this regexp */ private boolean multiline; ! /** Java regex pattern to use for match. We compile to one of these */ ! private Pattern pattern; private BitVector groupsInNegativeLookahead; /* public NativeRegExp() { --- 45,88 ---- import jdk.nashorn.internal.runtime.RegExpMatch; import jdk.nashorn.internal.runtime.ScriptFunction; import jdk.nashorn.internal.runtime.ScriptObject; import jdk.nashorn.internal.runtime.ScriptRuntime; + import jdk.nashorn.internal.joni.Matcher; + import jdk.nashorn.internal.joni.Option; + import jdk.nashorn.internal.joni.Regex; + import jdk.nashorn.internal.joni.Region; + /** * ECMA 15.10 RegExp Objects. */ @ScriptClass("RegExp") public final class NativeRegExp extends ScriptObject { /** ECMA 15.10.7.5 lastIndex property */ @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE) public Object lastIndex; /** Pattern string. */ + private String source; + + /** Input string. */ private String input; + /** Input string as char array */ + private char[] inputChars; + /** Global search flag for this regexp. */ private boolean global; /** Case insensitive flag for this regexp */ private boolean ignoreCase; /** Multi-line flag for this regexp */ private boolean multiline; ! /** Joni regex pattern to use for match. We compile to one of these */ ! private Regex regex; private BitVector groupsInNegativeLookahead; /* public NativeRegExp() {
*** 89,133 **** e.throwAsEcmaException(Global.instance()); throw new AssertionError(); //guard against null warnings below } this.setLastIndex(0); ! this.input = regExp.getInput(); this.global = regExp.isGlobal(); this.ignoreCase = regExp.isIgnoreCase(); this.multiline = regExp.isMultiline(); ! this.pattern = regExp.getPattern(); this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead(); init(); } NativeRegExp(final String string) { this(string, ""); } NativeRegExp(final NativeRegExp regExp) { ! this.input = regExp.getInput(); this.global = regExp.getGlobal(); this.multiline = regExp.getMultiline(); this.ignoreCase = regExp.getIgnoreCase(); this.lastIndex = regExp.getLastIndexObject(); ! this.pattern = regExp.getPattern(); this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead(); init(); } ! NativeRegExp(final Pattern pattern) { this.input = pattern.pattern(); this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0; this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0; this.lastIndex = 0; this.pattern = pattern; init(); ! } @Override public String getClassName() { return "RegExp"; } --- 98,142 ---- e.throwAsEcmaException(Global.instance()); throw new AssertionError(); //guard against null warnings below } this.setLastIndex(0); ! this.source = regExp.getSource(); this.global = regExp.isGlobal(); this.ignoreCase = regExp.isIgnoreCase(); this.multiline = regExp.isMultiline(); ! this.regex = regExp.getRegex(); this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead(); init(); } NativeRegExp(final String string) { this(string, ""); } NativeRegExp(final NativeRegExp regExp) { ! this.source = regExp.getSource(); this.global = regExp.getGlobal(); this.multiline = regExp.getMultiline(); this.ignoreCase = regExp.getIgnoreCase(); this.lastIndex = regExp.getLastIndexObject(); ! this.regex = regExp.getRegex(); this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead(); init(); } ! /* NativeRegExp(final Pattern pattern) { this.input = pattern.pattern(); this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0; this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0; this.lastIndex = 0; this.pattern = pattern; init(); ! } */ @Override public String getClassName() { return "RegExp"; }
*** 227,236 **** --- 236,271 ---- } return new NativeRegExp(patternString, flagString); } + public static NativeRegExp flatRegExp(String string) { + // escape special characters + StringBuilder sb = new StringBuilder(string.length()); + for (int i = 0; i < string.length(); i++) { + final char c = string.charAt(i); + switch (c) { + case '^': + case '$': + case '\\': + case '.': + case '*': + case '+': + case '?': + case '(': + case ')': + case '[': + case '{': + case '|': + sb.append('\\'); + default: + sb.append(c); + } + } + return new NativeRegExp(sb.toString(), ""); + } + private String getFlagString() { final StringBuilder sb = new StringBuilder(); if (global) { sb.append('g');
*** 250,260 **** return "[RegExp " + toString() + "]"; } @Override public String toString() { ! return "/" + input + "/" + getFlagString(); } /** * Nashorn extension: RegExp.prototype.compile - everybody implements this! * --- 285,295 ---- return "[RegExp " + toString() + "]"; } @Override public String toString() { ! return "/" + source + "/" + getFlagString(); } /** * Nashorn extension: RegExp.prototype.compile - everybody implements this! *
*** 266,280 **** @Function(attributes = Attribute.NOT_ENUMERABLE) public static Object compile(final Object self, final Object pattern, final Object flags) { final NativeRegExp regExp = checkRegExp(self); final NativeRegExp compiled = newRegExp(pattern, flags); // copy over fields to 'self' ! regExp.setInput(compiled.getInput()); regExp.setGlobal(compiled.getGlobal()); regExp.setIgnoreCase(compiled.getIgnoreCase()); regExp.setMultiline(compiled.getMultiline()); ! regExp.setPattern(compiled.getPattern()); regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead()); // Some implementations return undefined. Some return 'self'. Since return // value is most likely be ignored, we can play safe and return 'self'. return regExp; --- 301,315 ---- @Function(attributes = Attribute.NOT_ENUMERABLE) public static Object compile(final Object self, final Object pattern, final Object flags) { final NativeRegExp regExp = checkRegExp(self); final NativeRegExp compiled = newRegExp(pattern, flags); // copy over fields to 'self' ! regExp.setSource(compiled.getSource()); regExp.setGlobal(compiled.getGlobal()); regExp.setIgnoreCase(compiled.getIgnoreCase()); regExp.setMultiline(compiled.getMultiline()); ! regExp.setRegex(compiled.getRegex()); regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead()); // Some implementations return undefined. Some return 'self'. Since return // value is most likely be ignored, we can play safe and return 'self'. return regExp;
*** 321,331 **** * @param self self reference * @return the input string for the regexp */ @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE) public static Object source(final Object self) { ! return checkRegExp(self).input; } /** * ECMA 15.10.7.2 global * --- 356,366 ---- * @param self self reference * @return the input string for the regexp */ @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE) public static Object source(final Object self) { ! return checkRegExp(self).source; } /** * ECMA 15.10.7.2 global *
*** 358,415 **** public static Object multiline(final Object self) { return checkRegExp(self).multiline; } private RegExpMatch execInner(final String string) { ! if (this.pattern == null) { return null; // never matches or similar, e.g. a[] } ! final Matcher matcher = pattern.matcher(string); final int start = this.global ? getLastIndex() : 0; if (start < 0 || start > string.length()) { setLastIndex(0); return null; } ! if (!matcher.find(start)) { setLastIndex(0); return null; } if (global) { ! setLastIndex(matcher.end()); } ! return new RegExpMatch(string, matcher.start(), groups(matcher)); } /** * Convert java.util.regex.Matcher groups to JavaScript groups. * That is, replace null and groups that didn't match with undefined. */ private Object[] groups(final Matcher matcher) { ! final int groupCount = matcher.groupCount(); final Object[] groups = new Object[groupCount + 1]; ! for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) { ! final int groupStart = matcher.start(i); if (lastGroupStart > groupStart || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) { // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated. // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere // in the pattern always return undefined because the negative lookahead must fail. groups[i] = UNDEFINED; continue; } ! final String group = matcher.group(i); ! groups[i] = group == null ? UNDEFINED : group; lastGroupStart = groupStart; } return groups; } /** * Executes a search for a match within a string based on a regular * expression. It returns an array of information or null if no match is * found. * --- 393,463 ---- public static Object multiline(final Object self) { return checkRegExp(self).multiline; } private RegExpMatch execInner(final String string) { ! if (this.regex == null) { return null; // never matches or similar, e.g. a[] } ! setInput(string); ! ! final Matcher matcher = regex.matcher(inputChars); final int start = this.global ? getLastIndex() : 0; if (start < 0 || start > string.length()) { setLastIndex(0); return null; } ! if (matcher.search(start, inputChars.length, Option.NONE) == -1) { setLastIndex(0); return null; } if (global) { ! setLastIndex(matcher.getEnd()); } ! return new RegExpMatch(string, matcher.getBegin(), groups(matcher)); } /** * Convert java.util.regex.Matcher groups to JavaScript groups. * That is, replace null and groups that didn't match with undefined. */ private Object[] groups(final Matcher matcher) { ! Region region = matcher.getRegion(); ! final int groupCount = region == null ? 0 : region.numRegs - 1; final Object[] groups = new Object[groupCount + 1]; ! groups[0] = input.substring(matcher.getBegin(), matcher.getEnd()); ! ! for (int i = 1, lastGroupStart = matcher.getBegin(); i <= groupCount; i++) { ! final int groupStart = region.beg[i]; if (lastGroupStart > groupStart || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) { // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated. // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere // in the pattern always return undefined because the negative lookahead must fail. groups[i] = UNDEFINED; continue; } ! final int begin = region.beg[i]; ! final int end = region.end[i]; ! groups[i] = begin == -1 ? UNDEFINED : input.substring(begin, end); lastGroupStart = groupStart; } return groups; } + private void setInput(String input) { + if (!input.equals(this.input)) { + this.input = input; + this.inputChars = input.toCharArray(); + } + } + /** * Executes a search for a match within a string based on a regular * expression. It returns an array of information or null if no match is * found. *
*** 446,456 **** * @param string String to match. * @param replacement Replacement string. * @return String with substitutions. */ Object replace(final String string, final String replacement, final ScriptFunction function) { ! final Matcher matcher = pattern.matcher(string); /* * $$ -> $ * $& -> the matched substring * $` -> the portion of string that preceeds matched substring * $' -> the portion of string that follows the matched substring --- 494,505 ---- * @param string String to match. * @param replacement Replacement string. * @return String with substitutions. */ Object replace(final String string, final String replacement, final ScriptFunction function) { ! setInput(string); ! final Matcher matcher = regex.matcher(inputChars); /* * $$ -> $ * $& -> the matched substring * $` -> the portion of string that preceeds matched substring * $' -> the portion of string that follows the matched substring
*** 458,514 **** * $nn -> the nnth capture, where nn is a two digit decimal number [01-99]. */ String replace = replacement; if (!global) { ! if (!matcher.find()) { return string; } final StringBuilder sb = new StringBuilder(); if (function != null) { replace = callReplaceValue(function, matcher, string); } appendReplacement(matcher, string, replace, sb, 0); ! sb.append(string, matcher.end(), string.length()); return sb.toString(); } int end = 0; // a.k.a. lastAppendPosition setLastIndex(0); ! boolean found; try { ! found = matcher.find(end); } catch (final IndexOutOfBoundsException e) { ! found = false; } ! if (!found) { return string; } int previousLastIndex = 0; final StringBuilder sb = new StringBuilder(); do { if (function != null) { replace = callReplaceValue(function, matcher, string); } ! appendReplacement(matcher, string, replace, sb, end); ! end = matcher.end(); // ECMA 15.5.4.10 String.prototype.match(regexp) ! final int thisIndex = end; if (thisIndex == previousLastIndex) { setLastIndex(thisIndex + 1); previousLastIndex = thisIndex + 1; } else { previousLastIndex = thisIndex; } ! } while (matcher.find()); ! sb.append(string, end, string.length()); return sb.toString(); } private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) { --- 507,565 ---- * $nn -> the nnth capture, where nn is a two digit decimal number [01-99]. */ String replace = replacement; if (!global) { ! if (matcher.search(0, inputChars.length, Option.NONE) == -1) { return string; } final StringBuilder sb = new StringBuilder(); if (function != null) { replace = callReplaceValue(function, matcher, string); } appendReplacement(matcher, string, replace, sb, 0); ! sb.append(string, matcher.getEnd(), string.length()); return sb.toString(); } int end = 0; // a.k.a. lastAppendPosition setLastIndex(0); ! int found; try { ! found = matcher.search(end, inputChars.length, Option.NONE); } catch (final IndexOutOfBoundsException e) { ! found = -1; } ! if (found == -1) { return string; } + int thisIndex = 0; int previousLastIndex = 0; final StringBuilder sb = new StringBuilder(); do { if (function != null) { replace = callReplaceValue(function, matcher, string); } ! appendReplacement(matcher, string, replace, sb, thisIndex); ! end = matcher.getEnd(); // ECMA 15.5.4.10 String.prototype.match(regexp) ! thisIndex = end; if (thisIndex == previousLastIndex) { setLastIndex(thisIndex + 1); previousLastIndex = thisIndex + 1; + end++; } else { previousLastIndex = thisIndex; } ! } while (matcher.search(end, inputChars.length, Option.NONE) > -1); ! sb.append(string, thisIndex, string.length()); return sb.toString(); } private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
*** 522,541 **** if (nextChar == '$') { // Skip past $ cursor++; nextChar = replacement.charAt(cursor); final int firstDigit = nextChar - '0'; ! if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) { // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit. int refNum = firstDigit; cursor++; ! if (cursor < replacement.length() && firstDigit < matcher.groupCount()) { final int secondDigit = replacement.charAt(cursor) - '0'; if ((secondDigit >= 0) && (secondDigit <= 9)) { final int newRefNum = (firstDigit * 10) + secondDigit; ! if (newRefNum <= matcher.groupCount() && newRefNum > 0) { // $nn ($01-$99) refNum = newRefNum; cursor++; } } --- 573,594 ---- if (nextChar == '$') { // Skip past $ cursor++; nextChar = replacement.charAt(cursor); final int firstDigit = nextChar - '0'; + Region region = matcher.getRegion(); + int groupCount = region == null ? 0 : region.numRegs - 1; ! if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= groupCount) { // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit. int refNum = firstDigit; cursor++; ! if (cursor < replacement.length() && firstDigit < groupCount) { final int secondDigit = replacement.charAt(cursor) - '0'; if ((secondDigit >= 0) && (secondDigit <= 9)) { final int newRefNum = (firstDigit * 10) + secondDigit; ! if (newRefNum <= groupCount && newRefNum > 0) { // $nn ($01-$99) refNum = newRefNum; cursor++; } }
*** 554,570 **** } } else if (nextChar == '$') { result.append('$'); cursor++; } else if (nextChar == '&') { ! result.append(matcher.group()); cursor++; } else if (nextChar == '`') { ! result.append(text.substring(0, matcher.start())); cursor++; } else if (nextChar == '\'') { ! result.append(text.substring(matcher.end())); cursor++; } else { // unknown substitution or $n with n>m. skip. result.append('$'); } --- 607,623 ---- } } else if (nextChar == '$') { result.append('$'); cursor++; } else if (nextChar == '&') { ! result.append(text, matcher.getBegin(), matcher.getEnd()); cursor++; } else if (nextChar == '`') { ! result.append(text, 0, matcher.getBegin()); cursor++; } else if (nextChar == '\'') { ! result.append(text, matcher.getEnd(), text.length()); cursor++; } else { // unknown substitution or $n with n>m. skip. result.append('$'); }
*** 572,591 **** result.append(nextChar); cursor++; } } // Append the intervening text ! sb.append(text, lastAppendPosition, matcher.start()); // Append the match substitution sb.append(result); } private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) { final Object[] groups = groups(matcher); final Object[] args = Arrays.copyOf(groups, groups.length + 2); ! args[groups.length] = matcher.start(); args[groups.length + 1] = string; final Object self = function.isStrict() ? UNDEFINED : Global.instance(); return JSType.toString(ScriptRuntime.apply(function, self, args)); --- 625,644 ---- result.append(nextChar); cursor++; } } // Append the intervening text ! sb.append(text, lastAppendPosition, matcher.getBegin()); // Append the match substitution sb.append(result); } private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) { final Object[] groups = groups(matcher); final Object[] args = Arrays.copyOf(groups, groups.length + 2); ! args[groups.length] = matcher.getBegin(); args[groups.length + 1] = string; final Object self = function.isStrict() ? UNDEFINED : Global.instance(); return JSType.toString(ScriptRuntime.apply(function, self, args));
*** 663,683 **** * * @param string String to match. * @return Index of match. */ Object search(final String string) { ! final Matcher matcher = pattern.matcher(string); int start = 0; if (global) { start = getLastIndex(); } ! start = matcher.find(start) ? matcher.start() : -1; if (global) { ! setLastIndex(matcher.end()); } return start; } --- 716,738 ---- * * @param string String to match. * @return Index of match. */ Object search(final String string) { ! setInput(string); ! ! final Matcher matcher = regex.matcher(inputChars); int start = 0; if (global) { start = getLastIndex(); } ! start = matcher.search(start, inputChars.length, Option.NONE); if (global) { ! setLastIndex(matcher.getEnd()); } return start; }
*** 719,734 **** typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self)); return null; } } ! private String getInput() { ! return input; } ! private void setInput(final String input) { ! this.input = input; } boolean getGlobal() { return global; } --- 774,789 ---- typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self)); return null; } } ! private String getSource() { ! return source; } ! private void setSource(final String source) { ! this.source = source; } boolean getGlobal() { return global; }
*** 751,766 **** private void setMultiline(final boolean multiline) { this.multiline = multiline; } ! private Pattern getPattern() { ! return pattern; } ! private void setPattern(final Pattern pattern) { ! this.pattern = pattern; } private BitVector getGroupsInNegativeLookahead() { return groupsInNegativeLookahead; } --- 806,821 ---- private void setMultiline(final boolean multiline) { this.multiline = multiline; } ! private Regex getRegex() { ! return regex; } ! private void setRegex(final Regex regex) { ! this.regex = regex; } private BitVector getGroupsInNegativeLookahead() { return groupsInNegativeLookahead; }