src/jdk/nashorn/internal/objects/NativeRegExp.java
Print this page
*** 29,40 ****
import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
import jdk.nashorn.internal.objects.annotations.Attribute;
import jdk.nashorn.internal.objects.annotations.Constructor;
import jdk.nashorn.internal.objects.annotations.Function;
import jdk.nashorn.internal.objects.annotations.Getter;
import jdk.nashorn.internal.objects.annotations.Property;
--- 29,38 ----
*** 47,79 ****
import jdk.nashorn.internal.runtime.RegExpMatch;
import jdk.nashorn.internal.runtime.ScriptFunction;
import jdk.nashorn.internal.runtime.ScriptObject;
import jdk.nashorn.internal.runtime.ScriptRuntime;
/**
* ECMA 15.10 RegExp Objects.
*/
@ScriptClass("RegExp")
public final class NativeRegExp extends ScriptObject {
/** ECMA 15.10.7.5 lastIndex property */
@Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
public Object lastIndex;
/** Pattern string. */
private String input;
/** Global search flag for this regexp. */
private boolean global;
/** Case insensitive flag for this regexp */
private boolean ignoreCase;
/** Multi-line flag for this regexp */
private boolean multiline;
! /** Java regex pattern to use for match. We compile to one of these */
! private Pattern pattern;
private BitVector groupsInNegativeLookahead;
/*
public NativeRegExp() {
--- 45,88 ----
import jdk.nashorn.internal.runtime.RegExpMatch;
import jdk.nashorn.internal.runtime.ScriptFunction;
import jdk.nashorn.internal.runtime.ScriptObject;
import jdk.nashorn.internal.runtime.ScriptRuntime;
+ import jdk.nashorn.internal.joni.Matcher;
+ import jdk.nashorn.internal.joni.Option;
+ import jdk.nashorn.internal.joni.Regex;
+ import jdk.nashorn.internal.joni.Region;
+
/**
* ECMA 15.10 RegExp Objects.
*/
@ScriptClass("RegExp")
public final class NativeRegExp extends ScriptObject {
/** ECMA 15.10.7.5 lastIndex property */
@Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
public Object lastIndex;
/** Pattern string. */
+ private String source;
+
+ /** Input string. */
private String input;
+ /** Input string as char array */
+ private char[] inputChars;
+
/** Global search flag for this regexp. */
private boolean global;
/** Case insensitive flag for this regexp */
private boolean ignoreCase;
/** Multi-line flag for this regexp */
private boolean multiline;
! /** Joni regex pattern to use for match. We compile to one of these */
! private Regex regex;
private BitVector groupsInNegativeLookahead;
/*
public NativeRegExp() {
*** 89,133 ****
e.throwAsEcmaException(Global.instance());
throw new AssertionError(); //guard against null warnings below
}
this.setLastIndex(0);
! this.input = regExp.getInput();
this.global = regExp.isGlobal();
this.ignoreCase = regExp.isIgnoreCase();
this.multiline = regExp.isMultiline();
! this.pattern = regExp.getPattern();
this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
init();
}
NativeRegExp(final String string) {
this(string, "");
}
NativeRegExp(final NativeRegExp regExp) {
! this.input = regExp.getInput();
this.global = regExp.getGlobal();
this.multiline = regExp.getMultiline();
this.ignoreCase = regExp.getIgnoreCase();
this.lastIndex = regExp.getLastIndexObject();
! this.pattern = regExp.getPattern();
this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
init();
}
! NativeRegExp(final Pattern pattern) {
this.input = pattern.pattern();
this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0;
this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
this.lastIndex = 0;
this.pattern = pattern;
init();
! }
@Override
public String getClassName() {
return "RegExp";
}
--- 98,142 ----
e.throwAsEcmaException(Global.instance());
throw new AssertionError(); //guard against null warnings below
}
this.setLastIndex(0);
! this.source = regExp.getSource();
this.global = regExp.isGlobal();
this.ignoreCase = regExp.isIgnoreCase();
this.multiline = regExp.isMultiline();
! this.regex = regExp.getRegex();
this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
init();
}
/**
 * Creates a regexp from a pattern string by delegating to the
 * two-argument constructor with an empty flag string.
 *
 * @param string pattern string
 */
NativeRegExp(final String string) {
this(string, "");
}
! /**
!  * Copy constructor: takes over the pattern source, the compiled regex, the
!  * negative-lookahead group set, the flags and the lastIndex object of
!  * another regexp, then calls {@code init()}.
!  *
!  * @param regExp regexp to copy from
!  */
NativeRegExp(final NativeRegExp regExp) {
!     // Pattern text and the compiled state that belongs to it.
!     source = regExp.getSource();
!     regex = regExp.getRegex();
!     groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
!
!     // Flags.
!     global = regExp.getGlobal();
!     multiline = regExp.getMultiline();
!     ignoreCase = regExp.getIgnoreCase();
!
!     // Current matching position.
!     lastIndex = regExp.getLastIndexObject();
!
!     init();
}
! /* NativeRegExp(final Pattern pattern) {
this.input = pattern.pattern();
this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0;
this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
this.lastIndex = 0;
this.pattern = pattern;
init();
! } */
/**
 * Returns the fixed class name {@code "RegExp"} for this object.
 */
@Override
public String getClassName() {
return "RegExp";
}
*** 227,236 ****
--- 236,271 ----
}
return new NativeRegExp(patternString, flagString);
}
+ /**
+  * Builds a regexp from a plain string: each of the characters
+  * {@code ^ $ \ . * + ? ( ) [ { |} is prefixed with a backslash, every other
+  * character is copied unchanged, and the result is compiled with an empty
+  * flag string.
+  *
+  * @param string text to turn into a pattern
+  * @return regexp created from the escaped text
+  */
+ public static NativeRegExp flatRegExp(String string) {
+     final int length = string.length();
+     final StringBuilder escaped = new StringBuilder(length);
+     for (int pos = 0; pos < length; pos++) {
+         final char ch = string.charAt(pos);
+         // Characters from the escape set get a backslash in front of them.
+         if ("^$\\.*+?()[{|".indexOf(ch) >= 0) {
+             escaped.append('\\');
+         }
+         escaped.append(ch);
+     }
+     return new NativeRegExp(escaped.toString(), "");
+ }
+
private String getFlagString() {
final StringBuilder sb = new StringBuilder();
if (global) {
sb.append('g');
*** 250,260 ****
return "[RegExp " + toString() + "]";
}
@Override
public String toString() {
! return "/" + input + "/" + getFlagString();
}
/**
* Nashorn extension: RegExp.prototype.compile - everybody implements this!
*
--- 285,295 ----
return "[RegExp " + toString() + "]";
}
@Override
public String toString() {
! return "/" + source + "/" + getFlagString();
}
/**
* Nashorn extension: RegExp.prototype.compile - everybody implements this!
*
*** 266,280 ****
@Function(attributes = Attribute.NOT_ENUMERABLE)
public static Object compile(final Object self, final Object pattern, final Object flags) {
final NativeRegExp regExp = checkRegExp(self);
final NativeRegExp compiled = newRegExp(pattern, flags);
// copy over fields to 'self'
! regExp.setInput(compiled.getInput());
regExp.setGlobal(compiled.getGlobal());
regExp.setIgnoreCase(compiled.getIgnoreCase());
regExp.setMultiline(compiled.getMultiline());
! regExp.setPattern(compiled.getPattern());
regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
// Some implementations return undefined. Some return 'self'. Since return
// value is most likely be ignored, we can play safe and return 'self'.
return regExp;
--- 301,315 ----
@Function(attributes = Attribute.NOT_ENUMERABLE)
public static Object compile(final Object self, final Object pattern, final Object flags) {
final NativeRegExp regExp = checkRegExp(self);
final NativeRegExp compiled = newRegExp(pattern, flags);
// copy over fields to 'self'
! regExp.setSource(compiled.getSource());
regExp.setGlobal(compiled.getGlobal());
regExp.setIgnoreCase(compiled.getIgnoreCase());
regExp.setMultiline(compiled.getMultiline());
! regExp.setRegex(compiled.getRegex());
regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
// Some implementations return undefined. Some return 'self'. Since return
// value is most likely be ignored, we can play safe and return 'self'.
return regExp;
*** 321,331 ****
* @param self self reference
* @return the input string for the regexp
*/
@Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
public static Object source(final Object self) {
! return checkRegExp(self).input;
}
/**
* ECMA 15.10.7.2 global
*
--- 356,366 ----
* @param self self reference
* @return the source (pattern) string of the regexp
*/
@Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
public static Object source(final Object self) {
// checkRegExp(self) yields the NativeRegExp receiver; its source field holds the pattern string.
! return checkRegExp(self).source;
}
/**
* ECMA 15.10.7.2 global
*
*** 358,415 ****
public static Object multiline(final Object self) {
return checkRegExp(self).multiline;
}
private RegExpMatch execInner(final String string) {
! if (this.pattern == null) {
return null; // never matches or similar, e.g. a[]
}
! final Matcher matcher = pattern.matcher(string);
final int start = this.global ? getLastIndex() : 0;
if (start < 0 || start > string.length()) {
setLastIndex(0);
return null;
}
! if (!matcher.find(start)) {
setLastIndex(0);
return null;
}
if (global) {
! setLastIndex(matcher.end());
}
! return new RegExpMatch(string, matcher.start(), groups(matcher));
}
/**
* Convert java.util.regex.Matcher groups to JavaScript groups.
* That is, replace null and groups that didn't match with undefined.
*/
private Object[] groups(final Matcher matcher) {
! final int groupCount = matcher.groupCount();
final Object[] groups = new Object[groupCount + 1];
! for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
! final int groupStart = matcher.start(i);
if (lastGroupStart > groupStart
|| (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
// (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
// (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
// in the pattern always return undefined because the negative lookahead must fail.
groups[i] = UNDEFINED;
continue;
}
! final String group = matcher.group(i);
! groups[i] = group == null ? UNDEFINED : group;
lastGroupStart = groupStart;
}
return groups;
}
/**
* Executes a search for a match within a string based on a regular
* expression. It returns an array of information or null if no match is
* found.
*
--- 393,463 ----
public static Object multiline(final Object self) {
// checkRegExp(self) yields the NativeRegExp receiver; report its multi-line flag.
return checkRegExp(self).multiline;
}
private RegExpMatch execInner(final String string) {
! if (this.regex == null) {
return null; // never matches or similar, e.g. a[]
}
! setInput(string);
!
! final Matcher matcher = regex.matcher(inputChars);
final int start = this.global ? getLastIndex() : 0;
if (start < 0 || start > string.length()) {
setLastIndex(0);
return null;
}
! if (matcher.search(start, inputChars.length, Option.NONE) == -1) {
setLastIndex(0);
return null;
}
if (global) {
! setLastIndex(matcher.getEnd());
}
! return new RegExpMatch(string, matcher.getBegin(), groups(matcher));
}
/**
* Convert java.util.regex.Matcher groups to JavaScript groups.
* That is, replace null and groups that didn't match with undefined.
*/
private Object[] groups(final Matcher matcher) {
! Region region = matcher.getRegion();
! final int groupCount = region == null ? 0 : region.numRegs - 1;
final Object[] groups = new Object[groupCount + 1];
! groups[0] = input.substring(matcher.getBegin(), matcher.getEnd());
!
! for (int i = 1, lastGroupStart = matcher.getBegin(); i <= groupCount; i++) {
! final int groupStart = region.beg[i];
if (lastGroupStart > groupStart
|| (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
// (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
// (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
// in the pattern always return undefined because the negative lookahead must fail.
groups[i] = UNDEFINED;
continue;
}
! final int begin = region.beg[i];
! final int end = region.end[i];
! groups[i] = begin == -1 ? UNDEFINED : input.substring(begin, end);
lastGroupStart = groupStart;
}
return groups;
}
+ /**
+  * Remembers the string that is being matched together with its char array
+  * form. The char array is only rebuilt when the string differs from the
+  * one remembered so far.
+  *
+  * @param input string that is about to be matched
+  */
+ private void setInput(String input) {
+     if (input.equals(this.input)) {
+         return; // same subject as before, keep the existing char array
+     }
+     this.inputChars = input.toCharArray();
+     this.input = input;
+ }
+
/**
* Executes a search for a match within a string based on a regular
* expression. It returns an array of information or null if no match is
* found.
*
*** 446,456 ****
* @param string String to match.
* @param replacement Replacement string.
* @return String with substitutions.
*/
Object replace(final String string, final String replacement, final ScriptFunction function) {
! final Matcher matcher = pattern.matcher(string);
/*
* $$ -> $
* $& -> the matched substring
* $` -> the portion of string that precedes matched substring
* $' -> the portion of string that follows the matched substring
--- 494,505 ----
* @param string String to match.
* @param replacement Replacement string.
* @return String with substitutions.
*/
Object replace(final String string, final String replacement, final ScriptFunction function) {
! setInput(string);
! final Matcher matcher = regex.matcher(inputChars);
/*
* $$ -> $
* $& -> the matched substring
* $` -> the portion of string that preceeds matched substring
* $' -> the portion of string that follows the matched substring
*** 458,514 ****
* $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
*/
String replace = replacement;
if (!global) {
! if (!matcher.find()) {
return string;
}
final StringBuilder sb = new StringBuilder();
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
appendReplacement(matcher, string, replace, sb, 0);
! sb.append(string, matcher.end(), string.length());
return sb.toString();
}
int end = 0; // a.k.a. lastAppendPosition
setLastIndex(0);
! boolean found;
try {
! found = matcher.find(end);
} catch (final IndexOutOfBoundsException e) {
! found = false;
}
! if (!found) {
return string;
}
int previousLastIndex = 0;
final StringBuilder sb = new StringBuilder();
do {
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
! appendReplacement(matcher, string, replace, sb, end);
! end = matcher.end();
// ECMA 15.5.4.10 String.prototype.match(regexp)
! final int thisIndex = end;
if (thisIndex == previousLastIndex) {
setLastIndex(thisIndex + 1);
previousLastIndex = thisIndex + 1;
} else {
previousLastIndex = thisIndex;
}
! } while (matcher.find());
! sb.append(string, end, string.length());
return sb.toString();
}
private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
--- 507,565 ----
* $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
*/
String replace = replacement;
if (!global) {
! if (matcher.search(0, inputChars.length, Option.NONE) == -1) {
return string;
}
final StringBuilder sb = new StringBuilder();
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
appendReplacement(matcher, string, replace, sb, 0);
! sb.append(string, matcher.getEnd(), string.length());
return sb.toString();
}
int end = 0; // a.k.a. lastAppendPosition
setLastIndex(0);
! int found;
try {
! found = matcher.search(end, inputChars.length, Option.NONE);
} catch (final IndexOutOfBoundsException e) {
! found = -1;
}
! if (found == -1) {
return string;
}
+ int thisIndex = 0;
int previousLastIndex = 0;
final StringBuilder sb = new StringBuilder();
do {
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
! appendReplacement(matcher, string, replace, sb, thisIndex);
! end = matcher.getEnd();
// ECMA 15.5.4.10 String.prototype.match(regexp)
! thisIndex = end;
if (thisIndex == previousLastIndex) {
setLastIndex(thisIndex + 1);
previousLastIndex = thisIndex + 1;
+ end++;
} else {
previousLastIndex = thisIndex;
}
! } while (matcher.search(end, inputChars.length, Option.NONE) > -1);
! sb.append(string, thisIndex, string.length());
return sb.toString();
}
private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
*** 522,541 ****
if (nextChar == '$') {
// Skip past $
cursor++;
nextChar = replacement.charAt(cursor);
final int firstDigit = nextChar - '0';
! if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
// $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
int refNum = firstDigit;
cursor++;
! if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
final int secondDigit = replacement.charAt(cursor) - '0';
if ((secondDigit >= 0) && (secondDigit <= 9)) {
final int newRefNum = (firstDigit * 10) + secondDigit;
! if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
// $nn ($01-$99)
refNum = newRefNum;
cursor++;
}
}
--- 573,594 ----
if (nextChar == '$') {
// Skip past $
cursor++;
nextChar = replacement.charAt(cursor);
final int firstDigit = nextChar - '0';
+ Region region = matcher.getRegion();
+ int groupCount = region == null ? 0 : region.numRegs - 1;
! if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= groupCount) {
// $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
int refNum = firstDigit;
cursor++;
! if (cursor < replacement.length() && firstDigit < groupCount) {
final int secondDigit = replacement.charAt(cursor) - '0';
if ((secondDigit >= 0) && (secondDigit <= 9)) {
final int newRefNum = (firstDigit * 10) + secondDigit;
! if (newRefNum <= groupCount && newRefNum > 0) {
// $nn ($01-$99)
refNum = newRefNum;
cursor++;
}
}
*** 554,570 ****
}
} else if (nextChar == '$') {
result.append('$');
cursor++;
} else if (nextChar == '&') {
! result.append(matcher.group());
cursor++;
} else if (nextChar == '`') {
! result.append(text.substring(0, matcher.start()));
cursor++;
} else if (nextChar == '\'') {
! result.append(text.substring(matcher.end()));
cursor++;
} else {
// unknown substitution or $n with n>m. skip.
result.append('$');
}
--- 607,623 ----
}
} else if (nextChar == '$') {
result.append('$');
cursor++;
} else if (nextChar == '&') {
! result.append(text, matcher.getBegin(), matcher.getEnd());
cursor++;
} else if (nextChar == '`') {
! result.append(text, 0, matcher.getBegin());
cursor++;
} else if (nextChar == '\'') {
! result.append(text, matcher.getEnd(), text.length());
cursor++;
} else {
// unknown substitution or $n with n>m. skip.
result.append('$');
}
*** 572,591 ****
result.append(nextChar);
cursor++;
}
}
// Append the intervening text
! sb.append(text, lastAppendPosition, matcher.start());
// Append the match substitution
sb.append(result);
}
private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
final Object[] groups = groups(matcher);
final Object[] args = Arrays.copyOf(groups, groups.length + 2);
! args[groups.length] = matcher.start();
args[groups.length + 1] = string;
final Object self = function.isStrict() ? UNDEFINED : Global.instance();
return JSType.toString(ScriptRuntime.apply(function, self, args));
--- 625,644 ----
result.append(nextChar);
cursor++;
}
}
// Append the intervening text
! sb.append(text, lastAppendPosition, matcher.getBegin());
// Append the match substitution
sb.append(result);
}
private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
final Object[] groups = groups(matcher);
final Object[] args = Arrays.copyOf(groups, groups.length + 2);
! args[groups.length] = matcher.getBegin();
args[groups.length + 1] = string;
final Object self = function.isStrict() ? UNDEFINED : Global.instance();
return JSType.toString(ScriptRuntime.apply(function, self, args));
*** 663,683 ****
*
* @param string String to match.
* @return Index of match.
*/
Object search(final String string) {
! final Matcher matcher = pattern.matcher(string);
int start = 0;
if (global) {
start = getLastIndex();
}
! start = matcher.find(start) ? matcher.start() : -1;
if (global) {
! setLastIndex(matcher.end());
}
return start;
}
--- 716,738 ----
*
* @param string String to match.
* @return Index of match.
*/
Object search(final String string) {
!     // Without a compiled regex nothing can match; execInner applies the same guard.
!     if (regex == null) {
!         return -1;
!     }
!     setInput(string);
!
!     final Matcher matcher = regex.matcher(inputChars);
!     // Only a global regexp resumes from lastIndex; otherwise start at offset 0.
!     final int start = global ? getLastIndex() : 0;
!     if (start < 0 || start > inputChars.length) {
!         // lastIndex lies outside the string: report "not found" and reset it,
!         // as execInner does, instead of searching from an invalid offset.
!         setLastIndex(0);
!         return -1;
!     }
!
!     final int index = matcher.search(start, inputChars.length, Option.NONE);
!     if (global) {
!         // getEnd() only describes a successful search; after a failure reset lastIndex.
!         setLastIndex(index < 0 ? 0 : matcher.getEnd());
!     }
!     // NOTE(review): ECMA 15.5.4.12 states that String.prototype.search ignores the
!     // regexp's global and lastIndex properties - confirm the global handling is intended.
!     return index;
}
*** 719,734 ****
typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
return null;
}
}
! private String getInput() {
! return input;
}
! private void setInput(final String input) {
! this.input = input;
}
boolean getGlobal() {
return global;
}
--- 774,789 ----
typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
return null;
}
}
/** Returns the pattern string of this regexp. */
! private String getSource() {
! return source;
}
/** Replaces the pattern string of this regexp; the compiled regex is not touched here. */
! private void setSource(final String source) {
! this.source = source;
}
/** Returns the global search flag of this regexp. */
boolean getGlobal() {
return global;
}
*** 751,766 ****
private void setMultiline(final boolean multiline) {
this.multiline = multiline;
}
! private Pattern getPattern() {
! return pattern;
}
! private void setPattern(final Pattern pattern) {
! this.pattern = pattern;
}
private BitVector getGroupsInNegativeLookahead() {
return groupsInNegativeLookahead;
}
--- 806,821 ----
private void setMultiline(final boolean multiline) {
// Only stores the multi-line flag; nothing is recompiled here.
this.multiline = multiline;
}
/** Returns the compiled Joni regex; it may be null, which execInner treats as "never matches". */
! private Regex getRegex() {
! return regex;
}
/** Replaces the compiled Joni regex used for matching. */
! private void setRegex(final Regex regex) {
! this.regex = regex;
}
/** Returns the bit vector of capture groups flagged as being inside a negative lookahead (may be null). */
private BitVector getGroupsInNegativeLookahead() {
return groupsInNegativeLookahead;
}