src/jdk/nashorn/internal/objects/NativeRegExp.java
Print this page
@@ -29,12 +29,10 @@
import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import jdk.nashorn.internal.objects.annotations.Attribute;
import jdk.nashorn.internal.objects.annotations.Constructor;
import jdk.nashorn.internal.objects.annotations.Function;
import jdk.nashorn.internal.objects.annotations.Getter;
import jdk.nashorn.internal.objects.annotations.Property;
@@ -47,33 +45,44 @@
import jdk.nashorn.internal.runtime.RegExpMatch;
import jdk.nashorn.internal.runtime.ScriptFunction;
import jdk.nashorn.internal.runtime.ScriptObject;
import jdk.nashorn.internal.runtime.ScriptRuntime;
+import jdk.nashorn.internal.joni.Matcher;
+import jdk.nashorn.internal.joni.Option;
+import jdk.nashorn.internal.joni.Regex;
+import jdk.nashorn.internal.joni.Region;
+
/**
* ECMA 15.10 RegExp Objects.
*/
@ScriptClass("RegExp")
public final class NativeRegExp extends ScriptObject {
/** ECMA 15.10.7.5 lastIndex property */
@Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
public Object lastIndex;
/** Pattern string. */
+ private String source;
+
+ /** Input string. */
private String input;
+ /** Input string as char array */
+ private char[] inputChars;
+
/** Global search flag for this regexp. */
private boolean global;
/** Case insensitive flag for this regexp */
private boolean ignoreCase;
/** Multi-line flag for this regexp */
private boolean multiline;
- /** Java regex pattern to use for match. We compile to one of these */
- private Pattern pattern;
+ /** Joni regex pattern to use for match. We compile to one of these */
+ private Regex regex;
private BitVector groupsInNegativeLookahead;
/*
public NativeRegExp() {
@@ -89,45 +98,45 @@
e.throwAsEcmaException(Global.instance());
throw new AssertionError(); //guard against null warnings below
}
this.setLastIndex(0);
- this.input = regExp.getInput();
+ this.source = regExp.getSource();
this.global = regExp.isGlobal();
this.ignoreCase = regExp.isIgnoreCase();
this.multiline = regExp.isMultiline();
- this.pattern = regExp.getPattern();
+ this.regex = regExp.getRegex();
this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
init();
}
/**
 * Creates a regexp from a pattern string by delegating to the
 * two-argument constructor with an empty flag string.
 *
 * @param string pattern string
 */
NativeRegExp(final String string) {
this(string, "");
}
/**
 * Copy constructor. Takes over the pattern text, the global, multiline and
 * ignoreCase flags, the lastIndex value, the compiled pattern object and the
 * groups-in-negative-lookahead set of {@code regExp}, then runs init().
 * The compiled pattern object is the same instance the original holds; it
 * is not compiled again here.
 *
 * @param regExp regexp to copy
 */
NativeRegExp(final NativeRegExp regExp) {
- this.input = regExp.getInput();
+ this.source = regExp.getSource();
this.global = regExp.getGlobal();
this.multiline = regExp.getMultiline();
this.ignoreCase = regExp.getIgnoreCase();
this.lastIndex = regExp.getLastIndexObject();
- this.pattern = regExp.getPattern();
+ this.regex = regExp.getRegex();
this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
init();
}
- NativeRegExp(final Pattern pattern) {
+ /* NativeRegExp(final Pattern pattern) {
this.input = pattern.pattern();
this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0;
this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
this.lastIndex = 0;
this.pattern = pattern;
init();
- }
+ } */
/**
 * Returns the class name reported for objects of this type.
 *
 * @return the constant string "RegExp"
 */
@Override
public String getClassName() {
return "RegExp";
}
@@ -227,10 +236,36 @@
}
return new NativeRegExp(patternString, flagString);
}
+ /**
+  * Creates a regexp that matches the given string literally.
+  * <p>
+  * Every character that ECMA 15.10.1 excludes from PatternCharacter
+  * ({@code ^ $ \ . * + ? ( ) [ ] { } |}) is prefixed with a backslash, so no
+  * character of {@code string} is left with a special meaning in the pattern.
+  * The regexp is built from the escaped text and an empty flag string.
+  *
+  * @param string text to be matched verbatim
+  * @return regexp built from the escaped form of {@code string}
+  */
+ public static NativeRegExp flatRegExp(final String string) {
+     final int length = string.length();
+     // Presized for the common case in which nothing has to be escaped.
+     final StringBuilder sb = new StringBuilder(length);
+     for (int i = 0; i < length; i++) {
+         final char c = string.charAt(i);
+         switch (c) {
+         case '^':
+         case '$':
+         case '\\':
+         case '.':
+         case '*':
+         case '+':
+         case '?':
+         case '(':
+         case ')':
+         case '[':
+         case ']':
+         case '{':
+         case '}':
+         case '|':
+             sb.append('\\');
+             // fall through: the escaped character itself still has to be appended
+         default:
+             sb.append(c);
+         }
+     }
+     return new NativeRegExp(sb.toString(), "");
+ }
+
private String getFlagString() {
final StringBuilder sb = new StringBuilder();
if (global) {
sb.append('g');
@@ -250,11 +285,11 @@
return "[RegExp " + toString() + "]";
}
/**
 * Renders this regexp in literal notation: the pattern text between two
 * slashes, followed by whatever getFlagString() returns.
 *
 * @return string of the form /pattern/flags
 */
@Override
public String toString() {
- return "/" + input + "/" + getFlagString();
+ return "/" + source + "/" + getFlagString();
}
/**
* Nashorn extension: RegExp.prototype.compile - everybody implements this!
*
@@ -266,15 +301,15 @@
@Function(attributes = Attribute.NOT_ENUMERABLE)
public static Object compile(final Object self, final Object pattern, final Object flags) {
final NativeRegExp regExp = checkRegExp(self);
final NativeRegExp compiled = newRegExp(pattern, flags);
// copy over fields to 'self'
- regExp.setInput(compiled.getInput());
+ regExp.setSource(compiled.getSource());
regExp.setGlobal(compiled.getGlobal());
regExp.setIgnoreCase(compiled.getIgnoreCase());
regExp.setMultiline(compiled.getMultiline());
- regExp.setPattern(compiled.getPattern());
+ regExp.setRegex(compiled.getRegex());
regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
// Some implementations return undefined. Some return 'self'. Since the return
// value will most likely be ignored, we can play it safe and return 'self'.
return regExp;
@@ -321,11 +356,11 @@
* @param self self reference
* @return the source (pattern) string of the regexp
*/
@Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
public static Object source(final Object self) {
// The receiver is first passed through checkRegExp; the value handed back is
// the pattern text, i.e. the same text toString() prints between the slashes.
- return checkRegExp(self).input;
+ return checkRegExp(self).source;
}
/**
* ECMA 15.10.7.2 global
*
@@ -358,58 +393,71 @@
public static Object multiline(final Object self) {
return checkRegExp(self).multiline;
}
/**
 * Runs one match attempt of this regexp against {@code string}.
 * <p>
 * Returns null right away when there is no compiled pattern. A global regexp
 * starts at lastIndex, a non-global one at 0. If the start position lies
 * outside the string, or nothing matches, lastIndex is reset to 0 and null
 * is returned. After a successful match a global regexp has its lastIndex
 * moved to the end of the match.
 *
 * @param string input to match against
 * @return match object built from the input, the match start and the
 *         capture groups, or null if there is no match
 */
private RegExpMatch execInner(final String string) {
- if (this.pattern == null) {
+ if (this.regex == null) {
return null; // never matches or similar, e.g. a[]
}
- final Matcher matcher = pattern.matcher(string);
// The matcher created below works on the char[] form of the input, which
// setInput keeps alongside the string itself.
+ setInput(string);
+
+ final Matcher matcher = regex.matcher(inputChars);
final int start = this.global ? getLastIndex() : 0;
if (start < 0 || start > string.length()) {
setLastIndex(0);
return null;
}
- if (!matcher.find(start)) {
// A result of -1 is treated as "no match".
+ if (matcher.search(start, inputChars.length, Option.NONE) == -1) {
setLastIndex(0);
return null;
}
if (global) {
- setLastIndex(matcher.end());
+ setLastIndex(matcher.getEnd());
}
- return new RegExpMatch(string, matcher.start(), groups(matcher));
+ return new RegExpMatch(string, matcher.getBegin(), groups(matcher));
}
/**
 * Convert the groups of the given matcher to JavaScript groups.
 * That is, replace null and groups that didn't match with undefined.
 * Element 0 of the result is the whole match, elements 1..n are the
 * capture groups.
 */
private Object[] groups(final Matcher matcher) {
- final int groupCount = matcher.groupCount();
// A null region is handled as "no capture groups"; otherwise the region's
// register count minus one is used as the group count.
+ Region region = matcher.getRegion();
+ final int groupCount = region == null ? 0 : region.numRegs - 1;
final Object[] groups = new Object[groupCount + 1];
- for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
- final int groupStart = matcher.start(i);
// The whole match is cut out of the cached input string.
+ groups[0] = input.substring(matcher.getBegin(), matcher.getEnd());
+
+ for (int i = 1, lastGroupStart = matcher.getBegin(); i <= groupCount; i++) {
+ final int groupStart = region.beg[i];
if (lastGroupStart > groupStart
|| (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
// (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
// (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
// in the pattern always return undefined because the negative lookahead must fail.
groups[i] = UNDEFINED;
continue;
}
- final String group = matcher.group(i);
- groups[i] = group == null ? UNDEFINED : group;
// 'begin' reads the same array slot as 'groupStart' above; a begin of -1 is
// mapped to undefined.
+ final int begin = region.beg[i];
+ final int end = region.end[i];
+ groups[i] = begin == -1 ? UNDEFINED : input.substring(begin, end);
lastGroupStart = groupStart;
}
return groups;
}
+ private void setInput(String input) {
+ if (!input.equals(this.input)) {
+ this.input = input;
+ this.inputChars = input.toCharArray();
+ }
+ }
+
/**
* Executes a search for a match within a string based on a regular
* expression. It returns an array of information or null if no match is
* found.
*
@@ -446,11 +494,12 @@
* @param string String to match.
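* @param replacement Replacement string.
* @param function Replacement function; when non-null, its result is used in place of the replacement string.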
* @return String with substitutions.
*/
Object replace(final String string, final String replacement, final ScriptFunction function) {
- final Matcher matcher = pattern.matcher(string);
+ setInput(string);
+ final Matcher matcher = regex.matcher(inputChars);
/*
* $$ -> $
* $& -> the matched substring
* $` -> the portion of string that precedes the matched substring
* $' -> the portion of string that follows the matched substring
@@ -458,57 +507,59 @@
* $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
*/
String replace = replacement;
if (!global) {
- if (!matcher.find()) {
+ if (matcher.search(0, inputChars.length, Option.NONE) == -1) {
return string;
}
final StringBuilder sb = new StringBuilder();
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
appendReplacement(matcher, string, replace, sb, 0);
- sb.append(string, matcher.end(), string.length());
+ sb.append(string, matcher.getEnd(), string.length());
return sb.toString();
}
int end = 0; // a.k.a. lastAppendPosition
setLastIndex(0);
- boolean found;
+ int found;
try {
- found = matcher.find(end);
+ found = matcher.search(end, inputChars.length, Option.NONE);
} catch (final IndexOutOfBoundsException e) {
- found = false;
+ found = -1;
}
- if (!found) {
+ if (found == -1) {
return string;
}
+ int thisIndex = 0;
int previousLastIndex = 0;
final StringBuilder sb = new StringBuilder();
do {
if (function != null) {
replace = callReplaceValue(function, matcher, string);
}
- appendReplacement(matcher, string, replace, sb, end);
- end = matcher.end();
+ appendReplacement(matcher, string, replace, sb, thisIndex);
+ end = matcher.getEnd();
// ECMA 15.5.4.10 String.prototype.match(regexp)
- final int thisIndex = end;
+ thisIndex = end;
if (thisIndex == previousLastIndex) {
setLastIndex(thisIndex + 1);
previousLastIndex = thisIndex + 1;
+ end++;
} else {
previousLastIndex = thisIndex;
}
- } while (matcher.find());
+ } while (matcher.search(end, inputChars.length, Option.NONE) > -1);
- sb.append(string, end, string.length());
+ sb.append(string, thisIndex, string.length());
return sb.toString();
}
private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
@@ -522,20 +573,22 @@
if (nextChar == '$') {
// Skip past $
cursor++;
nextChar = replacement.charAt(cursor);
final int firstDigit = nextChar - '0';
+ Region region = matcher.getRegion();
+ int groupCount = region == null ? 0 : region.numRegs - 1;
- if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
+ if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= groupCount) {
// $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
int refNum = firstDigit;
cursor++;
- if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
+ if (cursor < replacement.length() && firstDigit < groupCount) {
final int secondDigit = replacement.charAt(cursor) - '0';
if ((secondDigit >= 0) && (secondDigit <= 9)) {
final int newRefNum = (firstDigit * 10) + secondDigit;
- if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
+ if (newRefNum <= groupCount && newRefNum > 0) {
// $nn ($01-$99)
refNum = newRefNum;
cursor++;
}
}
@@ -554,17 +607,17 @@
}
} else if (nextChar == '$') {
result.append('$');
cursor++;
} else if (nextChar == '&') {
- result.append(matcher.group());
+ result.append(text, matcher.getBegin(), matcher.getEnd());
cursor++;
} else if (nextChar == '`') {
- result.append(text.substring(0, matcher.start()));
+ result.append(text, 0, matcher.getBegin());
cursor++;
} else if (nextChar == '\'') {
- result.append(text.substring(matcher.end()));
+ result.append(text, matcher.getEnd(), text.length());
cursor++;
} else {
// unknown substitution or $n with n>m. skip.
result.append('$');
}
@@ -572,20 +625,20 @@
result.append(nextChar);
cursor++;
}
}
// Append the intervening text
- sb.append(text, lastAppendPosition, matcher.start());
+ sb.append(text, lastAppendPosition, matcher.getBegin());
// Append the match substitution
sb.append(result);
}
private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
final Object[] groups = groups(matcher);
final Object[] args = Arrays.copyOf(groups, groups.length + 2);
- args[groups.length] = matcher.start();
+ args[groups.length] = matcher.getBegin();
args[groups.length + 1] = string;
final Object self = function.isStrict() ? UNDEFINED : Global.instance();
return JSType.toString(ScriptRuntime.apply(function, self, args));
@@ -663,21 +716,23 @@
*
* @param string String to match.
* @return Index of match.
*/
Object search(final String string) {
- final Matcher matcher = pattern.matcher(string);
+ // A regexp without a compiled form never matches; execInner applies the
+ // same guard. Without it the matcher creation below would fail on null.
+ if (regex == null) {
+ return -1;
+ }
+
+ setInput(string);
+
+ final Matcher matcher = regex.matcher(inputChars);
int start = 0;
if (global) {
start = getLastIndex();
}
- start = matcher.find(start) ? matcher.start() : -1;
+ // From here on 'start' holds the search result: -1 means nothing was found.
+ start = matcher.search(start, inputChars.length, Option.NONE);
if (global) {
- setLastIndex(matcher.end());
+ // The matcher's end offset is only meaningful after a successful search;
+ // on failure lastIndex is reset to 0, exactly as execInner does.
+ setLastIndex(start == -1 ? 0 : matcher.getEnd());
}
return start;
}
@@ -719,16 +774,16 @@
typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
return null;
}
}
/** @return the pattern text of this regexp */
- private String getInput() {
- return input;
+ private String getSource() {
+ return source;
}
/** Replaces the pattern text of this regexp; compile() uses this to copy the text of a freshly built regexp. */
- private void setInput(final String input) {
- this.input = input;
+ private void setSource(final String source) {
+ this.source = source;
}
/** @return the global search flag of this regexp */
boolean getGlobal() {
return global;
}
@@ -751,16 +806,16 @@
/** Sets the multi-line flag of this regexp. */
private void setMultiline(final boolean multiline) {
this.multiline = multiline;
}
/** @return the compiled pattern object used for matching; execInner treats null as "never matches" */
- private Pattern getPattern() {
- return pattern;
+ private Regex getRegex() {
+ return regex;
}
/** Replaces the compiled pattern object used for matching. */
- private void setPattern(final Pattern pattern) {
- this.pattern = pattern;
+ private void setRegex(final Regex regex) {
+ this.regex = regex;
}
/** @return the bit vector consulted by groups(): a capture group whose bit is set is always reported as undefined */
private BitVector getGroupsInNegativeLookahead() {
return groupsInNegativeLookahead;
}