src/jdk/nashorn/internal/objects/NativeRegExp.java

Print this page

        

@@ -29,12 +29,10 @@
 import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import jdk.nashorn.internal.objects.annotations.Attribute;
 import jdk.nashorn.internal.objects.annotations.Constructor;
 import jdk.nashorn.internal.objects.annotations.Function;
 import jdk.nashorn.internal.objects.annotations.Getter;
 import jdk.nashorn.internal.objects.annotations.Property;

@@ -47,33 +45,44 @@
 import jdk.nashorn.internal.runtime.RegExpMatch;
 import jdk.nashorn.internal.runtime.ScriptFunction;
 import jdk.nashorn.internal.runtime.ScriptObject;
 import jdk.nashorn.internal.runtime.ScriptRuntime;
 
+import jdk.nashorn.internal.joni.Matcher;
+import jdk.nashorn.internal.joni.Option;
+import jdk.nashorn.internal.joni.Regex;
+import jdk.nashorn.internal.joni.Region;
+
 /**
  * ECMA 15.10 RegExp Objects.
  */
 @ScriptClass("RegExp")
 public final class NativeRegExp extends ScriptObject {
     /** ECMA 15.10.7.5 lastIndex property */
     @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
     public Object lastIndex;
 
     /** Pattern string. */
+    private String source;
+
+    /** Input string. */
     private String input;
 
+    /** Input string as char array */
+    private char[] inputChars;
+
     /** Global search flag for this regexp. */
     private boolean global;
 
     /** Case insensitive flag for this regexp */
     private boolean ignoreCase;
 
     /** Multi-line flag for this regexp */
     private boolean multiline;
 
-    /** Java regex pattern to use for match. We compile to one of these */
-    private Pattern pattern;
+    /** Joni regex pattern to use for match. We compile to one of these */
+    private Regex regex;
 
     private BitVector groupsInNegativeLookahead;
 
     /*
     public NativeRegExp() {

@@ -89,45 +98,45 @@
             e.throwAsEcmaException(Global.instance());
             throw new AssertionError(); //guard against null warnings below
         }
 
         this.setLastIndex(0);
-        this.input = regExp.getInput();
+        this.source = regExp.getSource();
         this.global = regExp.isGlobal();
         this.ignoreCase = regExp.isIgnoreCase();
         this.multiline = regExp.isMultiline();
-        this.pattern = regExp.getPattern();
+        this.regex = regExp.getRegex();
         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 
         init();
     }
 
     NativeRegExp(final String string) {
         this(string, ""); // delegate to the two-argument (pattern, flags) constructor with an empty flag string
     }
 
     NativeRegExp(final NativeRegExp regExp) { // copy constructor: every field set below is read from regExp
-        this.input      = regExp.getInput();
+        this.source     = regExp.getSource();
         this.global     = regExp.getGlobal();
         this.multiline  = regExp.getMultiline();
         this.ignoreCase = regExp.getIgnoreCase();
         this.lastIndex  = regExp.getLastIndexObject();
-        this.pattern    = regExp.getPattern();
+        this.regex      = regExp.getRegex(); // the same Regex reference is reused; input/inputChars are not copied
         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 
         init();
     }
 
-    NativeRegExp(final Pattern pattern) {
-        this.input      = pattern.pattern();
-        this.multiline  = (pattern.flags() & Pattern.MULTILINE) != 0;
-        this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
-        this.lastIndex  = 0;
-        this.pattern    = pattern;
-
-        init();
-    }
-
     @Override
     public String getClassName() {
         return "RegExp"; // same name as the @ScriptClass annotation on this class
     }

@@ -227,10 +236,36 @@
         }
 
         return new NativeRegExp(patternString, flagString);
     }
 
+    /**
+     * Build a regexp from plain text. Every character listed in the switch
+     * below is prefixed with a backslash so that the pattern compiler does not
+     * treat it as an operator; all other characters are copied unchanged.
+     *
+     * @param string plain text to turn into a pattern
+     * @return a new {@code NativeRegExp} built from the escaped text and an empty flag string
+     */
+    public static NativeRegExp flatRegExp(final String string) {
+        final int length = string.length();
+        final StringBuilder sb = new StringBuilder(length);
+
+        for (int i = 0; i < length; i++) {
+            final char c = string.charAt(i);
+            switch (c) {
+                case '^':
+                case '$':
+                case '\\':
+                case '.':
+                case '*':
+                case '+':
+                case '?':
+                case '(':
+                case ')':
+                case '[':
+                case '{':
+                case '|':
+                    // special character: emit it escaped (explicit, instead of falling through to default)
+                    sb.append('\\').append(c);
+                    break;
+                default:
+                    sb.append(c);
+                    break;
+            }
+        }
+
+        return new NativeRegExp(sb.toString(), "");
+    }
+
     private String getFlagString() {
         final StringBuilder sb = new StringBuilder();
 
         if (global) {
             sb.append('g');

@@ -250,11 +285,11 @@
         return "[RegExp " + toString() + "]";
     }
 
     @Override
     public String toString() {
-        return "/" + input + "/" + getFlagString();
+        return "/" + source + "/" + getFlagString(); // regexp literal form: /source/flags
     }
 
     /**
      * Nashorn extension: RegExp.prototype.compile - everybody implements this!
      *

@@ -266,15 +301,15 @@
     @Function(attributes = Attribute.NOT_ENUMERABLE)
     public static Object compile(final Object self, final Object pattern, final Object flags) {
         final NativeRegExp regExp   = checkRegExp(self);
         final NativeRegExp compiled = newRegExp(pattern, flags);
         // copy over fields to 'self'
-        regExp.setInput(compiled.getInput());
+        regExp.setSource(compiled.getSource());
         regExp.setGlobal(compiled.getGlobal());
         regExp.setIgnoreCase(compiled.getIgnoreCase());
         regExp.setMultiline(compiled.getMultiline());
-        regExp.setPattern(compiled.getPattern());
+        regExp.setRegex(compiled.getRegex());
         regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
 
         // Some implementations return undefined. Some return 'self'. Since return
         // value is most likely be ignored, we can play safe and return 'self'.
         return regExp;

@@ -321,11 +356,11 @@
      * @param self self reference
-     * @return the input string for the regexp
+     * @return the pattern source string of the regexp
      */
     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
     public static Object source(final Object self) {
-        return checkRegExp(self).input;
+        return checkRegExp(self).source; // pattern text is kept in 'source'; 'input' now holds the string being matched
     }
 
     /**
      * ECMA 15.10.7.2 global
      *

@@ -358,58 +393,71 @@
     public static Object multiline(final Object self) {
         return checkRegExp(self).multiline;
     }
 
     private RegExpMatch execInner(final String string) { // one match attempt against string; null means no match
-        if (this.pattern == null) {
+        if (this.regex == null) {
             return null; // never matches or similar, e.g. a[]
         }
 
-        final Matcher matcher = pattern.matcher(string);
+        setInput(string); // refreshes input/inputChars, which the matcher below and groups() read
+
+        final Matcher matcher = regex.matcher(inputChars);
         final int start = this.global ? getLastIndex() : 0; // only a global regexp resumes from lastIndex
 
         if (start < 0 || start > string.length()) {
             setLastIndex(0); // lastIndex outside the string: reset it and report no match
             return null;
         }
 
-        if (!matcher.find(start)) {
+        if (matcher.search(start, inputChars.length, Option.NONE) == -1) { // -1 is handled as "no match"
             setLastIndex(0);
             return null;
         }
 
         if (global) {
-            setLastIndex(matcher.end());
+            setLastIndex(matcher.getEnd()); // the next global attempt starts where this match ended
         }
 
-        return new RegExpMatch(string, matcher.start(), groups(matcher));
+        return new RegExpMatch(string, matcher.getBegin(), groups(matcher));
     }
 
     /**
-     * Convert java.util.regex.Matcher groups to JavaScript groups.
-     * That is, replace null and groups that didn't match with undefined.
+     * Convert the capture regions of a Joni Matcher to JavaScript groups.
+     * That is, groups that didn't match are replaced with undefined. Group text is
+     * cut out of the cached {@code input} field rather than taken from the matcher.
      */
     private Object[] groups(final Matcher matcher) {
-        final int groupCount = matcher.groupCount();
+        Region region = matcher.getRegion();
+        final int groupCount = region == null ? 0 : region.numRegs - 1; // a null region is handled as "no capture groups"
         final Object[] groups = new Object[groupCount + 1];
-        for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
-            final int groupStart = matcher.start(i);
+        groups[0] = input.substring(matcher.getBegin(), matcher.getEnd()); // slot 0 holds the whole match
+
+        for (int i = 1, lastGroupStart = matcher.getBegin(); i <= groupCount; i++) {
+            final int groupStart = region.beg[i];
             if (lastGroupStart > groupStart
                     || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
                 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
                 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
                 // in the pattern always return undefined because the negative lookahead must fail.
                 groups[i] = UNDEFINED;
                 continue;
             }
-            final String group = matcher.group(i);
-            groups[i] = group == null ? UNDEFINED : group;
+            final int begin = region.beg[i];
+            final int end = region.end[i];
+            groups[i] = begin == -1 ? UNDEFINED : input.substring(begin, end); // a start of -1 is mapped to undefined
             lastGroupStart = groupStart;
         }
         return groups;
     }
 
+    /**
+     * Remember the string being matched together with its char[] form.
+     * The array is only rebuilt when the text differs from the cached one.
+     *
+     * @param input string about to be matched
+     */
+    private void setInput(final String input) {
+        if (input.equals(this.input)) {
+            // same text as last time: the cached char[] is still valid
+            return;
+        }
+        this.inputChars = input.toCharArray();
+        this.input = input;
+    }
+
     /**
      * Executes a search for a match within a string based on a regular
      * expression. It returns an array of information or null if no match is
      * found.
      *

@@ -446,11 +494,12 @@
      * @param string String to match.
      * @param replacement Replacement string.
      * @return String with substitutions.
      */
     Object replace(final String string, final String replacement, final ScriptFunction function) {
-        final Matcher matcher = pattern.matcher(string);
+        setInput(string);
+        final Matcher matcher = regex.matcher(inputChars);
         /*
          * $$ -> $
          * $& -> the matched substring
          * $` -> the portion of string that preceeds matched substring
          * $' -> the portion of string that follows the matched substring

@@ -458,57 +507,59 @@
          * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
          */
         String replace = replacement;
 
         if (!global) {
-            if (!matcher.find()) {
+            if (matcher.search(0, inputChars.length, Option.NONE) == -1) {
                 return string;
             }
 
             final StringBuilder sb = new StringBuilder();
             if (function != null) {
                 replace = callReplaceValue(function, matcher, string);
             }
             appendReplacement(matcher, string, replace, sb, 0);
-            sb.append(string, matcher.end(), string.length());
+            sb.append(string, matcher.getEnd(), string.length());
             return sb.toString();
         }
 
         int end = 0; // a.k.a. lastAppendPosition
         setLastIndex(0);
 
-        boolean found;
+        int found;
         try {
-            found = matcher.find(end);
+            found = matcher.search(end, inputChars.length, Option.NONE);
         } catch (final IndexOutOfBoundsException e) {
-            found = false;
+            found = -1;
         }
 
-        if (!found) {
+        if (found == -1) {
             return string;
         }
 
+        int thisIndex = 0;
         int previousLastIndex = 0;
         final StringBuilder sb = new StringBuilder();
         do {
             if (function != null) {
                 replace = callReplaceValue(function, matcher, string);
             }
-            appendReplacement(matcher, string, replace, sb, end);
-            end = matcher.end();
+            appendReplacement(matcher, string, replace, sb, thisIndex);
+            end = matcher.getEnd();
 
             // ECMA 15.5.4.10 String.prototype.match(regexp)
-            final int thisIndex = end;
+            thisIndex = end;
             if (thisIndex == previousLastIndex) {
                 setLastIndex(thisIndex + 1);
                 previousLastIndex = thisIndex + 1;
+                end++;
             } else {
                 previousLastIndex = thisIndex;
             }
-        } while (matcher.find());
+        } while (matcher.search(end, inputChars.length, Option.NONE) > -1);
 
-        sb.append(string, end, string.length());
+        sb.append(string, thisIndex, string.length());
 
         return sb.toString();
     }
 
     private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {

@@ -522,20 +573,22 @@
             if (nextChar == '$') {
                 // Skip past $
                 cursor++;
                 nextChar = replacement.charAt(cursor);
                 final int firstDigit = nextChar - '0';
+                Region region = matcher.getRegion();
+                int groupCount = region == null ? 0 : region.numRegs - 1;
 
-                if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
+                if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= groupCount) {
                     // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
                     int refNum = firstDigit;
                     cursor++;
-                    if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
+                    if (cursor < replacement.length() && firstDigit < groupCount) {
                         final int secondDigit = replacement.charAt(cursor) - '0';
                         if ((secondDigit >= 0) && (secondDigit <= 9)) {
                             final int newRefNum = (firstDigit * 10) + secondDigit;
-                            if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
+                            if (newRefNum <= groupCount && newRefNum > 0) {
                                 // $nn ($01-$99)
                                 refNum = newRefNum;
                                 cursor++;
                             }
                         }

@@ -554,17 +607,17 @@
                     }
                 } else if (nextChar == '$') {
                     result.append('$');
                     cursor++;
                 } else if (nextChar == '&') {
-                    result.append(matcher.group());
+                    result.append(text, matcher.getBegin(), matcher.getEnd());
                     cursor++;
                 } else if (nextChar == '`') {
-                    result.append(text.substring(0, matcher.start()));
+                    result.append(text, 0, matcher.getBegin());
                     cursor++;
                 } else if (nextChar == '\'') {
-                    result.append(text.substring(matcher.end()));
+                    result.append(text, matcher.getEnd(), text.length());
                     cursor++;
                 } else {
                     // unknown substitution or $n with n>m. skip.
                     result.append('$');
                 }

@@ -572,20 +625,20 @@
                 result.append(nextChar);
                 cursor++;
             }
         }
         // Append the intervening text
-        sb.append(text, lastAppendPosition, matcher.start());
+        sb.append(text, lastAppendPosition, matcher.getBegin());
         // Append the match substitution
         sb.append(result);
     }
 
     private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
         final Object[] groups = groups(matcher);
         final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
 
-        args[groups.length]     = matcher.start();
+        args[groups.length]     = matcher.getBegin();
         args[groups.length + 1] = string;
 
         final Object self = function.isStrict() ? UNDEFINED : Global.instance();
 
         return JSType.toString(ScriptRuntime.apply(function, self, args));

@@ -663,21 +716,23 @@
      *
      * @param string String to match.
      * @return Index of match.
      */
     Object search(final String string) {
-        final Matcher matcher = pattern.matcher(string);
+        // Returns the index of the first match at or after the start position, or -1.
+        // A global regexp starts at lastIndex and advances it only when a match is found.
+        if (regex == null) {
+            // Same guard as execInner(): without a compiled Regex nothing can match.
+            return -1;
+        }
+
+        setInput(string);
+
+        final Matcher matcher = regex.matcher(inputChars);
 
         int start = 0;
         if (global) {
             start = getLastIndex();
         }
 
-        start = matcher.find(start) ? matcher.start() : -1;
+        // search() yields the match index, or -1 when there is no match.
+        start = matcher.search(start, inputChars.length, Option.NONE);
 
-        if (global) {
-            setLastIndex(matcher.end());
+        // The matcher's end offset is only meaningful after a hit; on a miss lastIndex is left untouched.
+        if (global && start != -1) {
+            setLastIndex(matcher.getEnd());
         }
 
         return start;
     }
 

@@ -719,16 +774,16 @@
             typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
             return null;
         }
     }
 
-    private String getInput() {
-        return input;
+    private String getSource() {
+        return source; // pattern text, as printed by toString() between the slashes
     }
 
-    private void setInput(final String input) {
-        this.input = input;
+    private void setSource(final String source) {
+        this.source = source; // called by compile() when copying a freshly built regexp into 'self'
     }
 
     boolean getGlobal() {
         return global; // the flag getFlagString() prints as 'g'
     }

@@ -751,16 +806,16 @@
 
     private void setMultiline(final boolean multiline) {
         this.multiline = multiline; // called by compile() when copying flags into 'self'
     }
 
-    private Pattern getPattern() {
-        return pattern;
+    private Regex getRegex() {
+        return regex; // may be null; execInner() treats a null regex as "never matches"
     }
 
-    private void setPattern(final Pattern pattern) {
-        this.pattern = pattern;
+    private void setRegex(final Regex regex) {
+        this.regex = regex; // called by compile() to install the Regex of the newly built regexp
     }
 
     private BitVector getGroupsInNegativeLookahead() {
         return groupsInNegativeLookahead; // may be null; groups() reports every group set here as undefined
     }