src/jdk/nashorn/internal/objects/NativeRegExp.java

Print this page




  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.objects;
  27 
  28 import static jdk.nashorn.internal.runtime.ECMAErrors.typeError;
  29 import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
  30 
  31 import java.util.ArrayList;
  32 import java.util.Arrays;
  33 import java.util.List;
  34 import java.util.regex.Matcher;
  35 import java.util.regex.Pattern;
  36 import jdk.nashorn.internal.objects.annotations.Attribute;
  37 import jdk.nashorn.internal.objects.annotations.Constructor;
  38 import jdk.nashorn.internal.objects.annotations.Function;
  39 import jdk.nashorn.internal.objects.annotations.Getter;
  40 import jdk.nashorn.internal.objects.annotations.Property;
  41 import jdk.nashorn.internal.objects.annotations.ScriptClass;
  42 import jdk.nashorn.internal.objects.annotations.SpecializedConstructor;
  43 import jdk.nashorn.internal.parser.RegExp;
  44 import jdk.nashorn.internal.runtime.BitVector;
  45 import jdk.nashorn.internal.runtime.JSType;
  46 import jdk.nashorn.internal.runtime.ParserException;
  47 import jdk.nashorn.internal.runtime.RegExpMatch;
  48 import jdk.nashorn.internal.runtime.ScriptFunction;
  49 import jdk.nashorn.internal.runtime.ScriptObject;
  50 import jdk.nashorn.internal.runtime.ScriptRuntime;
  51 





  52 /**
  53  * ECMA 15.10 RegExp Objects.
  54  */
  55 @ScriptClass("RegExp")
  56 public final class NativeRegExp extends ScriptObject {
  57     /** ECMA 15.10.7.5 lastIndex property */
  58     @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
  59     public Object lastIndex;
  60 
  61     /** Pattern string. */



  62     private String input;
  63 



  64     /** Global search flag for this regexp. */
  65     private boolean global;
  66 
  67     /** Case insensitive flag for this regexp */
  68     private boolean ignoreCase;
  69 
  70     /** Multi-line flag for this regexp */
  71     private boolean multiline;
  72 
  73     /** Java regex pattern to use for match. We compile to one of these */
  74     private Pattern pattern;
  75 
  76     private BitVector groupsInNegativeLookahead;
  77 
  78     /*
  79     public NativeRegExp() {
  80         init();
  81     }*/
  82 
  83     NativeRegExp(final String input, final String flagString) {
  84         RegExp regExp = null;
  85         try {
  86             regExp = new RegExp(input, flagString);
  87         } catch (final ParserException e) {
  88             // translate it as SyntaxError object and throw it
  89             e.throwAsEcmaException(Global.instance());
  90             throw new AssertionError(); //guard against null warnings below
  91         }
  92 
  93         this.setLastIndex(0);
  94         this.input = regExp.getInput();
  95         this.global = regExp.isGlobal();
  96         this.ignoreCase = regExp.isIgnoreCase();
  97         this.multiline = regExp.isMultiline();
  98         this.pattern = regExp.getPattern();
  99         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 100 
 101         init();
 102     }
 103 
 104     NativeRegExp(final String string) {
 105         this(string, "");
 106     }
 107 
 108     NativeRegExp(final NativeRegExp regExp) {
 109         this.input      = regExp.getInput();
 110         this.global     = regExp.getGlobal();
 111         this.multiline  = regExp.getMultiline();
 112         this.ignoreCase = regExp.getIgnoreCase();
 113         this.lastIndex  = regExp.getLastIndexObject();
 114         this.pattern    = regExp.getPattern();
 115         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 116 
 117         init();
 118     }
 119 
 120     NativeRegExp(final Pattern pattern) {
 121         this.input      = pattern.pattern();
 122         this.multiline  = (pattern.flags() & Pattern.MULTILINE) != 0;
 123         this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
 124         this.lastIndex  = 0;
 125         this.pattern    = pattern;
 126 
 127         init();
 128     }
 129 
 130     @Override
 131     public String getClassName() {
 132         return "RegExp";
 133     }
 134 
 135     /**
 136      * ECMA 15.10.4
 137      *
 138      * Constructor
 139      *
 140      * @param isNew is the new operator used for instantiating this regexp
 141      * @param self  self reference
 142      * @param args  arguments (optional: pattern and flags)
 143      * @return new NativeRegExp
 144      */
 145     @Constructor(arity = 2)
 146     public static Object constructor(final boolean isNew, final Object self, final Object... args) {
 147         if (args.length > 1) {
 148             return newRegExp(args[0], args[1]);


 212         boolean flagsDefined  = false;
 213 
 214         if (flags != UNDEFINED) {
 215             flagsDefined = true;
 216             flagString = JSType.toString(flags);
 217         }
 218 
 219         if (regexp != UNDEFINED) {
 220             if (regexp instanceof NativeRegExp) {
 221                 if (!flagsDefined) {
 222                     return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as
 223                 }
 224                 typeError(Global.instance(), "regex.cant.supply.flags");
 225             }
 226             patternString = JSType.toString(regexp);
 227         }
 228 
 229         return new NativeRegExp(patternString, flagString);
 230     }
 231 


























 232     private String getFlagString() {
 233         final StringBuilder sb = new StringBuilder();
 234 
 235         if (global) {
 236             sb.append('g');
 237         }
 238         if (ignoreCase) {
 239             sb.append('i');
 240         }
 241         if (multiline) {
 242             sb.append('m');
 243         }
 244 
 245         return sb.toString();
 246     }
 247 
 248     @Override
 249     public String safeToString() {
 250         return "[RegExp " + toString() + "]";
 251     }
 252 
 253     @Override
 254     public String toString() {
 255         return "/" + input + "/" + getFlagString();
 256     }
 257 
 258     /**
 259      * Nashorn extension: RegExp.prototype.compile - everybody implements this!
 260      *
 261      * @param self    self reference
 262      * @param pattern pattern
 263      * @param flags   flags
 264      * @return new NativeRegExp
 265      */
 266     @Function(attributes = Attribute.NOT_ENUMERABLE)
 267     public static Object compile(final Object self, final Object pattern, final Object flags) {
 268         final NativeRegExp regExp   = checkRegExp(self);
 269         final NativeRegExp compiled = newRegExp(pattern, flags);
 270         // copy over fields to 'self'
 271         regExp.setInput(compiled.getInput());
 272         regExp.setGlobal(compiled.getGlobal());
 273         regExp.setIgnoreCase(compiled.getIgnoreCase());
 274         regExp.setMultiline(compiled.getMultiline());
 275         regExp.setPattern(compiled.getPattern());
 276         regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
 277 
 278         // Some implementations return undefined. Some return 'self'. Since return
 279         // value is most likely be ignored, we can play safe and return 'self'.
 280         return regExp;
 281     }
 282 
 283     /**
 284      * ECMA 15.10.6.2 RegExp.prototype.exec(string)
 285      *
 286      * @param self   self reference
 287      * @param string string to match against regexp
 288      * @return array containing the matches or {@code null} if no match
 289      */
 290     @Function(attributes = Attribute.NOT_ENUMERABLE)
 291     public static Object exec(final Object self, final Object string) {
 292         return checkRegExp(self).exec(JSType.toString(string));
 293     }
 294 
 295     /**


 306 
 307     /**
 308      * ECMA 15.10.6.4 RegExp.prototype.toString()
 309      *
 310      * @param self self reference
 311      * @return string version of regexp
 312      */
 313     @Function(attributes = Attribute.NOT_ENUMERABLE)
 314     public static Object toString(final Object self) {
 315         return checkRegExp(self).toString();
 316     }
 317 
 318     /**
 319      * ECMA 15.10.7.1 source
 320      *
 321      * @param self self reference
 322      * @return the input string for the regexp
 323      */
 324     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 325     public static Object source(final Object self) {
 326         return checkRegExp(self).input;
 327     }
 328 
 329     /**
 330      * ECMA 15.10.7.2 global
 331      *
 332      * @param self self reference
 333      * @return true if this regexp is flagged global, false otherwise
 334      */
 335     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 336     public static Object global(final Object self) {
 337         return checkRegExp(self).global;
 338     }
 339 
 340     /**
 341      * ECMA 15.10.7.3 ignoreCase
 342      *
 343      * @param self self reference
 344      * @return true if this regexp if flagged to ignore case, false otherwise
 345      */
 346     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 347     public static Object ignoreCase(final Object self) {
 348         return checkRegExp(self).ignoreCase;
 349     }
 350 
 351     /**
 352      * ECMA 15.10.7.4 multiline
 353      *
 354      * @param self self reference
 355      * @return true if this regexp is flagged to be multiline, false otherwise
 356      */
 357     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 358     public static Object multiline(final Object self) {
 359         return checkRegExp(self).multiline;
 360     }
 361 
 362     private RegExpMatch execInner(final String string) {
 363         if (this.pattern == null) {
 364             return null; // never matches or similar, e.g. a[]
 365         }
 366 
 367         final Matcher matcher = pattern.matcher(string);


 368         final int start = this.global ? getLastIndex() : 0;
 369 
 370         if (start < 0 || start > string.length()) {
 371             setLastIndex(0);
 372             return null;
 373         }
 374 
 375         if (!matcher.find(start)) {
 376             setLastIndex(0);
 377             return null;
 378         }
 379 
 380         if (global) {
 381             setLastIndex(matcher.end());
 382         }
 383 
 384         return new RegExpMatch(string, matcher.start(), groups(matcher));
 385     }
 386 
 387     /**
 388      * Convert java.util.regex.Matcher groups to JavaScript groups.
 389      * That is, replace null and groups that didn't match with undefined.
 390      */
 391     private Object[] groups(final Matcher matcher) {
 392         final int groupCount = matcher.groupCount();

 393         final Object[] groups = new Object[groupCount + 1];
 394         for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
 395             final int groupStart = matcher.start(i);


 396             if (lastGroupStart > groupStart
 397                     || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
 398                 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
 399                 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
 400                 // in the pattern always return undefined because the negative lookahead must fail.
 401                 groups[i] = UNDEFINED;
 402                 continue;
 403             }
 404             final String group = matcher.group(i);
 405             groups[i] = group == null ? UNDEFINED : group;

 406             lastGroupStart = groupStart;
 407         }
 408         return groups;
 409     }
 410 







 411     /**
 412      * Executes a search for a match within a string based on a regular
 413      * expression. It returns an array of information or null if no match is
 414      * found.
 415      *
 416      * @param string String to match.
 417      * @return NativeArray of matches, string or null.
 418      */
 419     public Object exec(final String string) {
 420         final RegExpMatch m = execInner(string);
 421         // the input string
 422         if (m == null) {
 423             return null;
 424         }
 425 
 426         return new NativeRegExpExecResult(m);
 427     }
 428 
 429     /**
 430      * Executes a search for a match within a string based on a regular
 431      * expression.
 432      *
 433      * @param string String to match.
 434      * @return True if a match is found.
 435      */
 436     public Object test(final String string) {
 437         return exec(string) != null;
 438     }
 439 
 440     /**
 441      * Searches and replaces the regular expression portion (match) with the
 442      * replaced text instead. For the "replacement text" parameter, you can use
 443      * the keywords $1 to $2 to replace the original text with values from
 444      * sub-patterns defined within the main pattern.
 445      *
 446      * @param string String to match.
 447      * @param replacement Replacement string.
 448      * @return String with substitutions.
 449      */
 450     Object replace(final String string, final String replacement, final ScriptFunction function) {
 451         final Matcher matcher = pattern.matcher(string);

 452         /*
 453          * $$ -> $
 454          * $& -> the matched substring
 455          * $` -> the portion of string that preceeds matched substring
 456          * $' -> the portion of string that follows the matched substring
 457          * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit
 458          * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
 459          */
 460         String replace = replacement;
 461 
 462         if (!global) {
 463             if (!matcher.find()) {
 464                 return string;
 465             }
 466 
 467             final StringBuilder sb = new StringBuilder();
 468             if (function != null) {
 469                 replace = callReplaceValue(function, matcher, string);
 470             }
 471             appendReplacement(matcher, string, replace, sb, 0);
 472             sb.append(string, matcher.end(), string.length());
 473             return sb.toString();
 474         }
 475 
 476         int end = 0; // a.k.a. lastAppendPosition
 477         setLastIndex(0);
 478 
 479         boolean found;
 480         try {
 481             found = matcher.find(end);
 482         } catch (final IndexOutOfBoundsException e) {
 483             found = false;
 484         }
 485 
 486         if (!found) {
 487             return string;
 488         }
 489 

 490         int previousLastIndex = 0;
 491         final StringBuilder sb = new StringBuilder();
 492         do {
 493             if (function != null) {
 494                 replace = callReplaceValue(function, matcher, string);
 495             }
 496             appendReplacement(matcher, string, replace, sb, end);
 497             end = matcher.end();
 498 
 499             // ECMA 15.5.4.10 String.prototype.match(regexp)
 500             final int thisIndex = end;
 501             if (thisIndex == previousLastIndex) {
 502                 setLastIndex(thisIndex + 1);
 503                 previousLastIndex = thisIndex + 1;

 504             } else {
 505                 previousLastIndex = thisIndex;
 506             }
 507         } while (matcher.find());
 508 
 509         sb.append(string, end, string.length());
 510 
 511         return sb.toString();
 512     }
 513 
 514     private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
 515         // Process substitution string to replace group references with groups
 516         int cursor = 0;
 517         final StringBuilder result = new StringBuilder();
 518         Object[] groups = null;
 519 
 520         while (cursor < replacement.length()) {
 521             char nextChar = replacement.charAt(cursor);
 522             if (nextChar == '$') {
 523                 // Skip past $
 524                 cursor++;
 525                 nextChar = replacement.charAt(cursor);
 526                 final int firstDigit = nextChar - '0';


 527 
 528                 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
 529                     // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
 530                     int refNum = firstDigit;
 531                     cursor++;
 532                     if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
 533                         final int secondDigit = replacement.charAt(cursor) - '0';
 534                         if ((secondDigit >= 0) && (secondDigit <= 9)) {
 535                             final int newRefNum = (firstDigit * 10) + secondDigit;
 536                             if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
 537                                 // $nn ($01-$99)
 538                                 refNum = newRefNum;
 539                                 cursor++;
 540                             }
 541                         }
 542                     }
 543                     if (refNum > 0) {
 544                         if (groups == null) {
 545                             groups = groups(matcher);
 546                         }
 547                         // Append group if matched.
 548                         if (groups[refNum] != UNDEFINED) {
 549                             result.append((String) groups[refNum]);
 550                         }
 551                     } else { // $0. ignore.
 552                         assert refNum == 0;
 553                         result.append("$0");
 554                     }
 555                 } else if (nextChar == '$') {
 556                     result.append('$');
 557                     cursor++;
 558                 } else if (nextChar == '&') {
 559                     result.append(matcher.group());
 560                     cursor++;
 561                 } else if (nextChar == '`') {
 562                     result.append(text.substring(0, matcher.start()));
 563                     cursor++;
 564                 } else if (nextChar == '\'') {
 565                     result.append(text.substring(matcher.end()));
 566                     cursor++;
 567                 } else {
 568                     // unknown substitution or $n with n>m. skip.
 569                     result.append('$');
 570                 }
 571             } else {
 572                 result.append(nextChar);
 573                 cursor++;
 574             }
 575         }
 576         // Append the intervening text
 577         sb.append(text, lastAppendPosition, matcher.start());
 578         // Append the match substitution
 579         sb.append(result);
 580     }
 581 
 582     private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
 583         final Object[] groups = groups(matcher);
 584         final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
 585 
 586         args[groups.length]     = matcher.start();
 587         args[groups.length + 1] = string;
 588 
 589         final Object self = function.isStrict() ? UNDEFINED : Global.instance();
 590 
 591         return JSType.toString(ScriptRuntime.apply(function, self, args));
 592     }
 593 
 594     /**
 595      * Breaks up a string into an array of substrings based on a regular
 596      * expression or fixed string.
 597      *
 598      * @param string String to match.
 599      * @param limit  Split limit.
 600      * @return Array of substrings.
 601      */
 602     Object split(final String string, final long limit) {
 603         return split(this, string, limit);
 604     }
 605 
 606     private static Object split(final NativeRegExp regexp0, final String input, final long limit) {


 648             if (lastLastIndex == input.length()) {
 649                 if (lastLength > 0 || regexp.test("") == Boolean.FALSE) {
 650                     matches.add("");
 651                 }
 652             } else {
 653                 matches.add(input.substring(lastLastIndex, inputLength));
 654             }
 655         }
 656 
 657         return new NativeArray(matches.toArray());
 658     }
 659 
 660     /**
 661      * Tests for a match in a string. It returns the index of the match, or -1
 662      * if not found.
 663      *
 664      * @param string String to match.
 665      * @return Index of match.
 666      */
 667     Object search(final String string) {
 668         final Matcher matcher = pattern.matcher(string);


 669 
 670         int start = 0;
 671         if (global) {
 672             start = getLastIndex();
 673         }
 674 
 675         start = matcher.find(start) ? matcher.start() : -1;
 676 
 677         if (global) {
 678             setLastIndex(matcher.end());
 679         }
 680 
 681         return start;
 682     }
 683 
 684     /**
 685      * Fast lastIndex getter
 686      * @return last index property as int
 687      */
 688     public int getLastIndex() {
 689         return JSType.toInt32(lastIndex);
 690     }
 691 
 692     /**
 693      * Fast lastIndex getter
 694      * @return last index property as boxed integer
 695      */
 696     public Object getLastIndexObject() {
 697         return lastIndex;
 698     }


 704     public void setLastIndex(final int lastIndex) {
 705         this.lastIndex = JSType.toObject(lastIndex);
 706     }
 707 
 708     private void init() {
 709         this.setProto(Global.instance().getRegExpPrototype());
 710     }
 711 
 712     private static NativeRegExp checkRegExp(final Object self) {
 713         Global.checkObjectCoercible(self);
 714         if (self instanceof NativeRegExp) {
 715             return (NativeRegExp)self;
 716         } else if (self != null && self == Global.instance().getRegExpPrototype()) {
 717             return Global.instance().DEFAULT_REGEXP;
 718         } else {
 719             typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
 720             return null;
 721         }
 722     }
 723 
 724     private String getInput() {
 725         return input;
 726     }
 727 
 728     private void setInput(final String input) {
 729         this.input = input;
 730     }
 731 
 732     boolean getGlobal() {
 733         return global;
 734     }
 735 
 736     private void setGlobal(final boolean global) {
 737         this.global = global;
 738     }
 739 
 740     private boolean getIgnoreCase() {
 741         return ignoreCase;
 742     }
 743 
 744     private void setIgnoreCase(final boolean ignoreCase) {
 745         this.ignoreCase = ignoreCase;
 746     }
 747 
 748     private boolean getMultiline() {
 749         return multiline;
 750     }
 751 
 752     private void setMultiline(final boolean multiline) {
 753         this.multiline = multiline;
 754     }
 755 
 756     private Pattern getPattern() {
 757         return pattern;
 758     }
 759 
 760     private void setPattern(final Pattern pattern) {
 761         this.pattern = pattern;
 762     }
 763 
 764     private BitVector getGroupsInNegativeLookahead() {
 765         return groupsInNegativeLookahead;
 766     }
 767 
 768     private void setGroupsInNegativeLookahead(final BitVector groupsInNegativeLookahead) {
 769         this.groupsInNegativeLookahead = groupsInNegativeLookahead;
 770     }
 771 
 772 }


  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.objects;
  27 
  28 import static jdk.nashorn.internal.runtime.ECMAErrors.typeError;
  29 import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
  30 
  31 import java.util.ArrayList;
  32 import java.util.Arrays;
  33 import java.util.List;


  34 import jdk.nashorn.internal.objects.annotations.Attribute;
  35 import jdk.nashorn.internal.objects.annotations.Constructor;
  36 import jdk.nashorn.internal.objects.annotations.Function;
  37 import jdk.nashorn.internal.objects.annotations.Getter;
  38 import jdk.nashorn.internal.objects.annotations.Property;
  39 import jdk.nashorn.internal.objects.annotations.ScriptClass;
  40 import jdk.nashorn.internal.objects.annotations.SpecializedConstructor;
  41 import jdk.nashorn.internal.parser.RegExp;
  42 import jdk.nashorn.internal.runtime.BitVector;
  43 import jdk.nashorn.internal.runtime.JSType;
  44 import jdk.nashorn.internal.runtime.ParserException;
  45 import jdk.nashorn.internal.runtime.RegExpMatch;
  46 import jdk.nashorn.internal.runtime.ScriptFunction;
  47 import jdk.nashorn.internal.runtime.ScriptObject;
  48 import jdk.nashorn.internal.runtime.ScriptRuntime;
  49 
  50 import jdk.nashorn.internal.joni.Matcher;
  51 import jdk.nashorn.internal.joni.Option;
  52 import jdk.nashorn.internal.joni.Regex;
  53 import jdk.nashorn.internal.joni.Region;
  54 
  55 /**
  56  * ECMA 15.10 RegExp Objects.
  57  */
  58 @ScriptClass("RegExp")
  59 public final class NativeRegExp extends ScriptObject {
  60     /** ECMA 15.10.7.5 lastIndex property */
  61     @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
  62     public Object lastIndex;
  63 
  64     /** Pattern string. */
  65     private String source;
  66 
  67     /** Input string. */
  68     private String input;
  69 
  70     /** Input string as char array */
  71     private char[] inputChars;
  72 
  73     /** Global search flag for this regexp. */
  74     private boolean global;
  75 
  76     /** Case insensitive flag for this regexp */
  77     private boolean ignoreCase;
  78 
  79     /** Multi-line flag for this regexp */
  80     private boolean multiline;
  81 
  82     /** Joni regex pattern to use for match. We compile to one of these */
  83     private Regex regex;
  84 
  85     private BitVector groupsInNegativeLookahead;
  86 
  87     /*
  88     public NativeRegExp() {
  89         init();
  90     }*/
  91 
  92     NativeRegExp(final String input, final String flagString) {
  93         RegExp regExp = null;
  94         try {
  95             regExp = new RegExp(input, flagString);
  96         } catch (final ParserException e) {
  97             // translate it as SyntaxError object and throw it
  98             e.throwAsEcmaException(Global.instance());
  99             throw new AssertionError(); //guard against null warnings below
 100         }
 101 
 102         this.setLastIndex(0);
 103         this.source = regExp.getSource();
 104         this.global = regExp.isGlobal();
 105         this.ignoreCase = regExp.isIgnoreCase();
 106         this.multiline = regExp.isMultiline();
 107         this.regex = regExp.getRegex();
 108         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 109 
 110         init();
 111     }
 112 
 113     NativeRegExp(final String string) {
 114         this(string, "");
 115     }
 116 
 117     NativeRegExp(final NativeRegExp regExp) {
 118         this.source     = regExp.getSource();
 119         this.global     = regExp.getGlobal();
 120         this.multiline  = regExp.getMultiline();
 121         this.ignoreCase = regExp.getIgnoreCase();
 122         this.lastIndex  = regExp.getLastIndexObject();
 123         this.regex      = regExp.getRegex();
 124         this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
 125 
 126         init();
 127     }
 128 
 129     /* NativeRegExp(final Pattern pattern) {
 130         this.input      = pattern.pattern();
 131         this.multiline  = (pattern.flags() & Pattern.MULTILINE) != 0;
 132         this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
 133         this.lastIndex  = 0;
 134         this.pattern    = pattern;
 135 
 136         init();
 137     } */
 138 
 139     @Override
 140     public String getClassName() {
 141         return "RegExp";
 142     }
 143 
 144     /**
 145      * ECMA 15.10.4
 146      *
 147      * Constructor
 148      *
 149      * @param isNew is the new operator used for instantiating this regexp
 150      * @param self  self reference
 151      * @param args  arguments (optional: pattern and flags)
 152      * @return new NativeRegExp
 153      */
 154     @Constructor(arity = 2)
 155     public static Object constructor(final boolean isNew, final Object self, final Object... args) {
 156         if (args.length > 1) {
 157             return newRegExp(args[0], args[1]);


 221         boolean flagsDefined  = false;
 222 
 223         if (flags != UNDEFINED) {
 224             flagsDefined = true;
 225             flagString = JSType.toString(flags);
 226         }
 227 
 228         if (regexp != UNDEFINED) {
 229             if (regexp instanceof NativeRegExp) {
 230                 if (!flagsDefined) {
 231                     return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as
 232                 }
 233                 typeError(Global.instance(), "regex.cant.supply.flags");
 234             }
 235             patternString = JSType.toString(regexp);
 236         }
 237 
 238         return new NativeRegExp(patternString, flagString);
 239     }
 240 
 241     public static NativeRegExp flatRegExp(String string) {
 242         // escape special characters
 243         StringBuilder sb = new StringBuilder(string.length());
 244         for (int i = 0; i < string.length(); i++) {
 245             final char c = string.charAt(i);
 246             switch (c) {
 247                 case '^':
 248                 case '$':
 249                 case '\\':
 250                 case '.':
 251                 case '*':
 252                 case '+':
 253                 case '?':
 254                 case '(':
 255                 case ')':
 256                 case '[':
 257                 case '{':
 258                 case '|':
 259                     sb.append('\\');
 260                 default:
 261                     sb.append(c);
 262             }
 263         }
 264         return new NativeRegExp(sb.toString(), "");
 265     }
 266 
 267     private String getFlagString() {
 268         final StringBuilder sb = new StringBuilder();
 269 
 270         if (global) {
 271             sb.append('g');
 272         }
 273         if (ignoreCase) {
 274             sb.append('i');
 275         }
 276         if (multiline) {
 277             sb.append('m');
 278         }
 279 
 280         return sb.toString();
 281     }
 282 
 283     @Override
 284     public String safeToString() {
 285         return "[RegExp " + toString() + "]";
 286     }
 287 
 288     @Override
 289     public String toString() {
 290         return "/" + source + "/" + getFlagString();
 291     }
 292 
 293     /**
 294      * Nashorn extension: RegExp.prototype.compile - everybody implements this!
 295      *
 296      * @param self    self reference
 297      * @param pattern pattern
 298      * @param flags   flags
 299      * @return new NativeRegExp
 300      */
 301     @Function(attributes = Attribute.NOT_ENUMERABLE)
 302     public static Object compile(final Object self, final Object pattern, final Object flags) {
 303         final NativeRegExp regExp   = checkRegExp(self);
 304         final NativeRegExp compiled = newRegExp(pattern, flags);
 305         // copy over fields to 'self'
 306         regExp.setSource(compiled.getSource());
 307         regExp.setGlobal(compiled.getGlobal());
 308         regExp.setIgnoreCase(compiled.getIgnoreCase());
 309         regExp.setMultiline(compiled.getMultiline());
 310         regExp.setRegex(compiled.getRegex());
 311         regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
 312 
 313         // Some implementations return undefined. Some return 'self'. Since return
 314         // value is most likely be ignored, we can play safe and return 'self'.
 315         return regExp;
 316     }
 317 
 318     /**
 319      * ECMA 15.10.6.2 RegExp.prototype.exec(string)
 320      *
 321      * @param self   self reference
 322      * @param string string to match against regexp
 323      * @return array containing the matches or {@code null} if no match
 324      */
 325     @Function(attributes = Attribute.NOT_ENUMERABLE)
 326     public static Object exec(final Object self, final Object string) {
 327         return checkRegExp(self).exec(JSType.toString(string));
 328     }
 329 
 330     /**


 341 
 342     /**
 343      * ECMA 15.10.6.4 RegExp.prototype.toString()
 344      *
 345      * @param self self reference
 346      * @return string version of regexp
 347      */
 348     @Function(attributes = Attribute.NOT_ENUMERABLE)
 349     public static Object toString(final Object self) {
 350         return checkRegExp(self).toString();
 351     }
 352 
 353     /**
 354      * ECMA 15.10.7.1 source
 355      *
 356      * @param self self reference
 357      * @return the input string for the regexp
 358      */
 359     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 360     public static Object source(final Object self) {
 361         return checkRegExp(self).source;
 362     }
 363 
 364     /**
 365      * ECMA 15.10.7.2 global
 366      *
 367      * @param self self reference
 368      * @return true if this regexp is flagged global, false otherwise
 369      */
 370     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 371     public static Object global(final Object self) {
 372         return checkRegExp(self).global;
 373     }
 374 
 375     /**
 376      * ECMA 15.10.7.3 ignoreCase
 377      *
 378      * @param self self reference
 379      * @return true if this regexp if flagged to ignore case, false otherwise
 380      */
 381     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 382     public static Object ignoreCase(final Object self) {
 383         return checkRegExp(self).ignoreCase;
 384     }
 385 
 386     /**
 387      * ECMA 15.10.7.4 multiline
 388      *
 389      * @param self self reference
 390      * @return true if this regexp is flagged to be multiline, false otherwise
 391      */
 392     @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
 393     public static Object multiline(final Object self) {
 394         return checkRegExp(self).multiline;
 395     }
 396 
 397     private RegExpMatch execInner(final String string) {
 398         if (this.regex == null) {
 399             return null; // never matches or similar, e.g. a[]
 400         }
 401 
 402         setInput(string);
 403 
 404         final Matcher matcher = regex.matcher(inputChars);
 405         final int start = this.global ? getLastIndex() : 0;
 406 
 407         if (start < 0 || start > string.length()) {
 408             setLastIndex(0);
 409             return null;
 410         }
 411 
 412         if (matcher.search(start, inputChars.length, Option.NONE) == -1) {
 413             setLastIndex(0);
 414             return null;
 415         }
 416 
 417         if (global) {
 418             setLastIndex(matcher.getEnd());
 419         }
 420 
 421         return new RegExpMatch(string, matcher.getBegin(), groups(matcher));
 422     }
 423 
 424     /**
 425      * Convert java.util.regex.Matcher groups to JavaScript groups.
 426      * That is, replace null and groups that didn't match with undefined.
 427      */
 428     private Object[] groups(final Matcher matcher) {
 429         Region region = matcher.getRegion();
 430         final int groupCount = region == null ? 0 : region.numRegs - 1;
 431         final Object[] groups = new Object[groupCount + 1];
 432         groups[0] = input.substring(matcher.getBegin(), matcher.getEnd());
 433 
 434         for (int i = 1, lastGroupStart = matcher.getBegin(); i <= groupCount; i++) {
 435             final int groupStart = region.beg[i];
 436             if (lastGroupStart > groupStart
 437                     || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
 438                 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
 439                 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
 440                 // in the pattern always return undefined because the negative lookahead must fail.
 441                 groups[i] = UNDEFINED;
 442                 continue;
 443             }
 444             final int begin = region.beg[i];
 445             final int end = region.end[i];
 446             groups[i] = begin == -1 ? UNDEFINED : input.substring(begin, end);
 447             lastGroupStart = groupStart;
 448         }
 449         return groups;
 450     }
 451 
 452     private void setInput(String input) {
 453         if (!input.equals(this.input)) {
 454             this.input = input;
 455             this.inputChars = input.toCharArray();
 456         }
 457     }
 458 
 459     /**
 460      * Executes a search for a match within a string based on a regular
 461      * expression. It returns an array of information or null if no match is
 462      * found.
 463      *
 464      * @param string String to match.
 465      * @return NativeArray of matches, string or null.
 466      */
 467     public Object exec(final String string) {
 468         final RegExpMatch m = execInner(string);
 469         // the input string
 470         if (m == null) {
 471             return null;
 472         }
 473 
 474         return new NativeRegExpExecResult(m);
 475     }
 476 
 477     /**
 478      * Executes a search for a match within a string based on a regular
 479      * expression.
 480      *
 481      * @param string String to match.
 482      * @return True if a match is found.
 483      */
 484     public Object test(final String string) {
 485         return exec(string) != null;
 486     }
 487 
 488     /**
 489      * Searches and replaces the regular expression portion (match) with the
 490      * replaced text instead. For the "replacement text" parameter, you can use
 491      * the keywords $1 to $2 to replace the original text with values from
 492      * sub-patterns defined within the main pattern.
 493      *
 494      * @param string String to match.
 495      * @param replacement Replacement string.
 496      * @return String with substitutions.
 497      */
 498     Object replace(final String string, final String replacement, final ScriptFunction function) {
 499         setInput(string);
 500         final Matcher matcher = regex.matcher(inputChars);
 501         /*
 502          * $$ -> $
 503          * $& -> the matched substring
 504          * $` -> the portion of string that preceeds matched substring
 505          * $' -> the portion of string that follows the matched substring
 506          * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit
 507          * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
 508          */
 509         String replace = replacement;
 510 
 511         if (!global) {
 512             if (matcher.search(0, inputChars.length, Option.NONE) == -1) {
 513                 return string;
 514             }
 515 
 516             final StringBuilder sb = new StringBuilder();
 517             if (function != null) {
 518                 replace = callReplaceValue(function, matcher, string);
 519             }
 520             appendReplacement(matcher, string, replace, sb, 0);
 521             sb.append(string, matcher.getEnd(), string.length());
 522             return sb.toString();
 523         }
 524 
 525         int end = 0; // a.k.a. lastAppendPosition
 526         setLastIndex(0);
 527 
 528         int found;
 529         try {
 530             found = matcher.search(end, inputChars.length, Option.NONE);
 531         } catch (final IndexOutOfBoundsException e) {
 532             found = -1;
 533         }
 534 
 535         if (found == -1) {
 536             return string;
 537         }
 538 
 539         int thisIndex = 0;
 540         int previousLastIndex = 0;
 541         final StringBuilder sb = new StringBuilder();
 542         do {
 543             if (function != null) {
 544                 replace = callReplaceValue(function, matcher, string);
 545             }
 546             appendReplacement(matcher, string, replace, sb, thisIndex);
 547             end = matcher.getEnd();
 548 
 549             // ECMA 15.5.4.10 String.prototype.match(regexp)
 550             thisIndex = end;
 551             if (thisIndex == previousLastIndex) {
 552                 setLastIndex(thisIndex + 1);
 553                 previousLastIndex = thisIndex + 1;
 554                 end++;
 555             } else {
 556                 previousLastIndex = thisIndex;
 557             }
 558         } while (matcher.search(end, inputChars.length, Option.NONE) > -1);
 559 
 560         sb.append(string, thisIndex, string.length());
 561 
 562         return sb.toString();
 563     }
 564 
 565     private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
 566         // Process substitution string to replace group references with groups
 567         int cursor = 0;
 568         final StringBuilder result = new StringBuilder();
 569         Object[] groups = null;
 570 
 571         while (cursor < replacement.length()) {
 572             char nextChar = replacement.charAt(cursor);
 573             if (nextChar == '$') {
 574                 // Skip past $
 575                 cursor++;
 576                 nextChar = replacement.charAt(cursor);
 577                 final int firstDigit = nextChar - '0';
 578                 Region region = matcher.getRegion();
 579                 int groupCount = region == null ? 0 : region.numRegs - 1;
 580 
 581                 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= groupCount) {
 582                     // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
 583                     int refNum = firstDigit;
 584                     cursor++;
 585                     if (cursor < replacement.length() && firstDigit < groupCount) {
 586                         final int secondDigit = replacement.charAt(cursor) - '0';
 587                         if ((secondDigit >= 0) && (secondDigit <= 9)) {
 588                             final int newRefNum = (firstDigit * 10) + secondDigit;
 589                             if (newRefNum <= groupCount && newRefNum > 0) {
 590                                 // $nn ($01-$99)
 591                                 refNum = newRefNum;
 592                                 cursor++;
 593                             }
 594                         }
 595                     }
 596                     if (refNum > 0) {
 597                         if (groups == null) {
 598                             groups = groups(matcher);
 599                         }
 600                         // Append group if matched.
 601                         if (groups[refNum] != UNDEFINED) {
 602                             result.append((String) groups[refNum]);
 603                         }
 604                     } else { // $0. ignore.
 605                         assert refNum == 0;
 606                         result.append("$0");
 607                     }
 608                 } else if (nextChar == '$') {
 609                     result.append('$');
 610                     cursor++;
 611                 } else if (nextChar == '&') {
 612                     result.append(text, matcher.getBegin(), matcher.getEnd());
 613                     cursor++;
 614                 } else if (nextChar == '`') {
 615                     result.append(text, 0, matcher.getBegin());
 616                     cursor++;
 617                 } else if (nextChar == '\'') {
 618                     result.append(text, matcher.getEnd(), text.length());
 619                     cursor++;
 620                 } else {
 621                     // unknown substitution or $n with n>m. skip.
 622                     result.append('$');
 623                 }
 624             } else {
 625                 result.append(nextChar);
 626                 cursor++;
 627             }
 628         }
 629         // Append the intervening text
 630         sb.append(text, lastAppendPosition, matcher.getBegin());
 631         // Append the match substitution
 632         sb.append(result);
 633     }
 634 
 635     private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
 636         final Object[] groups = groups(matcher);
 637         final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
 638 
 639         args[groups.length]     = matcher.getBegin();
 640         args[groups.length + 1] = string;
 641 
 642         final Object self = function.isStrict() ? UNDEFINED : Global.instance();
 643 
 644         return JSType.toString(ScriptRuntime.apply(function, self, args));
 645     }
 646 
 647     /**
 648      * Breaks up a string into an array of substrings based on a regular
 649      * expression or fixed string.
 650      *
 651      * @param string String to match.
 652      * @param limit  Split limit.
 653      * @return Array of substrings.
 654      */
 655     Object split(final String string, final long limit) {
 656         return split(this, string, limit);
 657     }
 658 
 659     private static Object split(final NativeRegExp regexp0, final String input, final long limit) {


 701             if (lastLastIndex == input.length()) {
 702                 if (lastLength > 0 || regexp.test("") == Boolean.FALSE) {
 703                     matches.add("");
 704                 }
 705             } else {
 706                 matches.add(input.substring(lastLastIndex, inputLength));
 707             }
 708         }
 709 
 710         return new NativeArray(matches.toArray());
 711     }
 712 
 713     /**
 714      * Tests for a match in a string. It returns the index of the match, or -1
 715      * if not found.
 716      *
 717      * @param string String to match.
 718      * @return Index of match.
 719      */
 720     Object search(final String string) {
 721         setInput(string);
 722 
 723         final Matcher matcher = regex.matcher(inputChars);
 724 
 725         int start = 0;
 726         if (global) {
 727             start = getLastIndex();
 728         }
 729 
 730         start = matcher.search(start, inputChars.length, Option.NONE);
 731 
 732         if (global) {
 733             setLastIndex(matcher.getEnd());
 734         }
 735 
 736         return start;
 737     }
 738 
 739     /**
 740      * Fast lastIndex getter
 741      * @return last index property as int
 742      */
 743     public int getLastIndex() {
 744         return JSType.toInt32(lastIndex);
 745     }
 746 
 747     /**
 748      * Fast lastIndex getter
 749      * @return last index property as boxed integer
 750      */
 751     public Object getLastIndexObject() {
 752         return lastIndex;
 753     }


 759     public void setLastIndex(final int lastIndex) {
 760         this.lastIndex = JSType.toObject(lastIndex);
 761     }
 762 
 763     private void init() {
 764         this.setProto(Global.instance().getRegExpPrototype());
 765     }
 766 
 767     private static NativeRegExp checkRegExp(final Object self) {
 768         Global.checkObjectCoercible(self);
 769         if (self instanceof NativeRegExp) {
 770             return (NativeRegExp)self;
 771         } else if (self != null && self == Global.instance().getRegExpPrototype()) {
 772             return Global.instance().DEFAULT_REGEXP;
 773         } else {
 774             typeError(Global.instance(), "not.a.regexp", ScriptRuntime.safeToString(self));
 775             return null;
 776         }
 777     }
 778 
 779     private String getSource() {
 780         return source;
 781     }
 782 
 783     private void setSource(final String source) {
 784         this.source = source;
 785     }
 786 
 787     boolean getGlobal() {
 788         return global;
 789     }
 790 
 791     private void setGlobal(final boolean global) {
 792         this.global = global;
 793     }
 794 
 795     private boolean getIgnoreCase() {
 796         return ignoreCase;
 797     }
 798 
 799     private void setIgnoreCase(final boolean ignoreCase) {
 800         this.ignoreCase = ignoreCase;
 801     }
 802 
 803     private boolean getMultiline() {
 804         return multiline;
 805     }
 806 
 807     private void setMultiline(final boolean multiline) {
 808         this.multiline = multiline;
 809     }
 810 
 811     private Regex getRegex() {
 812         return regex;
 813     }
 814 
 815     private void setRegex(final Regex regex) {
 816         this.regex = regex;
 817     }
 818 
 819     private BitVector getGroupsInNegativeLookahead() {
 820         return groupsInNegativeLookahead;
 821     }
 822 
 823     private void setGroupsInNegativeLookahead(final BitVector groupsInNegativeLookahead) {
 824         this.groupsInNegativeLookahead = groupsInNegativeLookahead;
 825     }
 826 
 827 }