1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.objects; 27 28 import static jdk.nashorn.internal.runtime.ECMAErrors.typeError; 29 import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED; 30 31 import java.lang.invoke.MethodHandle; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.List; 35 import java.util.concurrent.Callable; 36 37 import jdk.nashorn.internal.objects.annotations.Attribute; 38 import jdk.nashorn.internal.objects.annotations.Constructor; 39 import jdk.nashorn.internal.objects.annotations.Function; 40 import jdk.nashorn.internal.objects.annotations.Getter; 41 import jdk.nashorn.internal.objects.annotations.Property; 42 import jdk.nashorn.internal.objects.annotations.ScriptClass; 43 import jdk.nashorn.internal.objects.annotations.SpecializedFunction; 44 import jdk.nashorn.internal.objects.annotations.Where; 45 import jdk.nashorn.internal.runtime.BitVector; 46 import jdk.nashorn.internal.runtime.JSType; 47 import jdk.nashorn.internal.runtime.ParserException; 48 import jdk.nashorn.internal.runtime.PropertyMap; 49 import jdk.nashorn.internal.runtime.ScriptFunction; 50 import jdk.nashorn.internal.runtime.ScriptObject; 51 import jdk.nashorn.internal.runtime.ScriptRuntime; 52 import jdk.nashorn.internal.runtime.linker.Bootstrap; 53 import jdk.nashorn.internal.runtime.regexp.RegExp; 54 import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 55 import jdk.nashorn.internal.runtime.regexp.RegExpMatcher; 56 import jdk.nashorn.internal.runtime.regexp.RegExpResult; 57 58 /** 59 * ECMA 15.10 RegExp Objects. 60 */ 61 @ScriptClass("RegExp") 62 public final class NativeRegExp extends ScriptObject { 63 /** ECMA 15.10.7.5 lastIndex property */ 64 @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE) 65 public Object lastIndex; 66 67 /** Compiled regexp */ 68 private RegExp regexp; 69 70 // Reference to global object needed to support static RegExp properties 71 private final Global globalObject; 72 73 // initialized by nasgen 74 private static PropertyMap $nasgenmap$; 75 76 private NativeRegExp(final Global global) { 77 super(global.getRegExpPrototype(), $nasgenmap$); 78 this.globalObject = global; 79 } 80 81 NativeRegExp(final String input, final String flagString, final Global global) { 82 this(global); 83 try { 84 this.regexp = RegExpFactory.create(input, flagString); 85 } catch (final ParserException e) { 86 // translate it as SyntaxError object and throw it 87 e.throwAsEcmaException(); 88 throw new AssertionError(); //guard against null warnings below 89 } 90 91 this.setLastIndex(0); 92 } 93 94 NativeRegExp(final String input, final String flagString) { 95 this(input, flagString, Global.instance()); 96 } 97 98 NativeRegExp(final String string, final Global global) { 99 this(string, "", global); 100 } 101 102 NativeRegExp(final String string) { 103 this(string, Global.instance()); 104 } 105 106 NativeRegExp(final NativeRegExp regExp) { 107 this(Global.instance()); 108 this.lastIndex = regExp.getLastIndexObject(); 109 this.regexp = regExp.getRegExp(); 110 } 111 112 @Override 113 public String getClassName() { 114 return "RegExp"; 115 } 116 117 /** 118 * ECMA 15.10.4 119 * 120 * Constructor 121 * 122 * @param isNew is the new operator used for instantiating this regexp 123 * @param self self reference 124 * @param args arguments (optional: pattern and flags) 125 * @return new NativeRegExp 126 */ 127 @Constructor(arity = 2) 128 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object... args) { 129 if (args.length > 1) { 130 return newRegExp(args[0], args[1]); 131 } else if (args.length > 0) { 132 return newRegExp(args[0], UNDEFINED); 133 } 134 135 return newRegExp(UNDEFINED, UNDEFINED); 136 } 137 138 /** 139 * ECMA 15.10.4 140 * 141 * Constructor - specialized version, no args, empty regexp 142 * 143 * @param isNew is the new operator used for instantiating this regexp 144 * @param self self reference 145 * @return new NativeRegExp 146 */ 147 @SpecializedFunction(isConstructor=true) 148 public static NativeRegExp constructor(final boolean isNew, final Object self) { 149 return new NativeRegExp("", ""); 150 } 151 152 /** 153 * ECMA 15.10.4 154 * 155 * Constructor - specialized version, pattern, no flags 156 * 157 * @param isNew is the new operator used for instantiating this regexp 158 * @param self self reference 159 * @param pattern pattern 160 * @return new NativeRegExp 161 */ 162 @SpecializedFunction(isConstructor=true) 163 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern) { 164 return newRegExp(pattern, UNDEFINED); 165 } 166 167 /** 168 * ECMA 15.10.4 169 * 170 * Constructor - specialized version, pattern and flags 171 * 172 * @param isNew is the new operator used for instantiating this regexp 173 * @param self self reference 174 * @param pattern pattern 175 * @param flags flags 176 * @return new NativeRegExp 177 */ 178 @SpecializedFunction(isConstructor=true) 179 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) { 180 return newRegExp(pattern, flags); 181 } 182 183 /** 184 * External constructor used in generated code, which explains the public access 185 * 186 * @param regexp regexp 187 * @param flags flags 188 * @return new NativeRegExp 189 */ 190 public static NativeRegExp newRegExp(final Object regexp, final Object flags) { 191 String patternString = ""; 192 String flagString = ""; 193 194 if (regexp != UNDEFINED) { 195 if (regexp instanceof NativeRegExp) { 196 if (flags != UNDEFINED) { 197 throw typeError("regex.cant.supply.flags"); 198 } 199 return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as 200 } 201 patternString = JSType.toString(regexp); 202 } 203 204 if (flags != UNDEFINED) { 205 flagString = JSType.toString(flags); 206 } 207 208 return new NativeRegExp(patternString, flagString); 209 } 210 211 /** 212 * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped. 213 * 214 * @param string pattern string 215 * @return flat regexp 216 */ 217 static NativeRegExp flatRegExp(final String string) { 218 // escape special characters 219 StringBuilder sb = null; 220 final int length = string.length(); 221 222 for (int i = 0; i < length; i++) { 223 final char c = string.charAt(i); 224 switch (c) { 225 case '^': 226 case '$': 227 case '\\': 228 case '.': 229 case '*': 230 case '+': 231 case '?': 232 case '(': 233 case ')': 234 case '[': 235 case '{': 236 case '|': 237 if (sb == null) { 238 sb = new StringBuilder(length * 2); 239 sb.append(string, 0, i); 240 } 241 sb.append('\\'); 242 sb.append(c); 243 break; 244 default: 245 if (sb != null) { 246 sb.append(c); 247 } 248 break; 249 } 250 } 251 return new NativeRegExp(sb == null ? string : sb.toString(), ""); 252 } 253 254 private String getFlagString() { 255 final StringBuilder sb = new StringBuilder(3); 256 257 if (regexp.isGlobal()) { 258 sb.append('g'); 259 } 260 if (regexp.isIgnoreCase()) { 261 sb.append('i'); 262 } 263 if (regexp.isMultiline()) { 264 sb.append('m'); 265 } 266 267 return sb.toString(); 268 } 269 270 @Override 271 public String safeToString() { 272 return "[RegExp " + toString() + "]"; 273 } 274 275 @Override 276 public String toString() { 277 return "/" + regexp.getSource() + "/" + getFlagString(); 278 } 279 280 /** 281 * Nashorn extension: RegExp.prototype.compile - everybody implements this! 282 * 283 * @param self self reference 284 * @param pattern pattern 285 * @param flags flags 286 * @return new NativeRegExp 287 */ 288 @Function(attributes = Attribute.NOT_ENUMERABLE) 289 public static ScriptObject compile(final Object self, final Object pattern, final Object flags) { 290 final NativeRegExp regExp = checkRegExp(self); 291 final NativeRegExp compiled = newRegExp(pattern, flags); 292 // copy over regexp to 'self' 293 regExp.setRegExp(compiled.getRegExp()); 294 295 // Some implementations return undefined. Some return 'self'. Since return 296 // value is most likely be ignored, we can play safe and return 'self'. 297 return regExp; 298 } 299 300 /** 301 * ECMA 15.10.6.2 RegExp.prototype.exec(string) 302 * 303 * @param self self reference 304 * @param string string to match against regexp 305 * @return array containing the matches or {@code null} if no match 306 */ 307 @Function(attributes = Attribute.NOT_ENUMERABLE) 308 public static ScriptObject exec(final Object self, final Object string) { 309 return checkRegExp(self).exec(JSType.toString(string)); 310 } 311 312 /** 313 * ECMA 15.10.6.3 RegExp.prototype.test(string) 314 * 315 * @param self self reference 316 * @param string string to test for matches against regexp 317 * @return true if matches found, false otherwise 318 */ 319 @Function(attributes = Attribute.NOT_ENUMERABLE) 320 public static boolean test(final Object self, final Object string) { 321 return checkRegExp(self).test(JSType.toString(string)); 322 } 323 324 /** 325 * ECMA 15.10.6.4 RegExp.prototype.toString() 326 * 327 * @param self self reference 328 * @return string version of regexp 329 */ 330 @Function(attributes = Attribute.NOT_ENUMERABLE) 331 public static String toString(final Object self) { 332 return checkRegExp(self).toString(); 333 } 334 335 /** 336 * ECMA 15.10.7.1 source 337 * 338 * @param self self reference 339 * @return the input string for the regexp 340 */ 341 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 342 public static Object source(final Object self) { 343 return checkRegExp(self).getRegExp().getSource(); 344 } 345 346 /** 347 * ECMA 15.10.7.2 global 348 * 349 * @param self self reference 350 * @return true if this regexp is flagged global, false otherwise 351 */ 352 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 353 public static Object global(final Object self) { 354 return checkRegExp(self).getRegExp().isGlobal(); 355 } 356 357 /** 358 * ECMA 15.10.7.3 ignoreCase 359 * 360 * @param self self reference 361 * @return true if this regexp if flagged to ignore case, false otherwise 362 */ 363 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 364 public static Object ignoreCase(final Object self) { 365 return checkRegExp(self).getRegExp().isIgnoreCase(); 366 } 367 368 /** 369 * ECMA 15.10.7.4 multiline 370 * 371 * @param self self reference 372 * @return true if this regexp is flagged to be multiline, false otherwise 373 */ 374 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 375 public static Object multiline(final Object self) { 376 return checkRegExp(self).getRegExp().isMultiline(); 377 } 378 379 /** 380 * Getter for non-standard RegExp.input property. 381 * @param self self object 382 * @return last regexp input 383 */ 384 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input") 385 public static Object getLastInput(final Object self) { 386 final RegExpResult match = Global.instance().getLastRegExpResult(); 387 return match == null ? "" : match.getInput(); 388 } 389 390 /** 391 * Getter for non-standard RegExp.multiline property. 392 * @param self self object 393 * @return last regexp input 394 */ 395 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "multiline") 396 public static Object getLastMultiline(final Object self) { 397 return false; // doesn't ever seem to become true and isn't documented anyhwere 398 } 399 400 /** 401 * Getter for non-standard RegExp.lastMatch property. 402 * @param self self object 403 * @return last regexp input 404 */ 405 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch") 406 public static Object getLastMatch(final Object self) { 407 final RegExpResult match = Global.instance().getLastRegExpResult(); 408 return match == null ? "" : match.getGroup(0); 409 } 410 411 /** 412 * Getter for non-standard RegExp.lastParen property. 413 * @param self self object 414 * @return last regexp input 415 */ 416 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen") 417 public static Object getLastParen(final Object self) { 418 final RegExpResult match = Global.instance().getLastRegExpResult(); 419 return match == null ? "" : match.getLastParen(); 420 } 421 422 /** 423 * Getter for non-standard RegExp.leftContext property. 424 * @param self self object 425 * @return last regexp input 426 */ 427 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext") 428 public static Object getLeftContext(final Object self) { 429 final RegExpResult match = Global.instance().getLastRegExpResult(); 430 return match == null ? "" : match.getInput().substring(0, match.getIndex()); 431 } 432 433 /** 434 * Getter for non-standard RegExp.rightContext property. 435 * @param self self object 436 * @return last regexp input 437 */ 438 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext") 439 public static Object getRightContext(final Object self) { 440 final RegExpResult match = Global.instance().getLastRegExpResult(); 441 return match == null ? "" : match.getInput().substring(match.getIndex() + match.length()); 442 } 443 444 /** 445 * Getter for non-standard RegExp.$1 property. 446 * @param self self object 447 * @return last regexp input 448 */ 449 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1") 450 public static Object getGroup1(final Object self) { 451 final RegExpResult match = Global.instance().getLastRegExpResult(); 452 return match == null ? "" : match.getGroup(1); 453 } 454 455 /** 456 * Getter for non-standard RegExp.$2 property. 457 * @param self self object 458 * @return last regexp input 459 */ 460 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2") 461 public static Object getGroup2(final Object self) { 462 final RegExpResult match = Global.instance().getLastRegExpResult(); 463 return match == null ? "" : match.getGroup(2); 464 } 465 466 /** 467 * Getter for non-standard RegExp.$3 property. 468 * @param self self object 469 * @return last regexp input 470 */ 471 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3") 472 public static Object getGroup3(final Object self) { 473 final RegExpResult match = Global.instance().getLastRegExpResult(); 474 return match == null ? "" : match.getGroup(3); 475 } 476 477 /** 478 * Getter for non-standard RegExp.$4 property. 479 * @param self self object 480 * @return last regexp input 481 */ 482 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4") 483 public static Object getGroup4(final Object self) { 484 final RegExpResult match = Global.instance().getLastRegExpResult(); 485 return match == null ? "" : match.getGroup(4); 486 } 487 488 /** 489 * Getter for non-standard RegExp.$5 property. 490 * @param self self object 491 * @return last regexp input 492 */ 493 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5") 494 public static Object getGroup5(final Object self) { 495 final RegExpResult match = Global.instance().getLastRegExpResult(); 496 return match == null ? "" : match.getGroup(5); 497 } 498 499 /** 500 * Getter for non-standard RegExp.$6 property. 501 * @param self self object 502 * @return last regexp input 503 */ 504 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6") 505 public static Object getGroup6(final Object self) { 506 final RegExpResult match = Global.instance().getLastRegExpResult(); 507 return match == null ? "" : match.getGroup(6); 508 } 509 510 /** 511 * Getter for non-standard RegExp.$7 property. 512 * @param self self object 513 * @return last regexp input 514 */ 515 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7") 516 public static Object getGroup7(final Object self) { 517 final RegExpResult match = Global.instance().getLastRegExpResult(); 518 return match == null ? "" : match.getGroup(7); 519 } 520 521 /** 522 * Getter for non-standard RegExp.$8 property. 523 * @param self self object 524 * @return last regexp input 525 */ 526 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8") 527 public static Object getGroup8(final Object self) { 528 final RegExpResult match = Global.instance().getLastRegExpResult(); 529 return match == null ? "" : match.getGroup(8); 530 } 531 532 /** 533 * Getter for non-standard RegExp.$9 property. 534 * @param self self object 535 * @return last regexp input 536 */ 537 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9") 538 public static Object getGroup9(final Object self) { 539 final RegExpResult match = Global.instance().getLastRegExpResult(); 540 return match == null ? "" : match.getGroup(9); 541 } 542 543 private RegExpResult execInner(final String string) { 544 final boolean isGlobal = regexp.isGlobal(); 545 int start = getLastIndex(); 546 if (!isGlobal) { 547 start = 0; 548 } 549 550 if (start < 0 || start > string.length()) { 551 if (isGlobal) { 552 setLastIndex(0); 553 } 554 return null; 555 } 556 557 final RegExpMatcher matcher = regexp.match(string); 558 if (matcher == null || !matcher.search(start)) { 559 if (isGlobal) { 560 setLastIndex(0); 561 } 562 return null; 563 } 564 565 if (isGlobal) { 566 setLastIndex(matcher.end()); 567 } 568 569 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 570 globalObject.setLastRegExpResult(match); 571 return match; 572 } 573 574 // String.prototype.split method ignores the global flag and should not update lastIndex property. 575 private RegExpResult execSplit(final String string, final int start) { 576 if (start < 0 || start > string.length()) { 577 return null; 578 } 579 580 final RegExpMatcher matcher = regexp.match(string); 581 if (matcher == null || !matcher.search(start)) { 582 return null; 583 } 584 585 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 586 globalObject.setLastRegExpResult(match); 587 return match; 588 } 589 590 /** 591 * Convert java.util.regex.Matcher groups to JavaScript groups. 592 * That is, replace null and groups that didn't match with undefined. 593 */ 594 private Object[] groups(final RegExpMatcher matcher) { 595 final int groupCount = matcher.groupCount(); 596 final Object[] groups = new Object[groupCount + 1]; 597 final BitVector groupsInNegativeLookahead = regexp.getGroupsInNegativeLookahead(); 598 599 for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) { 600 final int groupStart = matcher.start(i); 601 if (lastGroupStart > groupStart 602 || groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i)) { 603 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated. 604 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere 605 // in the pattern always return undefined because the negative lookahead must fail. 606 groups[i] = UNDEFINED; 607 continue; 608 } 609 final String group = matcher.group(i); 610 groups[i] = group == null ? UNDEFINED : group; 611 lastGroupStart = groupStart; 612 } 613 return groups; 614 } 615 616 /** 617 * Executes a search for a match within a string based on a regular 618 * expression. It returns an array of information or null if no match is 619 * found. 620 * 621 * @param string String to match. 622 * @return NativeArray of matches, string or null. 623 */ 624 public NativeRegExpExecResult exec(final String string) { 625 final RegExpResult match = execInner(string); 626 627 if (match == null) { 628 return null; 629 } 630 631 return new NativeRegExpExecResult(match, globalObject); 632 } 633 634 /** 635 * Executes a search for a match within a string based on a regular 636 * expression. 637 * 638 * @param string String to match. 639 * @return True if a match is found. 640 */ 641 public boolean test(final String string) { 642 return execInner(string) != null; 643 } 644 645 /** 646 * Searches and replaces the regular expression portion (match) with the 647 * replaced text instead. For the "replacement text" parameter, you can use 648 * the keywords $1 to $2 to replace the original text with values from 649 * sub-patterns defined within the main pattern. 650 * 651 * @param string String to match. 652 * @param replacement Replacement string. 653 * @return String with substitutions. 654 */ 655 String replace(final String string, final String replacement, final ScriptFunction function) throws Throwable { 656 final RegExpMatcher matcher = regexp.match(string); 657 658 if (matcher == null) { 659 return string; 660 } 661 662 if (!regexp.isGlobal()) { 663 if (!matcher.search(0)) { 664 return string; 665 } 666 667 final StringBuilder sb = new StringBuilder(); 668 sb.append(string, 0, matcher.start()); 669 670 if (function != null) { 671 final Object self = function.isStrict() ? UNDEFINED : Global.instance(); 672 sb.append(callReplaceValue(getReplaceValueInvoker(), function, self, matcher, string)); 673 } else { 674 appendReplacement(matcher, string, replacement, sb); 675 } 676 sb.append(string, matcher.end(), string.length()); 677 return sb.toString(); 678 } 679 680 setLastIndex(0); 681 682 if (!matcher.search(0)) { 683 return string; 684 } 685 686 int thisIndex = 0; 687 int previousLastIndex = 0; 688 final StringBuilder sb = new StringBuilder(); 689 690 final MethodHandle invoker = function == null ? null : getReplaceValueInvoker(); 691 final Object self = function == null || function.isStrict() ? UNDEFINED : Global.instance(); 692 693 do { 694 sb.append(string, thisIndex, matcher.start()); 695 if (function != null) { 696 sb.append(callReplaceValue(invoker, function, self, matcher, string)); 697 } else { 698 appendReplacement(matcher, string, replacement, sb); 699 } 700 701 thisIndex = matcher.end(); 702 if (thisIndex == string.length() && matcher.start() == matcher.end()) { 703 // Avoid getting empty match at end of string twice 704 break; 705 } 706 707 // ECMA 15.5.4.10 String.prototype.match(regexp) 708 if (thisIndex == previousLastIndex) { 709 setLastIndex(thisIndex + 1); 710 previousLastIndex = thisIndex + 1; 711 } else { 712 previousLastIndex = thisIndex; 713 } 714 } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex)); 715 716 sb.append(string, thisIndex, string.length()); 717 718 return sb.toString(); 719 } 720 721 private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb) { 722 /* 723 * Process substitution patterns: 724 * 725 * $$ -> $ 726 * $& -> the matched substring 727 * $` -> the portion of string that preceeds matched substring 728 * $' -> the portion of string that follows the matched substring 729 * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit 730 * $nn -> the nnth capture, where nn is a two digit decimal number [01-99]. 731 */ 732 733 int cursor = 0; 734 Object[] groups = null; 735 736 while (cursor < replacement.length()) { 737 char nextChar = replacement.charAt(cursor); 738 if (nextChar == '$') { 739 // Skip past $ 740 cursor++; 741 if (cursor == replacement.length()) { 742 // nothing after "$" 743 sb.append('$'); 744 break; 745 } 746 747 nextChar = replacement.charAt(cursor); 748 final int firstDigit = nextChar - '0'; 749 750 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) { 751 // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit. 752 int refNum = firstDigit; 753 cursor++; 754 if (cursor < replacement.length() && firstDigit < matcher.groupCount()) { 755 final int secondDigit = replacement.charAt(cursor) - '0'; 756 if (secondDigit >= 0 && secondDigit <= 9) { 757 final int newRefNum = firstDigit * 10 + secondDigit; 758 if (newRefNum <= matcher.groupCount() && newRefNum > 0) { 759 // $nn ($01-$99) 760 refNum = newRefNum; 761 cursor++; 762 } 763 } 764 } 765 if (refNum > 0) { 766 if (groups == null) { 767 groups = groups(matcher); 768 } 769 // Append group if matched. 770 if (groups[refNum] != UNDEFINED) { 771 sb.append((String) groups[refNum]); 772 } 773 } else { // $0. ignore. 774 assert refNum == 0; 775 sb.append("$0"); 776 } 777 } else if (nextChar == '$') { 778 sb.append('$'); 779 cursor++; 780 } else if (nextChar == '&') { 781 sb.append(matcher.group()); 782 cursor++; 783 } else if (nextChar == '`') { 784 sb.append(text, 0, matcher.start()); 785 cursor++; 786 } else if (nextChar == '\'') { 787 sb.append(text, matcher.end(), text.length()); 788 cursor++; 789 } else { 790 // unknown substitution or $n with n>m. skip. 791 sb.append('$'); 792 } 793 } else { 794 sb.append(nextChar); 795 cursor++; 796 } 797 } 798 } 799 800 private static final Object REPLACE_VALUE = new Object(); 801 802 private static final MethodHandle getReplaceValueInvoker() { 803 return Global.instance().getDynamicInvoker(REPLACE_VALUE, 804 new Callable<MethodHandle>() { 805 @Override 806 public MethodHandle call() { 807 return Bootstrap.createDynamicInvoker("dyn:call", String.class, ScriptFunction.class, Object.class, Object[].class); 808 } 809 }); 810 } 811 812 private String callReplaceValue(final MethodHandle invoker, final ScriptFunction function, final Object self, final RegExpMatcher matcher, final String string) throws Throwable { 813 final Object[] groups = groups(matcher); 814 final Object[] args = Arrays.copyOf(groups, groups.length + 2); 815 816 args[groups.length] = matcher.start(); 817 args[groups.length + 1] = string; 818 819 return (String)invoker.invokeExact(function, self, args); 820 } 821 822 /** 823 * Breaks up a string into an array of substrings based on a regular 824 * expression or fixed string. 825 * 826 * @param string String to match. 827 * @param limit Split limit. 828 * @return Array of substrings. 829 */ 830 NativeArray split(final String string, final long limit) { 831 if (limit == 0L) { 832 return new NativeArray(); 833 } 834 835 final List<Object> matches = new ArrayList<>(); 836 837 RegExpResult match; 838 final int inputLength = string.length(); 839 int splitLastLength = -1; 840 int splitLastIndex = 0; 841 int splitLastLastIndex = 0; 842 843 while ((match = execSplit(string, splitLastIndex)) != null) { 844 splitLastIndex = match.getIndex() + match.length(); 845 846 if (splitLastIndex > splitLastLastIndex) { 847 matches.add(string.substring(splitLastLastIndex, match.getIndex())); 848 final Object[] groups = match.getGroups(); 849 if (groups.length > 1 && match.getIndex() < inputLength) { 850 for (int index = 1; index < groups.length && matches.size() < limit; index++) { 851 matches.add(groups[index]); 852 } 853 } 854 855 splitLastLength = match.length(); 856 857 if (matches.size() >= limit) { 858 break; 859 } 860 } 861 862 // bump the index to avoid infinite loop 863 if (splitLastIndex == splitLastLastIndex) { 864 splitLastIndex++; 865 } else { 866 splitLastLastIndex = splitLastIndex; 867 } 868 } 869 870 if (matches.size() < limit) { 871 // check special case if we need to append an empty string at the 872 // end of the match 873 // if the lastIndex was the entire string 874 if (splitLastLastIndex == string.length()) { 875 if (splitLastLength > 0 || execSplit("", 0) == null) { 876 matches.add(""); 877 } 878 } else { 879 matches.add(string.substring(splitLastLastIndex, inputLength)); 880 } 881 } 882 883 return new NativeArray(matches.toArray()); 884 } 885 886 /** 887 * Tests for a match in a string. It returns the index of the match, or -1 888 * if not found. 889 * 890 * @param string String to match. 891 * @return Index of match. 892 */ 893 int search(final String string) { 894 final RegExpResult match = execInner(string); 895 896 if (match == null) { 897 return -1; 898 } 899 900 return match.getIndex(); 901 } 902 903 /** 904 * Fast lastIndex getter 905 * @return last index property as int 906 */ 907 public int getLastIndex() { 908 return JSType.toInteger(lastIndex); 909 } 910 911 /** 912 * Fast lastIndex getter 913 * @return last index property as boxed integer 914 */ 915 public Object getLastIndexObject() { 916 return lastIndex; 917 } 918 919 /** 920 * Fast lastIndex setter 921 * @param lastIndex lastIndex 922 */ 923 public void setLastIndex(final int lastIndex) { 924 this.lastIndex = JSType.toObject(lastIndex); 925 } 926 927 private static NativeRegExp checkRegExp(final Object self) { 928 if (self instanceof NativeRegExp) { 929 return (NativeRegExp)self; 930 } else if (self != null && self == Global.instance().getRegExpPrototype()) { 931 return Global.instance().DEFAULT_REGEXP; 932 } else { 933 throw typeError("not.a.regexp", ScriptRuntime.safeToString(self)); 934 } 935 } 936 937 boolean getGlobal() { 938 return regexp.isGlobal(); 939 } 940 941 private RegExp getRegExp() { 942 return regexp; 943 } 944 945 private void setRegExp(final RegExp regexp) { 946 this.regexp = regexp; 947 } 948 949 }