1 /*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package jdk.nashorn.internal.objects;
27
28 import static jdk.nashorn.internal.runtime.ECMAErrors.typeError;
29 import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
30
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36 import jdk.nashorn.internal.objects.annotations.Attribute;
37 import jdk.nashorn.internal.objects.annotations.Constructor;
38 import jdk.nashorn.internal.objects.annotations.Function;
39 import jdk.nashorn.internal.objects.annotations.Getter;
40 import jdk.nashorn.internal.objects.annotations.Property;
41 import jdk.nashorn.internal.objects.annotations.ScriptClass;
42 import jdk.nashorn.internal.objects.annotations.SpecializedConstructor;
43 import jdk.nashorn.internal.parser.RegExp;
44 import jdk.nashorn.internal.runtime.BitVector;
45 import jdk.nashorn.internal.runtime.JSType;
46 import jdk.nashorn.internal.runtime.ParserException;
47 import jdk.nashorn.internal.runtime.RegExpMatch;
48 import jdk.nashorn.internal.runtime.ScriptFunction;
49 import jdk.nashorn.internal.runtime.ScriptObject;
50 import jdk.nashorn.internal.runtime.ScriptRuntime;
51
52 /**
53 * ECMA 15.10 RegExp Objects.
54 */
55 @ScriptClass("RegExp")
56 public final class NativeRegExp extends ScriptObject {
57 /** ECMA 15.10.7.5 lastIndex property */
58 @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
59 public Object lastIndex;
60
61 /** Pattern string. */
62 private String input;
63
64 /** Global search flag for this regexp. */
65 private boolean global;
66
67 /** Case insensitive flag for this regexp */
68 private boolean ignoreCase;
69
70 /** Multi-line flag for this regexp */
71 private boolean multiline;
72
73 /** Java regex pattern to use for match. We compile to one of these */
74 private Pattern pattern;
75
76 private BitVector groupsInNegativeLookahead;
77
78 /*
79 public NativeRegExp() {
80 init();
81 }*/
82
83 NativeRegExp(final String input, final String flagString) {
84 RegExp regExp = null;
85 try {
86 regExp = new RegExp(input, flagString);
87 } catch (final ParserException e) {
88 // translate it as SyntaxError object and throw it
89 e.throwAsEcmaException();
90 throw new AssertionError(); //guard against null warnings below
91 }
92
93 this.setLastIndex(0);
94 this.input = regExp.getInput();
95 this.global = regExp.isGlobal();
96 this.ignoreCase = regExp.isIgnoreCase();
97 this.multiline = regExp.isMultiline();
98 this.pattern = regExp.getPattern();
99 this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
100
101 init();
102 }
103
104 NativeRegExp(final String string) {
105 this(string, "");
106 }
107
108 NativeRegExp(final NativeRegExp regExp) {
109 this.input = regExp.getInput();
110 this.global = regExp.getGlobal();
111 this.multiline = regExp.getMultiline();
112 this.ignoreCase = regExp.getIgnoreCase();
113 this.lastIndex = regExp.getLastIndexObject();
114 this.pattern = regExp.getPattern();
115 this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
116
117 init();
118 }
119
120 NativeRegExp(final Pattern pattern) {
121 this.input = pattern.pattern();
122 this.multiline = (pattern.flags() & Pattern.MULTILINE) != 0;
123 this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
124 this.lastIndex = 0;
125 this.pattern = pattern;
126
127 init();
128 }
129
130 @Override
131 public String getClassName() {
132 return "RegExp";
133 }
134
135 /**
136 * ECMA 15.10.4
137 *
138 * Constructor
139 *
140 * @param isNew is the new operator used for instantiating this regexp
141 * @param self self reference
142 * @param args arguments (optional: pattern and flags)
143 * @return new NativeRegExp
144 */
145 @Constructor(arity = 2)
146 public static Object constructor(final boolean isNew, final Object self, final Object... args) {
147 if (args.length > 1) {
148 return newRegExp(args[0], args[1]);
149 } else if (args.length > 0) {
150 return newRegExp(args[0], UNDEFINED);
151 }
152
153 return newRegExp(UNDEFINED, UNDEFINED);
154 }
155
156 /**
157 * ECMA 15.10.4
158 *
159 * Constructor - specialized version, no args, empty regexp
160 *
161 * @param isNew is the new operator used for instantiating this regexp
162 * @param self self reference
163 * @return new NativeRegExp
164 */
165 @SpecializedConstructor
166 public static Object constructor(final boolean isNew, final Object self) {
167 return new NativeRegExp("", "");
168 }
169
170 /**
171 * ECMA 15.10.4
172 *
173 * Constructor - specialized version, pattern, no flags
174 *
175 * @param isNew is the new operator used for instantiating this regexp
176 * @param self self reference
177 * @param pattern pattern
178 * @return new NativeRegExp
179 */
180 @SpecializedConstructor
181 public static Object constructor(final boolean isNew, final Object self, final Object pattern) {
182 return newRegExp(pattern, UNDEFINED);
183 }
184
185 /**
186 * ECMA 15.10.4
187 *
188 * Constructor - specialized version, pattern and flags
189 *
190 * @param isNew is the new operator used for instantiating this regexp
191 * @param self self reference
192 * @param pattern pattern
193 * @param flags flags
194 * @return new NativeRegExp
195 */
196 @SpecializedConstructor
197 public static Object constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) {
198 return newRegExp(pattern, flags);
199 }
200
201 /**
202 * External constructor used in generated code created by {@link jdk.nashorn.internal.codegen.CodeGenerator}, which
203 * explain the {@code public} access.
204 *
205 * @param regexp regexp
206 * @param flags flags
207 * @return new NativeRegExp
208 */
209 public static NativeRegExp newRegExp(final Object regexp, final Object flags) {
210 String patternString = "";
211 String flagString = "";
212 boolean flagsDefined = false;
213
214 if (flags != UNDEFINED) {
215 flagsDefined = true;
216 flagString = JSType.toString(flags);
217 }
218
219 if (regexp != UNDEFINED) {
220 if (regexp instanceof NativeRegExp) {
221 if (!flagsDefined) {
222 return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as
223 }
224 typeError("regex.cant.supply.flags");
225 }
226 patternString = JSType.toString(regexp);
227 }
228
229 return new NativeRegExp(patternString, flagString);
230 }
231
232 private String getFlagString() {
233 final StringBuilder sb = new StringBuilder();
234
235 if (global) {
236 sb.append('g');
237 }
238 if (ignoreCase) {
239 sb.append('i');
240 }
241 if (multiline) {
242 sb.append('m');
243 }
244
245 return sb.toString();
246 }
247
248 @Override
249 public String safeToString() {
250 return "[RegExp " + toString() + "]";
251 }
252
253 @Override
254 public String toString() {
255 return "/" + input + "/" + getFlagString();
256 }
257
258 /**
259 * Nashorn extension: RegExp.prototype.compile - everybody implements this!
260 *
261 * @param self self reference
262 * @param pattern pattern
263 * @param flags flags
264 * @return new NativeRegExp
265 */
266 @Function(attributes = Attribute.NOT_ENUMERABLE)
267 public static Object compile(final Object self, final Object pattern, final Object flags) {
268 final NativeRegExp regExp = checkRegExp(self);
269 final NativeRegExp compiled = newRegExp(pattern, flags);
270 // copy over fields to 'self'
271 regExp.setInput(compiled.getInput());
272 regExp.setGlobal(compiled.getGlobal());
273 regExp.setIgnoreCase(compiled.getIgnoreCase());
274 regExp.setMultiline(compiled.getMultiline());
275 regExp.setPattern(compiled.getPattern());
276 regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
277
278 // Some implementations return undefined. Some return 'self'. Since return
279 // value is most likely be ignored, we can play safe and return 'self'.
280 return regExp;
281 }
282
283 /**
284 * ECMA 15.10.6.2 RegExp.prototype.exec(string)
285 *
286 * @param self self reference
287 * @param string string to match against regexp
288 * @return array containing the matches or {@code null} if no match
289 */
290 @Function(attributes = Attribute.NOT_ENUMERABLE)
291 public static Object exec(final Object self, final Object string) {
292 return checkRegExp(self).exec(JSType.toString(string));
293 }
294
295 /**
296 * ECMA 15.10.6.3 RegExp.prototype.test(string)
297 *
298 * @param self self reference
299 * @param string string to test for matches against regexp
300 * @return true if matches found, false otherwise
301 */
302 @Function(attributes = Attribute.NOT_ENUMERABLE)
303 public static Object test(final Object self, final Object string) {
304 return checkRegExp(self).test(JSType.toString(string));
305 }
306
307 /**
308 * ECMA 15.10.6.4 RegExp.prototype.toString()
309 *
310 * @param self self reference
311 * @return string version of regexp
312 */
313 @Function(attributes = Attribute.NOT_ENUMERABLE)
314 public static Object toString(final Object self) {
315 return checkRegExp(self).toString();
316 }
317
318 /**
319 * ECMA 15.10.7.1 source
320 *
321 * @param self self reference
322 * @return the input string for the regexp
323 */
324 @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
325 public static Object source(final Object self) {
326 return checkRegExp(self).input;
327 }
328
329 /**
330 * ECMA 15.10.7.2 global
331 *
332 * @param self self reference
333 * @return true if this regexp is flagged global, false otherwise
334 */
335 @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
336 public static Object global(final Object self) {
337 return checkRegExp(self).global;
338 }
339
340 /**
341 * ECMA 15.10.7.3 ignoreCase
342 *
343 * @param self self reference
344 * @return true if this regexp if flagged to ignore case, false otherwise
345 */
346 @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
347 public static Object ignoreCase(final Object self) {
348 return checkRegExp(self).ignoreCase;
349 }
350
351 /**
352 * ECMA 15.10.7.4 multiline
353 *
354 * @param self self reference
355 * @return true if this regexp is flagged to be multiline, false otherwise
356 */
357 @Getter(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE | Attribute.NOT_WRITABLE)
358 public static Object multiline(final Object self) {
359 return checkRegExp(self).multiline;
360 }
361
362 private RegExpMatch execInner(final String string) {
363 if (this.pattern == null) {
364 return null; // never matches or similar, e.g. a[]
365 }
366
367 final Matcher matcher = pattern.matcher(string);
368 final int start = this.global ? getLastIndex() : 0;
369
370 if (start < 0 || start > string.length()) {
371 setLastIndex(0);
372 return null;
373 }
374
375 if (!matcher.find(start)) {
376 setLastIndex(0);
377 return null;
378 }
379
380 if (global) {
381 setLastIndex(matcher.end());
382 }
383
384 return new RegExpMatch(string, matcher.start(), groups(matcher));
385 }
386
387 /**
388 * Convert java.util.regex.Matcher groups to JavaScript groups.
389 * That is, replace null and groups that didn't match with undefined.
390 */
391 private Object[] groups(final Matcher matcher) {
392 final int groupCount = matcher.groupCount();
393 final Object[] groups = new Object[groupCount + 1];
394 for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
395 final int groupStart = matcher.start(i);
396 if (lastGroupStart > groupStart
397 || (groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i))) {
398 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
399 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
400 // in the pattern always return undefined because the negative lookahead must fail.
401 groups[i] = UNDEFINED;
402 continue;
403 }
404 final String group = matcher.group(i);
405 groups[i] = group == null ? UNDEFINED : group;
406 lastGroupStart = groupStart;
407 }
408 return groups;
409 }
410
411 /**
412 * Executes a search for a match within a string based on a regular
413 * expression. It returns an array of information or null if no match is
414 * found.
415 *
416 * @param string String to match.
417 * @return NativeArray of matches, string or null.
418 */
419 public Object exec(final String string) {
420 final RegExpMatch m = execInner(string);
421 // the input string
422 if (m == null) {
423 return null;
424 }
425
426 return new NativeRegExpExecResult(m);
427 }
428
429 /**
430 * Executes a search for a match within a string based on a regular
431 * expression.
432 *
433 * @param string String to match.
434 * @return True if a match is found.
435 */
436 public Object test(final String string) {
437 return exec(string) != null;
438 }
439
440 /**
441 * Searches and replaces the regular expression portion (match) with the
442 * replaced text instead. For the "replacement text" parameter, you can use
443 * the keywords $1 to $2 to replace the original text with values from
444 * sub-patterns defined within the main pattern.
445 *
446 * @param string String to match.
447 * @param replacement Replacement string.
448 * @return String with substitutions.
449 */
450 Object replace(final String string, final String replacement, final ScriptFunction function) {
451 final Matcher matcher = pattern.matcher(string);
452 /*
453 * $$ -> $
454 * $& -> the matched substring
455 * $` -> the portion of string that preceeds matched substring
456 * $' -> the portion of string that follows the matched substring
457 * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit
458 * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
459 */
460 String replace = replacement;
461
462 if (!global) {
463 if (!matcher.find()) {
464 return string;
465 }
466
467 final StringBuilder sb = new StringBuilder();
468 if (function != null) {
469 replace = callReplaceValue(function, matcher, string);
470 }
471 appendReplacement(matcher, string, replace, sb, 0);
472 sb.append(string, matcher.end(), string.length());
473 return sb.toString();
474 }
475
476 int end = 0; // a.k.a. lastAppendPosition
477 setLastIndex(0);
478
479 boolean found;
480 try {
481 found = matcher.find(end);
482 } catch (final IndexOutOfBoundsException e) {
483 found = false;
484 }
485
486 if (!found) {
487 return string;
488 }
489
490 int previousLastIndex = 0;
491 final StringBuilder sb = new StringBuilder();
492 do {
493 if (function != null) {
494 replace = callReplaceValue(function, matcher, string);
495 }
496 appendReplacement(matcher, string, replace, sb, end);
497 end = matcher.end();
498
499 // ECMA 15.5.4.10 String.prototype.match(regexp)
500 final int thisIndex = end;
501 if (thisIndex == previousLastIndex) {
502 setLastIndex(thisIndex + 1);
503 previousLastIndex = thisIndex + 1;
504 } else {
505 previousLastIndex = thisIndex;
506 }
507 } while (matcher.find());
508
509 sb.append(string, end, string.length());
510
511 return sb.toString();
512 }
513
514 private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
515 // Process substitution string to replace group references with groups
516 int cursor = 0;
517 final StringBuilder result = new StringBuilder();
518 Object[] groups = null;
519
520 while (cursor < replacement.length()) {
521 char nextChar = replacement.charAt(cursor);
522 if (nextChar == '$') {
523 // Skip past $
524 cursor++;
525 nextChar = replacement.charAt(cursor);
526 final int firstDigit = nextChar - '0';
527
528 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
529 // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
530 int refNum = firstDigit;
531 cursor++;
532 if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
533 final int secondDigit = replacement.charAt(cursor) - '0';
534 if ((secondDigit >= 0) && (secondDigit <= 9)) {
535 final int newRefNum = (firstDigit * 10) + secondDigit;
536 if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
537 // $nn ($01-$99)
538 refNum = newRefNum;
539 cursor++;
540 }
541 }
542 }
543 if (refNum > 0) {
544 if (groups == null) {
545 groups = groups(matcher);
546 }
547 // Append group if matched.
548 if (groups[refNum] != UNDEFINED) {
549 result.append((String) groups[refNum]);
550 }
551 } else { // $0. ignore.
552 assert refNum == 0;
553 result.append("$0");
554 }
555 } else if (nextChar == '$') {
556 result.append('$');
557 cursor++;
558 } else if (nextChar == '&') {
559 result.append(matcher.group());
560 cursor++;
561 } else if (nextChar == '`') {
562 result.append(text.substring(0, matcher.start()));
563 cursor++;
564 } else if (nextChar == '\'') {
565 result.append(text.substring(matcher.end()));
566 cursor++;
567 } else {
568 // unknown substitution or $n with n>m. skip.
569 result.append('$');
570 }
571 } else {
572 result.append(nextChar);
573 cursor++;
574 }
575 }
576 // Append the intervening text
577 sb.append(text, lastAppendPosition, matcher.start());
578 // Append the match substitution
579 sb.append(result);
580 }
581
582 private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
583 final Object[] groups = groups(matcher);
584 final Object[] args = Arrays.copyOf(groups, groups.length + 2);
585
586 args[groups.length] = matcher.start();
587 args[groups.length + 1] = string;
588
589 final Object self = function.isStrict() ? UNDEFINED : Global.instance();
590
591 return JSType.toString(ScriptRuntime.apply(function, self, args));
592 }
593
594 /**
595 * Breaks up a string into an array of substrings based on a regular
596 * expression or fixed string.
597 *
598 * @param string String to match.
599 * @param limit Split limit.
600 * @return Array of substrings.
601 */
602 Object split(final String string, final long limit) {
603 return split(this, string, limit);
604 }
605
606 private static Object split(final NativeRegExp regexp0, final String input, final long limit) {
607 final List<Object> matches = new ArrayList<>();
608
609 final NativeRegExp regexp = new NativeRegExp(regexp0);
610 regexp.setGlobal(true);
611
612 if (limit == 0L) {
613 return new NativeArray();
614 }
615
616 RegExpMatch match;
617 final int inputLength = input.length();
618 int lastLength = -1;
619 int lastLastIndex = 0;
620
621 while ((match = regexp.execInner(input)) != null) {
622 final int lastIndex = match.getIndex() + match.length();
623
624 if (lastIndex > lastLastIndex) {
625 matches.add(input.substring(lastLastIndex, match.getIndex()));
626 if (match.getGroups().length > 1 && match.getIndex() < inputLength) {
627 matches.addAll(Arrays.asList(match.getGroups()).subList(1, match.getGroups().length));
628 }
629
630 lastLength = match.length();
631 lastLastIndex = lastIndex;
632
633 if (matches.size() >= limit) {
634 break;
635 }
636 }
637
638 // bump the index to avoid infinite loop
639 if (regexp.getLastIndex() == match.getIndex()) {
640 regexp.setLastIndex(match.getIndex() + 1);
641 }
642 }
643
644 if (matches.size() < limit) {
645 // check special case if we need to append an empty string at the
646 // end of the match
647 // if the lastIndex was the entire string
648 if (lastLastIndex == input.length()) {
649 if (lastLength > 0 || regexp.test("") == Boolean.FALSE) {
650 matches.add("");
651 }
652 } else {
653 matches.add(input.substring(lastLastIndex, inputLength));
654 }
655 }
656
657 return new NativeArray(matches.toArray());
658 }
659
660 /**
661 * Tests for a match in a string. It returns the index of the match, or -1
662 * if not found.
663 *
664 * @param string String to match.
665 * @return Index of match.
666 */
667 Object search(final String string) {
668 final Matcher matcher = pattern.matcher(string);
669
670 int start = 0;
671 if (global) {
672 start = getLastIndex();
673 }
674
675 start = matcher.find(start) ? matcher.start() : -1;
676
677 if (global) {
678 setLastIndex(matcher.end());
679 }
680
681 return start;
682 }
683
684 /**
685 * Fast lastIndex getter
686 * @return last index property as int
687 */
688 public int getLastIndex() {
689 return JSType.toInt32(lastIndex);
690 }
691
692 /**
693 * Fast lastIndex getter
694 * @return last index property as boxed integer
695 */
696 public Object getLastIndexObject() {
697 return lastIndex;
698 }
699
700 /**
701 * Fast lastIndex setter
702 * @param lastIndex lastIndex
703 */
704 public void setLastIndex(final int lastIndex) {
705 this.lastIndex = JSType.toObject(lastIndex);
706 }
707
708 private void init() {
709 this.setProto(Global.instance().getRegExpPrototype());
710 }
711
712 private static NativeRegExp checkRegExp(final Object self) {
713 Global.checkObjectCoercible(self);
714 if (self instanceof NativeRegExp) {
715 return (NativeRegExp)self;
716 } else if (self != null && self == Global.instance().getRegExpPrototype()) {
717 return Global.instance().DEFAULT_REGEXP;
718 } else {
719 typeError("not.a.regexp", ScriptRuntime.safeToString(self));
720 return null;
721 }
722 }
723
724 private String getInput() {
725 return input;
726 }
727
728 private void setInput(final String input) {
729 this.input = input;
730 }
731
732 boolean getGlobal() {
733 return global;
734 }
735
736 private void setGlobal(final boolean global) {
737 this.global = global;
738 }
739
740 private boolean getIgnoreCase() {
741 return ignoreCase;
742 }
743
744 private void setIgnoreCase(final boolean ignoreCase) {
745 this.ignoreCase = ignoreCase;
746 }
747
748 private boolean getMultiline() {
749 return multiline;
750 }
751
752 private void setMultiline(final boolean multiline) {
753 this.multiline = multiline;
754 }
755
756 private Pattern getPattern() {
757 return pattern;
758 }
759
760 private void setPattern(final Pattern pattern) {
761 this.pattern = pattern;
762 }
763
764 private BitVector getGroupsInNegativeLookahead() {
765 return groupsInNegativeLookahead;
766 }
767
768 private void setGroupsInNegativeLookahead(final BitVector groupsInNegativeLookahead) {
769 this.groupsInNegativeLookahead = groupsInNegativeLookahead;
770 }
771
772 }
--- EOF ---