src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/JSONParser.java
Print this page
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -23,42 +23,63 @@
* questions.
*/
package jdk.nashorn.internal.parser;
-import static jdk.nashorn.internal.parser.TokenType.COLON;
-import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
-import static jdk.nashorn.internal.parser.TokenType.EOF;
-import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
-import static jdk.nashorn.internal.parser.TokenType.RBRACE;
-import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
-import static jdk.nashorn.internal.parser.TokenType.STRING;
import java.util.ArrayList;
import java.util.List;
-import jdk.nashorn.internal.ir.Expression;
-import jdk.nashorn.internal.ir.LiteralNode;
-import jdk.nashorn.internal.ir.Node;
-import jdk.nashorn.internal.ir.ObjectNode;
-import jdk.nashorn.internal.ir.PropertyNode;
-import jdk.nashorn.internal.ir.UnaryNode;
+import jdk.nashorn.internal.codegen.ObjectClassGenerator;
+import jdk.nashorn.internal.objects.Global;
+import jdk.nashorn.internal.runtime.ECMAErrors;
import jdk.nashorn.internal.runtime.ErrorManager;
+import jdk.nashorn.internal.runtime.JSErrorType;
+import jdk.nashorn.internal.runtime.JSType;
+import jdk.nashorn.internal.runtime.ParserException;
+import jdk.nashorn.internal.runtime.Property;
+import jdk.nashorn.internal.runtime.PropertyMap;
+import jdk.nashorn.internal.runtime.ScriptObject;
import jdk.nashorn.internal.runtime.Source;
+import jdk.nashorn.internal.runtime.SpillProperty;
+import jdk.nashorn.internal.runtime.arrays.ArrayData;
+import jdk.nashorn.internal.runtime.arrays.ArrayIndex;
+import jdk.nashorn.internal.scripts.JO;
+
+import static jdk.nashorn.internal.parser.TokenType.STRING;
/**
* Parses JSON text and returns the corresponding runtime value (a script object, array, string, number, boolean or null). This is derived from the objectLiteral production of the main parser.
*
* See: 15.12.1.2 The JSON Syntactic Grammar
*/
-public class JSONParser extends AbstractParser {
+public class JSONParser {
+
+ // Text being parsed and the global used to create the resulting script objects.
+ private final String source;
+ private final Global global;
+ // Cached source.length(); pos is the index of the next character to read.
+ final int length;
+ int pos = 0;
+
+ // Shared starting map for every parsed object; never reassigned, hence final.
+ private static final PropertyMap EMPTY_MAP = PropertyMap.newMap();
+
+ // Sentinel returned by peek()/next() once pos has reached the end of the source.
+ private static final int EOF = -1;
+
+ private static final String TRUE = "true";
+ private static final String FALSE = "false";
+ private static final String NULL = "null";
+
+ // States of the object/array loops, used to detect missing and trailing commas.
+ private static final int STATE_EMPTY = 0;
+ private static final int STATE_ELEMENT_PARSED = 1;
+ private static final int STATE_COMMA_PARSED = 2;
/**
* Constructor
* @param source the source
- * @param errors the error manager
+ * @param global the global object
*/
- public JSONParser(final Source source, final ErrorManager errors) {
- super(source, errors, false, 0);
+ public JSONParser(final String source, final Global global ) {
+ this.source = source;
+ this.global = global;
+ this.length = source.length();
}
/**
* Implementation of the Quote(value) operation as defined in the ECMA script spec
* It wraps a String value in double quotes and escapes characters within in
@@ -112,331 +133,410 @@
return product.toString();
}
/**
- * Public parsed method - start lexing a new token stream for
- * a JSON script
+ * Public parse method. Parse a string into a JSON object.
*
- * @return the JSON literal
+ * @return the parsed JSON Object
*/
- public Node parse() {
- stream = new TokenStream();
+ public Object parse() { // entry point: parses the whole source string as a single JSON value
+ final Object value = parseLiteral();
+ skipWhiteSpace(); // trailing whitespace after the value is allowed
+ if (pos < length) { // anything else left over is reported as "expected eof"
+ throw expectedError(pos, "eof", toString(peek()));
+ }
+ return value;
+ }
+
+ private Object parseLiteral() { // parses one JSON value starting at pos, dispatching on its first character
+ skipWhiteSpace();
+
+ final int c = peek(); // peeked only; each parseXxx method consumes its own first character
+ if (c == EOF) {
+ throw expectedError(pos, "json literal", "eof");
+ }
+ switch (c) {
+ case '{':
+ return parseObject();
+ case '[':
+ return parseArray();
+ case '"':
+ return parseString();
+ case 'f':
+ return parseKeyword(FALSE, Boolean.FALSE);
+ case 't':
+ return parseKeyword(TRUE, Boolean.TRUE);
+ case 'n':
+ return parseKeyword(NULL, null); // JSON null is returned as Java null
+ default:
+ if (isDigit(c) || c == '-') {
+ return parseNumber();
+ } else if (c == '.') { // a leading '.' gets the invalid-number message rather than "expected json literal"
+ throw numberError(pos);
+ } else {
+ throw expectedError(pos, "json literal", toString(c));
+ }
+ }
+ }
- lexer = new Lexer(source, stream) {
+ // Parses a JSON object whose opening '{' is at pos and returns the script object built from it.
+ // Keys that are valid array indices go into arrayData; all other keys become properties in propertyMap.
+ // Throws a ParserException on a missing or trailing comma, an unexpected character, or end of input.
+ private Object parseObject() {
+ PropertyMap propertyMap = EMPTY_MAP;
+ ArrayData arrayData = ArrayData.EMPTY_ARRAY;
+ final ArrayList<Object> values = new ArrayList<>(); // property values, indexed by property slot
+ int state = STATE_EMPTY;
- @Override
- protected boolean skipComments() {
- return false;
- }
+ assert peek() == '{';
+ pos++;
- @Override
- protected boolean isStringDelimiter(final char ch) {
- return ch == '\"';
- }
+ while (pos < length) {
+ skipWhiteSpace();
+ final int c = peek();
- // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
- @Override
- protected boolean isWhitespace(final char ch) {
- return Lexer.isJsonWhitespace(ch);
+ switch (c) {
+ case '"':
+ if (state == STATE_ELEMENT_PARSED) {
+ // c was only peeked, so the offending character is at pos (not pos - 1),
+ // matching the default branch below and parseArray.
+ throw expectedError(pos, ", or }", toString(c));
+ }
+ final String id = parseString();
+ expectColon();
+ final Object value = parseLiteral();
+ final int index = ArrayIndex.getArrayIndex(id);
+ if (ArrayIndex.isValidArrayIndex(index)) {
+ arrayData = addArrayElement(arrayData, index, value);
+ } else {
+ propertyMap = addObjectProperty(propertyMap, values, id, value);
+ }
+ state = STATE_ELEMENT_PARSED;
+ break;
+ case ',':
+ // A comma is only legal directly after a member.
+ if (state != STATE_ELEMENT_PARSED) {
+ throw error(AbstractParser.message("trailing.comma.in.json"), pos);
+ }
+ state = STATE_COMMA_PARSED;
+ pos++;
+ break;
+ case '}':
+ // "{...,}" is rejected; "{}" and "{...}" are accepted.
+ if (state == STATE_COMMA_PARSED) {
+ throw error(AbstractParser.message("trailing.comma.in.json"), pos);
+ }
+ pos++;
+ return createObject(propertyMap, values, arrayData);
+ default:
+ throw expectedError(pos, ", or }", toString(c));
+ }
+ }
+ // Input ended before the closing brace.
+ throw expectedError(pos, ", or }", "eof");
}
- @Override
- protected boolean isEOL(final char ch) {
- return Lexer.isJsonEOL(ch);
+ private static ArrayData addArrayElement(final ArrayData arrayData, final int index, final Object value) { // stores value at index and returns the ArrayData to use from now on
+ final long oldLength = arrayData.length();
+ final long longIndex = ArrayIndex.toLongIndex(index);
+ ArrayData newArrayData = arrayData;
+ if (longIndex > oldLength) { // index lies beyond the current end, leaving a gap [oldLength, longIndex - 1]
+ if (arrayData.canDelete(oldLength, longIndex - 1, false)) {
+ newArrayData = newArrayData.delete(oldLength, longIndex - 1); // NOTE(review): presumably marks the gap as holes - confirm against ArrayData.delete
+ }
+ }
+ return newArrayData.ensure(longIndex).set(index, value, false);
}
- // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
- @Override
- protected void scanNumber() {
- // Record beginning of number.
- final int startPosition = position;
- // Assume value is a decimal.
- TokenType valueType = TokenType.DECIMAL;
+ // Records the property id with the given value and returns the resulting property map.
+ // The value is stored in values at the property's slot; a repeated key reuses the
+ // existing slot, so the later value overwrites the earlier one.
+ private static PropertyMap addObjectProperty(final PropertyMap propertyMap, final List<Object> values,
+ final String id, final Object value) {
+ final Property oldProperty = propertyMap.findProperty(id);
+ final Property newProperty;
+ final PropertyMap newMap;
+ // With OBJECT_FIELDS_ONLY every property is typed Object; otherwise the type follows the value.
+ final Class<?> type = ObjectClassGenerator.OBJECT_FIELDS_ONLY ? Object.class : getType(value);
- // floating point can't start with a "." with no leading digit before
- if (ch0 == '.') {
- error(Lexer.message("json.invalid.number"), STRING, position, limit);
+ if (oldProperty != null) {
+ // Duplicate key: keep the slot, replace the value and the (possibly re-typed) property.
+ values.set(oldProperty.getSlot(), value);
+ newProperty = new SpillProperty(id, 0, oldProperty.getSlot());
+ newProperty.setType(type);
+ newMap = propertyMap.replaceProperty(oldProperty, newProperty);
+ } else {
+ // New key: the next free slot is the current map size, matching the index values.add() uses.
+ values.add(value);
+ newProperty = new SpillProperty(id, 0, propertyMap.size());
+ newProperty.setType(type);
+ newMap = propertyMap.addProperty(newProperty);
}
- // First digit of number.
- final int digit = convertDigit(ch0, 10);
+ return newMap;
+ }
- // skip first digit
- skip(1);
+ private Object createObject(final PropertyMap propertyMap, final List<Object> values, final ArrayData arrayData) {
+ final long[] primitiveSpill = new long[values.size()];
+ final Object[] objectSpill = new Object[values.size()];
- if (digit != 0) {
- // Skip over remaining digits.
- while (convertDigit(ch0, 10) != -1) {
- skip(1);
+ for (final Property property : propertyMap.getProperties()) {
+ if (property.getType() == Object.class) {
+ objectSpill[property.getSlot()] = values.get(property.getSlot());
+ } else {
+ primitiveSpill[property.getSlot()] = ObjectClassGenerator.pack((Number) values.get(property.getSlot()));
}
}
- if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
- // Must be a double.
- if (ch0 == '.') {
- // Skip period.
- skip(1);
+ final ScriptObject object = new JO(propertyMap, primitiveSpill, objectSpill);
+ object.setInitialProto(global.getObjectPrototype());
+ object.setArray(arrayData);
+ return object;
+ }
- boolean mantissa = false;
- // Skip mantissa.
- while (convertDigit(ch0, 10) != -1) {
- mantissa = true;
- skip(1);
+ private static Class<?> getType(final Object value) { // maps a parsed value to the class used as its property type
+ if (value instanceof Integer) {
+ return int.class;
+ } else if (value instanceof Long) {
+ return long.class;
+ } else if (value instanceof Double) {
+ return double.class;
+ } else { // anything that is not a boxed Integer/Long/Double, including null
+ return Object.class;
+ }
}
- if (! mantissa) {
- // no digit after "."
- error(Lexer.message("json.invalid.number"), STRING, position, limit);
+ private void expectColon() { // skips whitespace, then consumes the ':' that must follow an object key
+ skipWhiteSpace();
+ final int n = next();
+ if (n != ':') {
+ throw expectedError(pos - 1, ":", toString(n)); // next() already advanced pos, so the offending character is at pos - 1
}
}
- // Detect exponent.
- if (ch0 == 'E' || ch0 == 'e') {
- // Skip E.
- skip(1);
- // Detect and skip exponent sign.
- if (ch0 == '+' || ch0 == '-') {
- skip(1);
+ private Object parseArray() {
+ ArrayData arrayData = ArrayData.EMPTY_ARRAY;
+ int state = STATE_EMPTY;
+
+ assert peek() == '[';
+ pos++;
+
+ while (pos < length) {
+ skipWhiteSpace();
+ final int c = peek();
+
+ switch (c) {
+ case ',':
+ if (state != STATE_ELEMENT_PARSED) {
+ throw error(AbstractParser.message("trailing.comma.in.json"), pos);
+ }
+ state = STATE_COMMA_PARSED;
+ pos++;
+ break;
+ case ']':
+ if (state == STATE_COMMA_PARSED) {
+ throw error(AbstractParser.message("trailing.comma.in.json"), pos);
+ }
+ pos++;
+ return global.wrapAsObject(arrayData);
+ default:
+ if (state == STATE_ELEMENT_PARSED) {
+ throw expectedError(pos, ", or ]", toString(c));
+ }
+ final long index = arrayData.length();
+ arrayData = arrayData.ensure(index).set((int) index, parseLiteral(), true);
+ state = STATE_ELEMENT_PARSED;
+ break;
}
- boolean exponent = false;
- // Skip exponent.
- while (convertDigit(ch0, 10) != -1) {
- exponent = true;
- skip(1);
}
- if (! exponent) {
- // no digit after "E"
- error(Lexer.message("json.invalid.number"), STRING, position, limit);
+ throw expectedError(pos, ", or ]", "eof");
}
+
+ private String parseString() {
+ // String buffer is only instantiated if string contains escape sequences.
+ int start = ++pos;
+ StringBuilder sb = null;
+
+ while (pos < length) {
+ final int c = next();
+ if (c <= 0x1f) {
+ // Characters <= 0x1f (control characters) are not allowed in JSON strings.
+ throw syntaxError(pos, "String contains control character");
+
+ } else if (c == '\\') {
+ if (sb == null) {
+ sb = new StringBuilder(pos - start + 16);
}
+ sb.append(source, start, pos - 1);
+ sb.append(parseEscapeSequence());
+ start = pos;
- valueType = TokenType.FLOATING;
+ } else if (c == '"') {
+ if (sb != null) {
+ sb.append(source, start, pos - 1);
+ return sb.toString();
+ }
+ return source.substring(start, pos - 1);
+ }
}
- // Add number token.
- add(valueType, startPosition);
+ throw error(Lexer.message("missing.close.quote"), pos, length);
}
- // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
- @Override
- protected boolean isEscapeCharacter(final char ch) {
- switch (ch) {
+ private char parseEscapeSequence() {
+ final int c = next();
+ switch (c) {
case '"':
- case '/':
+ return '"';
case '\\':
+ return '\\';
+ case '/':
+ return '/';
case 'b':
+ return '\b';
case 'f':
+ return '\f';
case 'n':
+ return '\n';
case 'r':
+ return '\r';
case 't':
- // could be unicode escape
+ return '\t';
case 'u':
- return true;
+ return parseUnicodeEscape();
default:
- return false;
+ throw error(Lexer.message("invalid.escape.char"), pos - 1, length);
}
}
- };
- k = -1;
-
- next();
-
- final Node resultNode = jsonLiteral();
- expect(EOF);
+ private char parseUnicodeEscape() { // reads the four hex digits of a \\uXXXX escape and returns the resulting char
+ return (char) (parseHexDigit() << 12 | parseHexDigit() << 8 | parseHexDigit() << 4 | parseHexDigit()); // operands are evaluated left to right, so the first digit read is the most significant
+ }
- return resultNode;
+ private int parseHexDigit() { // consumes one character and returns its value (0-15) as a hexadecimal digit
+ final int c = next();
+ if (c >= '0' && c <= '9') {
+ return c - '0';
+ } else if (c >= 'A' && c <= 'F') {
+ return c + 10 - 'A';
+ } else if (c >= 'a' && c <= 'f') {
+ return c + 10 - 'a';
+ }
+ throw error(Lexer.message("invalid.hex"), pos - 1, length); // also reached at end of input, where next() returned -1
}
- @SuppressWarnings("fallthrough")
- private LiteralNode<?> getStringLiteral() {
- final LiteralNode<?> literal = getLiteral();
- final String str = (String)literal.getValue();
+ private boolean isDigit(final int c) { // true only for ASCII '0'..'9'; false for the EOF value -1
+ return c >= '0' && c <= '9';
+ }
- for (int i = 0; i < str.length(); i++) {
- final char ch = str.charAt(i);
- switch (ch) {
- default:
- if (ch > 0x001f) {
+ private void skipDigits() {
+ while (pos < length) {
+ final int c = peek();
+ if (!isDigit(c)) {
break;
}
- case '"':
- case '\\':
- throw error(AbstractParser.message("unexpected.token", str));
+ pos++;
}
}
- return literal;
- }
+ private Number parseNumber() {
+ final int start = pos;
+ int c = next();
- /**
- * Parse a JSON literal from the token stream
- * @return the JSON literal as a Node
- */
- private Expression jsonLiteral() {
- final long literalToken = token;
-
- switch (type) {
- case STRING:
- return getStringLiteral();
- case ESCSTRING:
- case DECIMAL:
- case FLOATING:
- return getLiteral();
- case FALSE:
- next();
- return LiteralNode.newInstance(literalToken, finish, false);
- case TRUE:
- next();
- return LiteralNode.newInstance(literalToken, finish, true);
- case NULL:
- next();
- return LiteralNode.newInstance(literalToken, finish);
- case LBRACKET:
- return arrayLiteral();
- case LBRACE:
- return objectLiteral();
- /*
- * A.8.1 JSON Lexical Grammar
- *
- * JSONNumber :: See 15.12.1.1
- * -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
- */
- case SUB:
- next();
-
- final long realToken = token;
- final Object value = getValue();
-
- if (value instanceof Number) {
- next();
- return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
+ if (c == '-') {
+ c = next();
}
-
- throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
- default:
- break;
+ if (!isDigit(c)) {
+ throw numberError(start);
}
-
- throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
+ // no more digits allowed after 0
+ if (c != '0') {
+ skipDigits();
}
- /**
- * Parse an array literal from the token stream
- * @return the array literal as a Node
- */
- private LiteralNode<Expression[]> arrayLiteral() {
- // Unlike JavaScript array literals, elison is not permitted in JSON.
-
- // Capture LBRACKET token.
- final long arrayToken = token;
- // LBRACKET tested in caller.
- next();
-
- LiteralNode<Expression[]> result = null;
- // Prepare to accummulating elements.
- final List<Expression> elements = new ArrayList<>();
-
-loop:
- while (true) {
- switch (type) {
- case RBRACKET:
- next();
- result = LiteralNode.newInstance(arrayToken, finish, elements);
- break loop;
-
- case COMMARIGHT:
- next();
- // check for trailing comma - not allowed in JSON
- if (type == RBRACKET) {
- throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
+ // fraction
+ if (peek() == '.') {
+ pos++;
+ if (!isDigit(next())) {
+ throw numberError(pos - 1);
+ }
+ skipDigits();
}
- break;
- default:
- // Add expression element.
- elements.add(jsonLiteral());
- // Comma between array elements is mandatory in JSON.
- if (type != COMMARIGHT && type != RBRACKET) {
- throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
+ // exponent
+ c = peek();
+ if (c == 'e' || c == 'E') {
+ pos++;
+ c = next();
+ if (c == '-' || c == '+') {
+ c = next();
}
- break;
+ if (!isDigit(c)) {
+ throw numberError(pos - 1);
}
+ skipDigits();
}
- return result;
+ final double d = Double.parseDouble(source.substring(start, pos));
+ if (JSType.isRepresentableAsInt(d)) {
+ return (int) d;
+ } else if (JSType.isRepresentableAsLong(d)) {
+ return (long) d;
+ }
+ return d;
}
- /**
- * Parse an object literal from the token stream
- * @return the object literal as a Node
- */
- private ObjectNode objectLiteral() {
- // Capture LBRACE token.
- final long objectToken = token;
- // LBRACE tested in caller.
- next();
-
- // Prepare to accumulate elements.
- final List<PropertyNode> elements = new ArrayList<>();
-
- // Create a block for the object literal.
-loop:
- while (true) {
- switch (type) {
- case RBRACE:
- next();
- break loop;
-
- case COMMARIGHT:
- next();
- // check for trailing comma - not allowed in JSON
- if (type == RBRACE) {
- throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
+ private Object parseKeyword(final String keyword, final Object value) { // consumes keyword at pos and returns value, or fails if the text does not match
+ if (!source.regionMatches(pos, keyword, 0, keyword.length())) { // regionMatches returns false (no exception) if keyword would run past the end of source
+ throw expectedError(pos, "json literal", "ident");
+ }
+ pos += keyword.length();
+ return value;
}
- break;
- default:
- // Get and add the next property.
- final PropertyNode property = propertyAssignment();
- elements.add(property);
+ private int peek() { // returns the character at pos without consuming it
+ if (pos >= length) {
+ return -1; // same value as the EOF constant
+ }
+ return source.charAt(pos);
+ }
- // Comma between property assigments is mandatory in JSON.
- if (type != RBRACE && type != COMMARIGHT) {
- throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
+ private int next() { // returns the character at pos (or -1 at end of input) and advances pos
+ final int next = peek();
+ pos++; // incremented even at end of input, so pos may end up greater than length
+ return next;
}
+
+ private void skipWhiteSpace() { // advances pos past any run of tab, carriage return, line feed and space
+ while (pos < length) {
+ switch (peek()) {
+ case '\t':
+ case '\r':
+ case '\n':
+ case ' ':
+ pos++;
break;
+ default: // first non-whitespace character: leave pos pointing at it
+ return;
+ }
}
}
- // Construct new object literal.
- return new ObjectNode(objectToken, finish, elements);
+ private static String toString(final int c) { // renders a peek()/next() result for error messages; EOF becomes "eof"
+ return c == EOF ? "eof" : String.valueOf((char) c);
}
- /**
- * Parse a property assignment from the token stream
- * @return the property assignment as a Node
- */
- private PropertyNode propertyAssignment() {
- // Capture firstToken.
- final long propertyToken = token;
- LiteralNode<?> name = null;
+ ParserException error(final String message, final int start, final int length) throws ParserException { // builds (does not throw) a SYNTAX_ERROR ParserException; the length parameter shadows the field
+ final long token = Token.toDesc(STRING, start, length);
+ final int pos = Token.descPosition(token); // local pos shadows the parser position field
+ final Source src = Source.sourceFor("<json>", source); // a Source is created on each call, only to compute line/column and format the message
+ final int lineNum = src.getLine(pos);
+ final int columnNum = src.getColumn(pos);
+ final String formatted = ErrorManager.format(message, src, lineNum, columnNum, token);
+ return new ParserException(JSErrorType.SYNTAX_ERROR, formatted, src, lineNum, columnNum, token);
+ }
- if (type == STRING) {
- name = getStringLiteral();
- } else if (type == ESCSTRING) {
- name = getLiteral();
+ private ParserException error(final String message, final int start) { // convenience overload that passes the source length as the token length
+ return error(message, start, length);
}
- if (name != null) {
- expect(COLON);
- final Expression value = jsonLiteral();
- return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
+ private ParserException numberError(final int start) { // builds the "json.invalid.number" syntax error positioned at start
+ return error(Lexer.message("json.invalid.number"), start);
}
- // Raise an error.
- throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
+ private ParserException expectedError(final int start, final String expected, final String found) { // builds the "expected" syntax error from what was wanted and what was found
+ return error(AbstractParser.message("expected", expected, found), start);
}
+ private ParserException syntaxError(final int start, final String reason) { // builds a syntax error using the "syntax.error.invalid.json" ECMAErrors message with the given reason
+ final String message = ECMAErrors.getMessage("syntax.error.invalid.json", reason);
+ return error(message, start);
+ }
}