1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.COLON;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
  30 import static jdk.nashorn.internal.parser.TokenType.EOF;
  31 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  32 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  33 import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
  34 import static jdk.nashorn.internal.parser.TokenType.STRING;
  35 
  36 import java.util.ArrayList;
  37 import java.util.List;
  38 import jdk.nashorn.internal.ir.LiteralNode;
  39 import jdk.nashorn.internal.ir.Node;
  40 import jdk.nashorn.internal.ir.ObjectNode;
  41 import jdk.nashorn.internal.ir.PropertyNode;
  42 import jdk.nashorn.internal.ir.UnaryNode;
  43 import jdk.nashorn.internal.runtime.ErrorManager;
  44 import jdk.nashorn.internal.runtime.Source;
  45 
  46 /**
  47  * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
  48  *
  49  * See: 15.12.1.2 The JSON Syntactic Grammar
  50  */
  51 public class JSONParser extends AbstractParser {
  52 
  53     /**
  54      * Constructor
  55      * @param source  the source
  56      * @param errors  the error manager
  57      */
  58     public JSONParser(final Source source, final ErrorManager errors) {
  59         super(source, errors, false);
  60     }
  61 
  62     /**
  63      * Implementation of the Quote(value) operation as defined in the ECMA script spec
  64      * It wraps a String value in double quotes and escapes characters within in
  65      *
  66      * @param value string to quote
  67      *
  68      * @return quoted and escaped string
  69      */
  70     public static String quote(final String value) {
  71 
  72         final StringBuilder product = new StringBuilder();
  73 
  74         product.append("\"");
  75 
  76         for (final char ch : value.toCharArray()) {
  77             // TODO: should use a table?
  78             switch (ch) {
  79             case '\\':
  80                 product.append("\\\\");
  81                 break;
  82             case '"':
  83                 product.append("\\\"");
  84                 break;
  85             case '\b':
  86                 product.append("\\b");
  87                 break;
  88             case '\f':
  89                 product.append("\\f");
  90                 break;
  91             case '\n':
  92                 product.append("\\n");
  93                 break;
  94             case '\r':
  95                 product.append("\\r");
  96                 break;
  97             case '\t':
  98                 product.append("\\t");
  99                 break;
 100             default:
 101                 if (ch < ' ') {
 102                     product.append(Lexer.unicodeEscape(ch));
 103                     break;
 104                 }
 105 
 106                 product.append(ch);
 107                 break;
 108             }
 109         }
 110 
 111         product.append("\"");
 112 
 113         return product.toString();
 114     }
 115 
 116     /**
 117      * Public parsed method - start lexing a new token stream for
 118      * a JSON script
 119      *
 120      * @return the JSON literal
 121      */
 122     public Node parse() {
 123         stream = new TokenStream();
 124 
 125         lexer = new Lexer(source, stream) {
 126 
 127             @Override
 128             protected boolean skipComments() {
 129                 return false;
 130             }
 131 
 132             @Override
 133             protected boolean isStringDelimiter(final char ch) {
 134                 return ch == '\"';
 135             }
 136 
 137             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
 138             @Override
 139             protected boolean isWhitespace(final char ch) {
 140                 return Lexer.isJsonWhitespace(ch);
 141             }
 142 
 143             @Override
 144             protected boolean isEOL(final char ch) {
 145                 return Lexer.isJsonEOL(ch);
 146             }
 147 
 148             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
 149             @Override
 150             protected void scanNumber() {
 151                 // Record beginning of number.
 152                 final int start = position;
 153                 // Assume value is a decimal.
 154                 TokenType type = TokenType.DECIMAL;
 155 
 156                 // floating point can't start with a "." with no leading digit before
 157                 if (ch0 == '.') {
 158                     error(Lexer.message("json.invalid.number"), STRING, position, limit);
 159                 }
 160 
 161                 // First digit of number.
 162                 int digit = convertDigit(ch0, 10);
 163 
 164                 // skip first digit
 165                 skip(1);
 166 
 167                 if (digit != 0) {
 168                     // Skip over remaining digits.
 169                     while (convertDigit(ch0, 10) != -1) {
 170                         skip(1);
 171                     }
 172                 }
 173 
 174                 if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
 175                     // Must be a double.
 176                     if (ch0 == '.') {
 177                         // Skip period.
 178                         skip(1);
 179 
 180                         boolean mantissa = false;
 181                         // Skip mantissa.
 182                         while (convertDigit(ch0, 10) != -1) {
 183                             mantissa = true;
 184                             skip(1);
 185                         }
 186 
 187                         if (! mantissa) {
 188                             // no digit after "."
 189                             error(Lexer.message("json.invalid.number"), STRING, position, limit);
 190                         }
 191                     }
 192 
 193                     // Detect exponent.
 194                     if (ch0 == 'E' || ch0 == 'e') {
 195                         // Skip E.
 196                         skip(1);
 197                         // Detect and skip exponent sign.
 198                         if (ch0 == '+' || ch0 == '-') {
 199                             skip(1);
 200                         }
 201                         boolean exponent = false;
 202                         // Skip exponent.
 203                         while (convertDigit(ch0, 10) != -1) {
 204                             exponent = true;
 205                             skip(1);
 206                         }
 207 
 208                         if (! exponent) {
 209                             // no digit after "E"
 210                             error(Lexer.message("json.invalid.number"), STRING, position, limit);
 211                         }
 212                     }
 213 
 214                     type = TokenType.FLOATING;
 215                 }
 216 
 217                 // Add number token.
 218                 add(type, start);
 219             }
 220 
 221             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
 222             @Override
 223             protected boolean isEscapeCharacter(final char ch) {
 224                 switch (ch) {
 225                     case '"':
 226                     case '/':
 227                     case '\\':
 228                     case 'b':
 229                     case 'f':
 230                     case 'n':
 231                     case 'r':
 232                     case 't':
 233                     // could be unicode escape
 234                     case 'u':
 235                         return true;
 236                     default:
 237                         return false;
 238                 }
 239             }
 240         };
 241 
 242         k = -1;
 243 
 244         next();
 245 
 246         final Node resultNode = jsonLiteral();
 247         expect(EOF);
 248 
 249         return resultNode;
 250     }
 251 
 252     @SuppressWarnings("fallthrough")
 253     private LiteralNode<?> getStringLiteral() {
 254         final LiteralNode<?> literal = getLiteral();
 255         final String         str     = (String)literal.getValue();
 256 
 257         for (int i = 0; i < str.length(); i++) {
 258             final char ch = str.charAt(i);
 259             switch (ch) {
 260             default:
 261                 if (ch > 0x001f) {
 262                     break;
 263                 }
 264             case '"':
 265             case '\\':
 266                 throw error(AbstractParser.message("unexpected.token", str));
 267             }
 268         }
 269 
 270         return literal;
 271     }
 272 
 273     /**
 274      * Parse a JSON literal from the token stream
 275      * @return the JSON literal as a Node
 276      */
 277     private Node jsonLiteral() {
 278         final long literalToken = token;
 279 
 280         switch (type) {
 281         case STRING:
 282             return getStringLiteral();
 283         case ESCSTRING:
 284         case DECIMAL:
 285         case FLOATING:
 286             return getLiteral();
 287         case FALSE:
 288             next();
 289             return LiteralNode.newInstance(literalToken, finish, false);
 290         case TRUE:
 291             next();
 292             return LiteralNode.newInstance(literalToken, finish, true);
 293         case NULL:
 294             next();
 295             return LiteralNode.newInstance(literalToken, finish);
 296         case LBRACKET:
 297             return arrayLiteral();
 298         case LBRACE:
 299             return objectLiteral();
 300         /*
 301          * A.8.1 JSON Lexical Grammar
 302          *
 303          * JSONNumber :: See 15.12.1.1
 304          *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
 305          */
 306         case SUB:
 307             next();
 308 
 309             final long realToken = token;
 310             final Object value = getValue();
 311 
 312             if (value instanceof Number) {
 313                 next();
 314                 return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
 315             }
 316 
 317             throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
 318         default:
 319             break;
 320         }
 321 
 322         throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
 323     }
 324 
 325     /**
 326      * Parse an array literal from the token stream
 327      * @return the array literal as a Node
 328      */
 329     private Node arrayLiteral() {
 330         // Unlike JavaScript array literals, elison is not permitted in JSON.
 331 
 332         // Capture LBRACKET token.
 333         final long arrayToken = token;
 334         // LBRACKET tested in caller.
 335         next();
 336 
 337         Node result = null;
 338         // Prepare to accummulating elements.
 339         final List<Node> elements = new ArrayList<>();
 340 
 341 loop:
 342         while (true) {
 343             switch (type) {
 344             case RBRACKET:
 345                 next();
 346                 result = LiteralNode.newInstance(arrayToken, finish, elements);
 347                 break loop;
 348 
 349             case COMMARIGHT:
 350                 next();
 351                 break;
 352 
 353             default:
 354                 // Add expression element.
 355                 elements.add(jsonLiteral());
 356                 // Comma between array elements is mandatory in JSON.
 357                 if (type != COMMARIGHT && type != RBRACKET) {
 358                    throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
 359                 }
 360                 break;
 361             }
 362         }
 363 
 364         return result;
 365     }
 366 
 367     /**
 368      * Parse an object literal from the token stream
 369      * @return the object literal as a Node
 370      */
 371     private Node objectLiteral() {
 372         // Capture LBRACE token.
 373         final long objectToken = token;
 374         // LBRACE tested in caller.
 375         next();
 376 
 377         // Prepare to accumulate elements.
 378         final List<PropertyNode> elements = new ArrayList<>();
 379 
 380         // Create a block for the object literal.
 381 loop:
 382         while (true) {
 383             switch (type) {
 384             case RBRACE:
 385                 next();
 386                 break loop;
 387 
 388             case COMMARIGHT:
 389                 next();
 390                 break;
 391 
 392             default:
 393                 // Get and add the next property.
 394                 final PropertyNode property = propertyAssignment();
 395                 elements.add(property);
 396 
 397                 // Comma between property assigments is mandatory in JSON.
 398                 if (type != RBRACE && type != COMMARIGHT) {
 399                     throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
 400                 }
 401                 break;
 402             }
 403         }
 404 
 405         // Construct new object literal.
 406         return new ObjectNode(objectToken, finish, elements);
 407     }
 408 
 409     /**
 410      * Parse a property assignment from the token stream
 411      * @return the property assignment as a Node
 412      */
 413     private PropertyNode propertyAssignment() {
 414         // Capture firstToken.
 415         final long propertyToken = token;
 416         LiteralNode<?> name = null;
 417 
 418         if (type == STRING) {
 419             name = getStringLiteral();
 420         } else if (type == ESCSTRING) {
 421             name = getLiteral();
 422         }
 423 
 424         if (name != null) {
 425             expect(COLON);
 426             final Node value = jsonLiteral();
 427             return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
 428         }
 429 
 430         // Raise an error.
 431         throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
 432     }
 433 
 434 }