1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.COLON;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
  30 import static jdk.nashorn.internal.parser.TokenType.EOF;
  31 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  32 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  33 import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
  34 import static jdk.nashorn.internal.parser.TokenType.STRING;
  35 
  36 import java.util.ArrayList;
  37 import java.util.List;
  38 import jdk.nashorn.internal.ir.LiteralNode;
  39 import jdk.nashorn.internal.ir.Node;
  40 import jdk.nashorn.internal.ir.ObjectNode;
  41 import jdk.nashorn.internal.ir.PropertyNode;
  42 import jdk.nashorn.internal.ir.UnaryNode;
  43 import jdk.nashorn.internal.runtime.ErrorManager;
  44 import jdk.nashorn.internal.runtime.Source;
  45 
  46 /**
  47  * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
  48  *
  49  * See: 15.12.1.2 The JSON Syntactic Grammar
  50  */
  51 public class JSONParser extends AbstractParser {
  52 
  53     /**
  54      * Constructor
  55      * @param source  the source
  56      * @param errors  the error manager
  57      */
  58     public JSONParser(final Source source, final ErrorManager errors) {
  59         super(source, errors, false);
  60     }
  61 
  62     /**
  63      * Implementation of the Quote(value) operation as defined in the ECMA script spec
  64      * It wraps a String value in double quotes and escapes characters within in
  65      *
  66      * @param value string to quote
  67      *
  68      * @return quoted and escaped string
  69      */
  70     public static String quote(final String value) {
  71 
  72         final StringBuilder product = new StringBuilder();
  73 
  74         product.append("\"");
  75 
  76         for (final char ch : value.toCharArray()) {
  77             // TODO: should use a table?
  78             switch (ch) {
  79             case '\\':
  80                 product.append("\\\\");
  81                 break;
  82             case '"':
  83                 product.append("\\\"");
  84                 break;
  85             case '\b':
  86                 product.append("\\b");
  87                 break;
  88             case '\f':
  89                 product.append("\\f");
  90                 break;
  91             case '\n':
  92                 product.append("\\n");
  93                 break;
  94             case '\r':
  95                 product.append("\\r");
  96                 break;
  97             case '\t':
  98                 product.append("\\t");
  99                 break;
 100             default:
 101                 if (ch < ' ') {
 102                     product.append(Lexer.unicodeEscape(ch));
 103                     break;
 104                 }
 105 
 106                 product.append(ch);
 107                 break;
 108             }
 109         }
 110 
 111         product.append("\"");
 112 
 113         return product.toString();
 114     }
 115 
 116     /**
 117      * Public parsed method - start lexing a new token stream for
 118      * a JSON script
 119      *
 120      * @return the JSON literal
 121      */
 122     public Node parse() {
 123         stream = new TokenStream();
 124 
 125         lexer = new Lexer(source, stream) {
 126 
 127             @Override
 128             protected boolean skipComments() {
 129                 return false;
 130             }
 131 
 132             @Override
 133             protected boolean isStringDelimiter(final char ch) {
 134                 return ch == '\"';
 135             }
 136 
 137             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
 138             @Override
 139             protected boolean isWhitespace(final char ch) {
 140                 return Lexer.isJsonWhitespace(ch);
 141             }
 142 
 143             @Override
 144             protected boolean isEOL(final char ch) {
 145                 return Lexer.isJsonEOL(ch);
 146             }
 147 
 148             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
 149             @Override
 150             protected void scanNumber() {
 151                 // Record beginning of number.
 152                 final int start = position;
 153                 // Assume value is a decimal.
 154                 TokenType type = TokenType.DECIMAL;
 155 
 156                 // floating point can't start with a "." with no leading digit before
 157                 if (ch0 == '.') {
 158                     error(Lexer.message("json.invalid.number"), STRING, position, limit);
 159                 }
 160 
 161                 // First digit of number.
 162                 int digit = convertDigit(ch0, 10);
 163 
 164                 // If number begins with 0x.
 165                 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
 166                     // Skip over 0xN.
 167                     skip(3);
 168                     // Skip over remaining digits.
 169                     while (convertDigit(ch0, 16) != -1) {
 170                         skip(1);
 171                     }
 172                     error(Lexer.message("json.no.hexadecimal.number"), STRING, position, limit);
 173                     type = TokenType.HEXADECIMAL;
 174                 } else {
 175                     // Check for possible octal constant.
 176                     boolean octal = digit == 0;
 177                     // Skip first digit if not leading '.'.
 178                     if (digit != -1) {
 179                         skip(1);
 180                     }
 181 
 182                     // Skip remaining digits.
 183                     while (convertDigit(ch0, 10) != -1) {
 184                         // Skip digit.
 185                         skip(1);
 186                     }
 187 
 188                     if (octal && position - start > 1) {
 189                         error(Lexer.message("json.no.octal.number"), STRING, position, limit);
 190                         type = TokenType.OCTAL;
 191                     } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
 192                         // Must be a double.
 193                         if (ch0 == '.') {
 194                             // Skip period.
 195                             skip(1);
 196                             boolean mantissa = false;
 197                             // Skip mantissa.
 198                             while (convertDigit(ch0, 10) != -1) {
 199                                 skip(1);
 200                                 mantissa = true;
 201                             }
 202 
 203                             if (! mantissa) {
 204                                 // no digit after "."
 205                                 error(Lexer.message("json.invalid.number"), STRING, position, limit);
 206                             }
 207                         }
 208 
 209                         // Detect exponent.
 210                         if (ch0 == 'E' || ch0 == 'e') {
 211                             // Skip E.
 212                             skip(1);
 213                             // Detect and skip exponent sign.
 214                             if (ch0 == '+' || ch0 == '-') {
 215                                 skip(1);
 216                             }
 217                             boolean exponent = false;
 218                             // Skip exponent.
 219                             while (convertDigit(ch0, 10) != -1) {
 220                                 exponent = true;
 221                                 skip(1);
 222                             }
 223 
 224                             if (! exponent) {
 225                                 // no digit after "E"
 226                                 error(Lexer.message("json.invalid.number"), STRING, position, limit);
 227                             }
 228                         }
 229 
 230                         type = TokenType.FLOATING;
 231                     }
 232                 }
 233 
 234                 // Add number token.
 235                 add(type, start);
 236             }
 237 
 238             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
 239             @Override
 240             protected boolean isEscapeCharacter(final char ch) {
 241                 switch (ch) {
 242                     case '"':
 243                     case '/':
 244                     case '\\':
 245                     case 'b':
 246                     case 'f':
 247                     case 'n':
 248                     case 'r':
 249                     case 't':
 250                     // could be unicode escape
 251                     case 'u':
 252                         return true;
 253                     default:
 254                         return false;
 255                 }
 256             }
 257         };
 258 
 259         k = -1;
 260 
 261         next();
 262 
 263         final Node resultNode = jsonLiteral();
 264         expect(EOF);
 265 
 266         return resultNode;
 267     }
 268 
 269     @SuppressWarnings("fallthrough")
 270     private LiteralNode<?> getStringLiteral() {
 271         final LiteralNode<?> literal = getLiteral();
 272         final String         str     = (String)literal.getValue();
 273 
 274         for (int i = 0; i < str.length(); i++) {
 275             final char ch = str.charAt(i);
 276             switch (ch) {
 277             default:
 278                 if (ch > 0x001f) {
 279                     break;
 280                 }
 281             case '"':
 282             case '\\':
 283                 throw error(AbstractParser.message("unexpected.token", str));
 284             }
 285         }
 286 
 287         return literal;
 288     }
 289 
 290     /**
 291      * Parse a JSON literal from the token stream
 292      * @return the JSON literal as a Node
 293      */
 294     private Node jsonLiteral() {
 295         final long literalToken = token;
 296 
 297         switch (type) {
 298         case STRING:
 299             return getStringLiteral();
 300         case ESCSTRING:
 301         case DECIMAL:
 302         case FLOATING:
 303             return getLiteral();
 304         case FALSE:
 305             next();
 306             return LiteralNode.newInstance(literalToken, finish, false);
 307         case TRUE:
 308             next();
 309             return LiteralNode.newInstance(literalToken, finish, true);
 310         case NULL:
 311             next();
 312             return LiteralNode.newInstance(literalToken, finish);
 313         case LBRACKET:
 314             return arrayLiteral();
 315         case LBRACE:
 316             return objectLiteral();
 317         /*
 318          * A.8.1 JSON Lexical Grammar
 319          *
 320          * JSONNumber :: See 15.12.1.1
 321          *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
 322          */
 323         case SUB:
 324             next();
 325 
 326             final long realToken = token;
 327             final Object value = getValue();
 328 
 329             if (value instanceof Number) {
 330                 next();
 331                 return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
 332             }
 333 
 334             throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
 335         default:
 336             break;
 337         }
 338 
 339         throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
 340     }
 341 
 342     /**
 343      * Parse an array literal from the token stream
 344      * @return the array literal as a Node
 345      */
 346     private Node arrayLiteral() {
 347         // Unlike JavaScript array literals, elison is not permitted in JSON.
 348 
 349         // Capture LBRACKET token.
 350         final long arrayToken = token;
 351         // LBRACKET tested in caller.
 352         next();
 353 
 354         Node result = null;
 355         // Prepare to accummulating elements.
 356         final List<Node> elements = new ArrayList<>();
 357 
 358 loop:
 359         while (true) {
 360             switch (type) {
 361             case RBRACKET:
 362                 next();
 363                 result = LiteralNode.newInstance(arrayToken, finish, elements);
 364                 break loop;
 365 
 366             case COMMARIGHT:
 367                 next();
 368                 break;
 369 
 370             default:
 371                 // Add expression element.
 372                 elements.add(jsonLiteral());
 373                 // Comma between array elements is mandatory in JSON.
 374                 if (type != COMMARIGHT && type != RBRACKET) {
 375                    throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
 376                 }
 377                 break;
 378             }
 379         }
 380 
 381         return result;
 382     }
 383 
 384     /**
 385      * Parse an object literal from the token stream
 386      * @return the object literal as a Node
 387      */
 388     private Node objectLiteral() {
 389         // Capture LBRACE token.
 390         final long objectToken = token;
 391         // LBRACE tested in caller.
 392         next();
 393 
 394         // Prepare to accumulate elements.
 395         final List<PropertyNode> elements = new ArrayList<>();
 396 
 397         // Create a block for the object literal.
 398 loop:
 399         while (true) {
 400             switch (type) {
 401             case RBRACE:
 402                 next();
 403                 break loop;
 404 
 405             case COMMARIGHT:
 406                 next();
 407                 break;
 408 
 409             default:
 410                 // Get and add the next property.
 411                 final PropertyNode property = propertyAssignment();
 412                 elements.add(property);
 413 
 414                 // Comma between property assigments is mandatory in JSON.
 415                 if (type != RBRACE && type != COMMARIGHT) {
 416                     throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
 417                 }
 418                 break;
 419             }
 420         }
 421 
 422         // Construct new object literal.
 423         return new ObjectNode(objectToken, finish, elements);
 424     }
 425 
 426     /**
 427      * Parse a property assignment from the token stream
 428      * @return the property assignment as a Node
 429      */
 430     private PropertyNode propertyAssignment() {
 431         // Capture firstToken.
 432         final long propertyToken = token;
 433         LiteralNode<?> name = null;
 434 
 435         if (type == STRING) {
 436             name = getStringLiteral();
 437         } else if (type == ESCSTRING) {
 438             name = getLiteral();
 439         }
 440 
 441         if (name != null) {
 442             expect(COLON);
 443             final Node value = jsonLiteral();
 444             return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
 445         }
 446 
 447         // Raise an error.
 448         throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
 449     }
 450 
 451 }