1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static jdk.nashorn.internal.parser.TokenType.COLON;
  29 import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
  30 import static jdk.nashorn.internal.parser.TokenType.EOF;
  31 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
  32 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
  33 import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
  34 import static jdk.nashorn.internal.parser.TokenType.STRING;
  35 
  36 import java.util.ArrayList;
  37 import java.util.List;
  38 import jdk.nashorn.internal.ir.Expression;
  39 import jdk.nashorn.internal.ir.LiteralNode;
  40 import jdk.nashorn.internal.ir.Node;
  41 import jdk.nashorn.internal.ir.ObjectNode;
  42 import jdk.nashorn.internal.ir.PropertyNode;
  43 import jdk.nashorn.internal.ir.UnaryNode;
  44 import jdk.nashorn.internal.runtime.ErrorManager;
  45 import jdk.nashorn.internal.runtime.Source;
  46 
  47 /**
  48  * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
  49  *
  50  * See: 15.12.1.2 The JSON Syntactic Grammar
  51  */
  52 public class JSONParser extends AbstractParser {
  53 
  54     /**
  55      * Constructor
  56      * @param source  the source
  57      * @param errors  the error manager
  58      */
  59     public JSONParser(final Source source, final ErrorManager errors) {
  60         super(source, errors, false);
  61     }
  62 
  63     /**
  64      * Implementation of the Quote(value) operation as defined in the ECMA script spec
  65      * It wraps a String value in double quotes and escapes characters within in
  66      *
  67      * @param value string to quote
  68      *
  69      * @return quoted and escaped string
  70      */
  71     public static String quote(final String value) {
  72 
  73         final StringBuilder product = new StringBuilder();
  74 
  75         product.append("\"");
  76 
  77         for (final char ch : value.toCharArray()) {
  78             // TODO: should use a table?
  79             switch (ch) {
  80             case '\\':
  81                 product.append("\\\\");
  82                 break;
  83             case '"':
  84                 product.append("\\\"");
  85                 break;
  86             case '\b':
  87                 product.append("\\b");
  88                 break;
  89             case '\f':
  90                 product.append("\\f");
  91                 break;
  92             case '\n':
  93                 product.append("\\n");
  94                 break;
  95             case '\r':
  96                 product.append("\\r");
  97                 break;
  98             case '\t':
  99                 product.append("\\t");
 100                 break;
 101             default:
 102                 if (ch < ' ') {
 103                     product.append(Lexer.unicodeEscape(ch));
 104                     break;
 105                 }
 106 
 107                 product.append(ch);
 108                 break;
 109             }
 110         }
 111 
 112         product.append("\"");
 113 
 114         return product.toString();
 115     }
 116 
 117     /**
 118      * Public parsed method - start lexing a new token stream for
 119      * a JSON script
 120      *
 121      * @return the JSON literal
 122      */
 123     public Node parse() {
 124         stream = new TokenStream();
 125 
 126         lexer = new Lexer(source, stream) {
 127 
 128             @Override
 129             protected boolean skipComments() {
 130                 return false;
 131             }
 132 
 133             @Override
 134             protected boolean isStringDelimiter(final char ch) {
 135                 return ch == '\"';
 136             }
 137 
 138             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
 139             @Override
 140             protected boolean isWhitespace(final char ch) {
 141                 return Lexer.isJsonWhitespace(ch);
 142             }
 143 
 144             @Override
 145             protected boolean isEOL(final char ch) {
 146                 return Lexer.isJsonEOL(ch);
 147             }
 148 
 149             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
 150             @Override
 151             protected void scanNumber() {
 152                 // Record beginning of number.
 153                 final int startPosition = position;
 154                 // Assume value is a decimal.
 155                 TokenType valueType = TokenType.DECIMAL;
 156 
 157                 // floating point can't start with a "." with no leading digit before
 158                 if (ch0 == '.') {
 159                     error(Lexer.message("json.invalid.number"), STRING, position, limit);
 160                 }
 161 
 162                 // First digit of number.
 163                 int digit = convertDigit(ch0, 10);
 164 
 165                 // skip first digit
 166                 skip(1);
 167 
 168                 if (digit != 0) {
 169                     // Skip over remaining digits.
 170                     while (convertDigit(ch0, 10) != -1) {
 171                         skip(1);
 172                     }
 173                 }
 174 
 175                 if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
 176                     // Must be a double.
 177                     if (ch0 == '.') {
 178                         // Skip period.
 179                         skip(1);
 180 
 181                         boolean mantissa = false;
 182                         // Skip mantissa.
 183                         while (convertDigit(ch0, 10) != -1) {
 184                             mantissa = true;
 185                             skip(1);
 186                         }
 187 
 188                         if (! mantissa) {
 189                             // no digit after "."
 190                             error(Lexer.message("json.invalid.number"), STRING, position, limit);
 191                         }
 192                     }
 193 
 194                     // Detect exponent.
 195                     if (ch0 == 'E' || ch0 == 'e') {
 196                         // Skip E.
 197                         skip(1);
 198                         // Detect and skip exponent sign.
 199                         if (ch0 == '+' || ch0 == '-') {
 200                             skip(1);
 201                         }
 202                         boolean exponent = false;
 203                         // Skip exponent.
 204                         while (convertDigit(ch0, 10) != -1) {
 205                             exponent = true;
 206                             skip(1);
 207                         }
 208 
 209                         if (! exponent) {
 210                             // no digit after "E"
 211                             error(Lexer.message("json.invalid.number"), STRING, position, limit);
 212                         }
 213                     }
 214 
 215                     valueType = TokenType.FLOATING;
 216                 }
 217 
 218                 // Add number token.
 219                 add(valueType, startPosition);
 220             }
 221 
 222             // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
 223             @Override
 224             protected boolean isEscapeCharacter(final char ch) {
 225                 switch (ch) {
 226                     case '"':
 227                     case '/':
 228                     case '\\':
 229                     case 'b':
 230                     case 'f':
 231                     case 'n':
 232                     case 'r':
 233                     case 't':
 234                     // could be unicode escape
 235                     case 'u':
 236                         return true;
 237                     default:
 238                         return false;
 239                 }
 240             }
 241         };
 242 
 243         k = -1;
 244 
 245         next();
 246 
 247         final Node resultNode = jsonLiteral();
 248         expect(EOF);
 249 
 250         return resultNode;
 251     }
 252 
 253     @SuppressWarnings("fallthrough")
 254     private LiteralNode<?> getStringLiteral() {
 255         final LiteralNode<?> literal = getLiteral();
 256         final String         str     = (String)literal.getValue();
 257 
 258         for (int i = 0; i < str.length(); i++) {
 259             final char ch = str.charAt(i);
 260             switch (ch) {
 261             default:
 262                 if (ch > 0x001f) {
 263                     break;
 264                 }
 265             case '"':
 266             case '\\':
 267                 throw error(AbstractParser.message("unexpected.token", str));
 268             }
 269         }
 270 
 271         return literal;
 272     }
 273 
 274     /**
 275      * Parse a JSON literal from the token stream
 276      * @return the JSON literal as a Node
 277      */
 278     private Expression jsonLiteral() {
 279         final long literalToken = token;
 280 
 281         switch (type) {
 282         case STRING:
 283             return getStringLiteral();
 284         case ESCSTRING:
 285         case DECIMAL:
 286         case FLOATING:
 287             return getLiteral();
 288         case FALSE:
 289             next();
 290             return LiteralNode.newInstance(literalToken, finish, false);
 291         case TRUE:
 292             next();
 293             return LiteralNode.newInstance(literalToken, finish, true);
 294         case NULL:
 295             next();
 296             return LiteralNode.newInstance(literalToken, finish);
 297         case LBRACKET:
 298             return arrayLiteral();
 299         case LBRACE:
 300             return objectLiteral();
 301         /*
 302          * A.8.1 JSON Lexical Grammar
 303          *
 304          * JSONNumber :: See 15.12.1.1
 305          *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
 306          */
 307         case SUB:
 308             next();
 309 
 310             final long realToken = token;
 311             final Object value = getValue();
 312 
 313             if (value instanceof Number) {
 314                 next();
 315                 return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
 316             }
 317 
 318             throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
 319         default:
 320             break;
 321         }
 322 
 323         throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
 324     }
 325 
 326     /**
 327      * Parse an array literal from the token stream
 328      * @return the array literal as a Node
 329      */
 330     private LiteralNode<Expression[]> arrayLiteral() {
 331         // Unlike JavaScript array literals, elison is not permitted in JSON.
 332 
 333         // Capture LBRACKET token.
 334         final long arrayToken = token;
 335         // LBRACKET tested in caller.
 336         next();
 337 
 338         LiteralNode<Expression[]> result = null;
 339         // Prepare to accummulating elements.
 340         final List<Expression> elements = new ArrayList<>();
 341 
 342 loop:
 343         while (true) {
 344             switch (type) {
 345             case RBRACKET:
 346                 next();
 347                 result = LiteralNode.newInstance(arrayToken, finish, elements);
 348                 break loop;
 349 
 350             case COMMARIGHT:
 351                 next();
 352                 // check for trailing comma - not allowed in JSON
 353                 if (type == RBRACKET) {
 354                     throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
 355                 }
 356                 break;
 357 
 358             default:
 359                 // Add expression element.
 360                 elements.add(jsonLiteral());
 361                 // Comma between array elements is mandatory in JSON.
 362                 if (type != COMMARIGHT && type != RBRACKET) {
 363                    throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
 364                 }
 365                 break;
 366             }
 367         }
 368 
 369         return result;
 370     }
 371 
 372     /**
 373      * Parse an object literal from the token stream
 374      * @return the object literal as a Node
 375      */
 376     private ObjectNode objectLiteral() {
 377         // Capture LBRACE token.
 378         final long objectToken = token;
 379         // LBRACE tested in caller.
 380         next();
 381 
 382         // Prepare to accumulate elements.
 383         final List<PropertyNode> elements = new ArrayList<>();
 384 
 385         // Create a block for the object literal.
 386 loop:
 387         while (true) {
 388             switch (type) {
 389             case RBRACE:
 390                 next();
 391                 break loop;
 392 
 393             case COMMARIGHT:
 394                 next();
 395                 // check for trailing comma - not allowed in JSON
 396                 if (type == RBRACE) {
 397                     throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
 398                 }
 399                 break;
 400 
 401             default:
 402                 // Get and add the next property.
 403                 final PropertyNode property = propertyAssignment();
 404                 elements.add(property);
 405 
 406                 // Comma between property assigments is mandatory in JSON.
 407                 if (type != RBRACE && type != COMMARIGHT) {
 408                     throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
 409                 }
 410                 break;
 411             }
 412         }
 413 
 414         // Construct new object literal.
 415         return new ObjectNode(objectToken, finish, elements);
 416     }
 417 
 418     /**
 419      * Parse a property assignment from the token stream
 420      * @return the property assignment as a Node
 421      */
 422     private PropertyNode propertyAssignment() {
 423         // Capture firstToken.
 424         final long propertyToken = token;
 425         LiteralNode<?> name = null;
 426 
 427         if (type == STRING) {
 428             name = getStringLiteral();
 429         } else if (type == ESCSTRING) {
 430             name = getLiteral();
 431         }
 432 
 433         if (name != null) {
 434             expect(COLON);
 435             final Expression value = jsonLiteral();
 436             return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
 437         }
 438 
 439         // Raise an error.
 440         throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
 441     }
 442 
 443 }