1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import java.util.ArrayList;
  29 import java.util.LinkedHashMap;
  30 import java.util.List;
  31 import java.util.Map;
  32 import jdk.nashorn.internal.runtime.ECMAErrors;
  33 import jdk.nashorn.internal.runtime.ErrorManager;
  34 import jdk.nashorn.internal.runtime.JSErrorType;
  35 import jdk.nashorn.internal.runtime.JSType;
  36 import jdk.nashorn.internal.runtime.ParserException;
  37 import jdk.nashorn.internal.runtime.Source;
  38 
  39 import static jdk.nashorn.internal.parser.TokenType.STRING;
  40 
  41 /**
  42  * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
  43  *
  44  * See: 15.12.1.2 The JSON Syntactic Grammar
  45  */
  46 public class JSONParser {
  47 
  48     final private String source;
  49     final int length;
  50     int pos = 0;
  51 
  52     private static final int EOF = -1;
  53 
  54     private static final String TRUE  = "true";
  55     private static final String FALSE = "false";
  56     private static final String NULL  = "null";
  57 
  58     private static final int STATE_EMPTY          = 0;
  59     private static final int STATE_ELEMENT_PARSED = 1;
  60     private static final int STATE_COMMA_PARSED   = 2;
  61 
  62     /**
  63      * Constructor
  64      * @param source  the source
  65      */
  66     public JSONParser(final String source) {
  67         this.source = source;
  68         this.length = source.length();
  69     }
  70 
  71     /**
  72      * Implementation of the Quote(value) operation as defined in the ECMA script spec
  73      * It wraps a String value in double quotes and escapes characters within in
  74      *
  75      * @param value string to quote
  76      *
  77      * @return quoted and escaped string
  78      */
  79     public static String quote(final String value) {
  80 
  81         final StringBuilder product = new StringBuilder();
  82 
  83         product.append("\"");
  84 
  85         for (final char ch : value.toCharArray()) {
  86             // TODO: should use a table?
  87             switch (ch) {
  88                 case '\\':
  89                     product.append("\\\\");
  90                     break;
  91                 case '"':
  92                     product.append("\\\"");
  93                     break;
  94                 case '\b':
  95                     product.append("\\b");
  96                     break;
  97                 case '\f':
  98                     product.append("\\f");
  99                     break;
 100                 case '\n':
 101                     product.append("\\n");
 102                     break;
 103                 case '\r':
 104                     product.append("\\r");
 105                     break;
 106                 case '\t':
 107                     product.append("\\t");
 108                     break;
 109                 default:
 110                     if (ch < ' ') {
 111                         product.append(Lexer.unicodeEscape(ch));
 112                         break;
 113                     }
 114 
 115                     product.append(ch);
 116                     break;
 117             }
 118         }
 119 
 120         product.append("\"");
 121 
 122         return product.toString();
 123     }
 124 
 125     /**
 126      * Public parse method. Parse a string into a JSON object.
 127      *
 128      * @return the parsed JSON Object
 129      */
 130     public Object parse() {
 131         final Object value = parseLiteral();
 132         skipWhiteSpace();
 133         if (pos < length) {
 134             throw expectedError(pos, "eof", toString(peek()));
 135         }
 136         return value;
 137     }
 138 
 139     private Object parseLiteral() {
 140         skipWhiteSpace();
 141 
 142         final int c = peek();
 143         if (c == EOF) {
 144             throw expectedError(pos, "json literal", "eof");
 145         }
 146         switch (c) {
 147             case '{':
 148                 return parseObject();
 149             case '[':
 150                 return parseArray();
 151             case '"':
 152                 return parseString();
 153             case 'f':
 154                 return parseKeyword(FALSE, Boolean.FALSE);
 155             case 't':
 156                 return parseKeyword(TRUE, Boolean.TRUE);
 157             case 'n':
 158                 return parseKeyword(NULL, null);
 159             default:
 160                 if (isDigit(c) || c == '-') {
 161                     return parseNumber();
 162                 } else if (c == '.') {
 163                     throw numberError(pos);
 164                 } else {
 165                     throw expectedError(pos, "json literal", toString(c));
 166                 }
 167         }
 168     }
 169 
 170     private Object parseObject() {
 171         final Map<String, Object> map = new LinkedHashMap<>();
 172         int state = STATE_EMPTY;
 173 
 174         assert peek() == '{';
 175         pos++;
 176 
 177         while (pos < length) {
 178             skipWhiteSpace();
 179             final int c = peek();
 180 
 181             switch (c) {
 182                 case '"':
 183                     if (state == STATE_ELEMENT_PARSED) {
 184                         throw expectedError(pos - 1, ", or }", toString(c));
 185                     }
 186                     final String id = parseString();
 187                     skipWhiteSpace();
 188                     final int n = next();
 189                     if (n != ':') {
 190                         throw expectedError(pos - 1, ":", toString(n));
 191                     }
 192                     final Object value = parseLiteral();
 193                     map.put(id, value);
 194                     state = STATE_ELEMENT_PARSED;
 195                     break;
 196                 case ',':
 197                     if (state != STATE_ELEMENT_PARSED) {
 198                         throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 199                     }
 200                     state = STATE_COMMA_PARSED;
 201                     pos++;
 202                     break;
 203                 case '}':
 204                     if (state == STATE_COMMA_PARSED) {
 205                         throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 206                     }
 207                     pos++;
 208                     return map;
 209                 default:
 210                     throw expectedError(pos, ", or }", toString(c));
 211             }
 212         }
 213         throw expectedError(pos, ", or }", "eof");
 214     }
 215 
 216 
 217     private Object parseArray() {
 218         final List<Object> list = new ArrayList<>();
 219         int state = STATE_EMPTY;
 220 
 221         assert peek() == '[';
 222         pos++;
 223 
 224         while (pos < length) {
 225             skipWhiteSpace();
 226             final int c = peek();
 227 
 228             switch (c) {
 229                 case ',':
 230                     if (state != STATE_ELEMENT_PARSED) {
 231                         throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 232                     }
 233                     state = STATE_COMMA_PARSED;
 234                     pos++;
 235                     break;
 236                 case ']':
 237                     if (state == STATE_COMMA_PARSED) {
 238                         throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 239                     }
 240                     pos++;
 241                     return list;
 242                 default:
 243                     if (state == STATE_ELEMENT_PARSED) {
 244                         throw expectedError(pos, ", or ]", toString(c));
 245                     }
 246                     list.add(parseLiteral());
 247                     state = STATE_ELEMENT_PARSED;
 248                     break;
 249             }
 250         }
 251 
 252         throw expectedError(pos, ", or ]", "eof");
 253     }
 254 
 255     private String parseString() {
 256         // String buffer is only instantiated if string contains escape sequences.
 257         int start = ++pos;
 258         StringBuilder sb = null;
 259 
 260         while (pos < length) {
 261             final int c = next();
 262             if (c <= 0x1f) {
 263                 // Characters < 0x1f are not allowed in JSON strings.
 264                 throw syntaxError(pos, "String contains control character");
 265 
 266             } else if (c == '\\') {
 267                 if (sb == null) {
 268                     sb = new StringBuilder(Math.max(32, (pos - start) * 2));
 269                 }
 270                 sb.append(source, start, pos - 1);
 271                 sb.append(parseEscapeSequence());
 272                 start = pos;
 273 
 274             } else if (c == '"') {
 275                 if (sb != null) {
 276                     sb.append(source, start, pos - 1);
 277                     return sb.toString();
 278                 }
 279                 return source.substring(start, pos - 1);
 280             }
 281         }
 282 
 283         throw error(Lexer.message("missing.close.quote"), pos, length);
 284     }
 285 
 286     private char parseEscapeSequence() {
 287         final int c = next();
 288         switch (c) {
 289             case '"':
 290                 return '"';
 291             case '\\':
 292                 return '\\';
 293             case '/':
 294                 return '/';
 295             case 'b':
 296                 return '\b';
 297             case 'f':
 298                 return '\f';
 299             case 'n':
 300                 return '\n';
 301             case 'r':
 302                 return '\r';
 303             case 't':
 304                 return '\t';
 305             case 'u':
 306                 return parseUnicodeEscape();
 307             default:
 308                 throw error(Lexer.message("invalid.escape.char"), pos - 1, length);
 309         }
 310     }
 311 
 312     private char parseUnicodeEscape() {
 313         return (char) (parseHexDigit() << 12 | parseHexDigit() << 8 | parseHexDigit() << 4 | parseHexDigit());
 314     }
 315 
 316     private int parseHexDigit() {
 317         final int c = next();
 318         if (c >= '0' && c <= '9') {
 319             return c - '0';
 320         } else if (c >= 'A' && c <= 'F') {
 321             return c + 10 - 'A';
 322         } else if (c >= 'a' && c <= 'f') {
 323             return c + 10 - 'a';
 324         }
 325         throw error(Lexer.message("invalid.hex"), pos - 1, length);
 326     }
 327 
 328     private boolean isDigit(final int c) {
 329         return c >= '0' && c <= '9';
 330     }
 331 
 332     private void skipDigits() {
 333         while (pos < length) {
 334             final int c = peek();
 335             if (!isDigit(c)) {
 336                 break;
 337             }
 338             pos++;
 339         }
 340     }
 341 
 342     private Number parseNumber() {
 343         final int start = pos;
 344         int c = next();
 345 
 346         if (c == '-') {
 347             c = next();
 348         }
 349         if (!isDigit(c)) {
 350             throw numberError(start);
 351         }
 352         // no more digits allowed after 0
 353         if (c != '0') {
 354             skipDigits();
 355         }
 356 
 357         // fraction
 358         if (peek() == '.') {
 359             pos++;
 360             if (!isDigit(next())) {
 361                 throw numberError(pos - 1);
 362             }
 363             skipDigits();
 364         }
 365 
 366         // exponent
 367         c = peek();
 368         if (c == 'e' || c == 'E') {
 369             pos++;
 370             c = next();
 371             if (c == '-' || c == '+') {
 372                 c = next();
 373             }
 374             if (!isDigit(c)) {
 375                 throw numberError(pos - 1);
 376             }
 377             skipDigits();
 378         }
 379 
 380         final double d = Double.parseDouble(source.substring(start, pos));
 381         if (JSType.isRepresentableAsInt(d)) {
 382             return (int) d;
 383         } else if (JSType.isRepresentableAsLong(d)) {
 384             return (long) d;
 385         }
 386         return d;
 387     }
 388 
 389     private Object parseKeyword(final String keyword, final Object value) {
 390         if (!source.regionMatches(pos, keyword, 0, keyword.length())) {
 391             throw expectedError(pos, "json literal", "ident");
 392         }
 393         pos += keyword.length();
 394         return value;
 395     }
 396 
 397     private int peek() {
 398         if (pos >= length) {
 399             return -1;
 400         }
 401         return source.charAt(pos);
 402     }
 403 
 404     private int next() {
 405         final int next = peek();
 406         pos++;
 407         return next;
 408     }
 409 
 410     private void skipWhiteSpace() {
 411         while (pos < length) {
 412             switch (peek()) {
 413             case '\t':
 414             case '\r':
 415             case '\n':
 416             case ' ':
 417                 pos++;
 418                 break;
 419             default:
 420                 return;
 421             }
 422         }
 423     }
 424 
 425     private static String toString(final int c) {
 426         return c == EOF ? "eof" : String.valueOf((char) c);
 427     }
 428 
 429     ParserException error(final String message, final int start, final int length) throws ParserException {
 430         final long token     = Token.toDesc(STRING, start, length);
 431         final int  pos       = Token.descPosition(token);
 432         final Source src     = Source.sourceFor("<json>", source);
 433         final int  lineNum   = src.getLine(pos);
 434         final int  columnNum = src.getColumn(pos);
 435         final String formatted = ErrorManager.format(message, src, lineNum, columnNum, token);
 436         return new ParserException(JSErrorType.SYNTAX_ERROR, formatted, src, lineNum, columnNum, token);
 437     }
 438 
 439     private ParserException error(final String message, final int start) {
 440         return error(message, start, length);
 441     }
 442 
 443     private ParserException numberError(final int start) {
 444         return error(Lexer.message("json.invalid.number"), start);
 445     }
 446 
 447     private ParserException expectedError(final int start, final String expected, final String found) {
 448         return error(AbstractParser.message("expected", expected, found), start);
 449     }
 450 
 451     private ParserException syntaxError(final int start, final String reason) {
 452         final String message = ECMAErrors.getMessage("syntax.error.invalid.json", reason);
 453         return error(message, start);
 454     }
 455 }