1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import java.util.ArrayList;
  29 import java.util.List;
  30 import jdk.nashorn.internal.codegen.ObjectClassGenerator;
  31 import jdk.nashorn.internal.objects.Global;
  32 import jdk.nashorn.internal.runtime.ECMAErrors;
  33 import jdk.nashorn.internal.runtime.ErrorManager;
  34 import jdk.nashorn.internal.runtime.JSErrorType;
  35 import jdk.nashorn.internal.runtime.JSType;
  36 import jdk.nashorn.internal.runtime.ParserException;
  37 import jdk.nashorn.internal.runtime.Property;
  38 import jdk.nashorn.internal.runtime.PropertyMap;
  39 import jdk.nashorn.internal.runtime.ScriptObject;
  40 import jdk.nashorn.internal.runtime.Source;
  41 import jdk.nashorn.internal.runtime.SpillProperty;
  42 import jdk.nashorn.internal.runtime.arrays.ArrayData;
  43 import jdk.nashorn.internal.runtime.arrays.ArrayIndex;
  44 import jdk.nashorn.internal.scripts.JO;
  45 
  46 import static jdk.nashorn.internal.parser.TokenType.STRING;
  47 
  48 /**
 * Parses JSON text and returns the corresponding runtime value (script object, array, string, number, boolean or null). This is derived from the objectLiteral production of the main parser.
  50  *
  51  * See: 15.12.1.2 The JSON Syntactic Grammar
  52  */
  53 public class JSONParser {
  54 
  55     final private String source;
  56     final private Global global;
  57     final int length;
  58     int pos = 0;
  59 
  60     private static PropertyMap EMPTY_MAP = PropertyMap.newMap();
  61 
  62     private static final int EOF = -1;
  63 
  64     private static final String TRUE  = "true";
  65     private static final String FALSE = "false";
  66     private static final String NULL  = "null";
  67 
  68     private static final int STATE_EMPTY          = 0;
  69     private static final int STATE_ELEMENT_PARSED = 1;
  70     private static final int STATE_COMMA_PARSED   = 2;
  71 
  72     /**
  73      * Constructor
  74      * @param source  the source
  75      * @param global the global object
  76      */
  77     public JSONParser(final String source, final Global global ) {
  78         this.source = source;
  79         this.global = global;
  80         this.length = source.length();
  81     }
  82 
  83     /**
  84      * Implementation of the Quote(value) operation as defined in the ECMA script spec
  85      * It wraps a String value in double quotes and escapes characters within in
  86      *
  87      * @param value string to quote
  88      *
  89      * @return quoted and escaped string
  90      */
  91     public static String quote(final String value) {
  92 
  93         final StringBuilder product = new StringBuilder();
  94 
  95         product.append("\"");
  96 
  97         for (final char ch : value.toCharArray()) {
  98             // TODO: should use a table?
  99             switch (ch) {
 100             case '\\':
 101                 product.append("\\\\");
 102                 break;
 103             case '"':
 104                 product.append("\\\"");
 105                 break;
 106             case '\b':
 107                 product.append("\\b");
 108                 break;
 109             case '\f':
 110                 product.append("\\f");
 111                 break;
 112             case '\n':
 113                 product.append("\\n");
 114                 break;
 115             case '\r':
 116                 product.append("\\r");
 117                 break;
 118             case '\t':
 119                 product.append("\\t");
 120                 break;
 121             default:
 122                 if (ch < ' ') {
 123                     product.append(Lexer.unicodeEscape(ch));
 124                     break;
 125                 }
 126 
 127                 product.append(ch);
 128                 break;
 129             }
 130         }
 131 
 132         product.append("\"");
 133 
 134         return product.toString();
 135     }
 136 
 137     /**
 138      * Public parse method. Parse a string into a JSON object.
 139      *
 140      * @return the parsed JSON Object
 141      */
 142     public Object parse() {
 143         final Object value = parseLiteral();
 144         skipWhiteSpace();
 145         if (pos < length) {
 146             throw expectedError(pos, "eof", toString(peek()));
 147         }
 148         return value;
 149     }
 150 
 151     private Object parseLiteral() {
 152         skipWhiteSpace();
 153 
 154         final int c = peek();
 155         if (c == EOF) {
 156             throw expectedError(pos, "json literal", "eof");
 157         }
 158         switch (c) {
 159         case '{':
 160             return parseObject();
 161         case '[':
 162             return parseArray();
 163         case '"':
 164             return parseString();
 165         case 'f':
 166             return parseKeyword(FALSE, Boolean.FALSE);
 167         case 't':
 168             return parseKeyword(TRUE, Boolean.TRUE);
 169         case 'n':
 170             return parseKeyword(NULL, null);
 171         default:
 172             if (isDigit(c) || c == '-') {
 173                 return parseNumber();
 174             } else if (c == '.') {
 175                 throw numberError(pos);
 176             } else {
 177                 throw expectedError(pos, "json literal", toString(c));
 178             }
 179         }
 180     }
 181 
 182     private Object parseObject() {
 183         PropertyMap propertyMap = EMPTY_MAP;
 184         ArrayData arrayData = ArrayData.EMPTY_ARRAY;
 185         final ArrayList<Object> values = new ArrayList<>();
 186         int state = STATE_EMPTY;
 187 
 188         assert peek() == '{';
 189         pos++;
 190 
 191         while (pos < length) {
 192             skipWhiteSpace();
 193             final int c = peek();
 194 
 195             switch (c) {
 196             case '"':
 197                 if (state == STATE_ELEMENT_PARSED) {
 198                     throw expectedError(pos - 1, ", or }", toString(c));
 199                 }
 200                 final String id = parseString();
 201                 expectColon();
 202                 final Object value = parseLiteral();
 203                 final int index = ArrayIndex.getArrayIndex(id);
 204                 if (ArrayIndex.isValidArrayIndex(index)) {
 205                     arrayData = addArrayElement(arrayData, index, value);
 206                 } else {
 207                     propertyMap = addObjectProperty(propertyMap, values, id, value);
 208                 }
 209                 state = STATE_ELEMENT_PARSED;
 210                 break;
 211             case ',':
 212                 if (state != STATE_ELEMENT_PARSED) {
 213                     throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 214                 }
 215                 state = STATE_COMMA_PARSED;
 216                 pos++;
 217                 break;
 218             case '}':
 219                 if (state == STATE_COMMA_PARSED) {
 220                     throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 221                 }
 222                 pos++;
 223                 return createObject(propertyMap, values, arrayData);
 224             default:
 225                 throw expectedError(pos, ", or }", toString(c));
 226             }
 227         }
 228         throw expectedError(pos, ", or }", "eof");
 229     }
 230 
 231     private static ArrayData addArrayElement(final ArrayData arrayData, final int index, final Object value) {
 232         final long oldLength = arrayData.length();
 233         final long longIndex = ArrayIndex.toLongIndex(index);
 234         ArrayData newArrayData = arrayData;
 235         if (longIndex >= oldLength) {
 236             newArrayData = newArrayData.ensure(longIndex);
 237             if (longIndex > oldLength) {
 238                 newArrayData = newArrayData.delete(oldLength, longIndex - 1);
 239             }
 240         }
 241         return newArrayData.set(index, value, false);
 242     }
 243 
 244     private static PropertyMap addObjectProperty(final PropertyMap propertyMap, final List<Object> values,
 245                                                  final String id, final Object value) {
 246         final Property oldProperty = propertyMap.findProperty(id);
 247         final Property newProperty;
 248         final PropertyMap newMap;
 249         final Class<?> type = ObjectClassGenerator.OBJECT_FIELDS_ONLY ? Object.class : getType(value);
 250 
 251         if (oldProperty != null) {
 252             values.set(oldProperty.getSlot(), value);
 253             newProperty = new SpillProperty(id, 0, oldProperty.getSlot());
 254             newProperty.setType(type);
 255             newMap = propertyMap.replaceProperty(oldProperty, newProperty);;
 256         } else {
 257             values.add(value);
 258             newProperty = new SpillProperty(id, 0, propertyMap.size());
 259             newProperty.setType(type);
 260             newMap = propertyMap.addProperty(newProperty);
 261         }
 262 
 263         return newMap;
 264     }
 265 
 266     private Object createObject(final PropertyMap propertyMap, final List<Object> values, final ArrayData arrayData) {
 267         final long[] primitiveSpill = new long[values.size()];
 268         final Object[] objectSpill = new Object[values.size()];
 269 
 270         for (final Property property : propertyMap.getProperties()) {
 271             if (property.getType() == Object.class) {
 272                 objectSpill[property.getSlot()] = values.get(property.getSlot());
 273             } else {
 274                 primitiveSpill[property.getSlot()] = ObjectClassGenerator.pack((Number) values.get(property.getSlot()));
 275             }
 276         }
 277 
 278         final ScriptObject object = new JO(propertyMap, primitiveSpill, objectSpill);
 279         object.setInitialProto(global.getObjectPrototype());
 280         object.setArray(arrayData);
 281         return object;
 282     }
 283 
 284     private static Class<?> getType(final Object value) {
 285         if (value instanceof Integer) {
 286             return int.class;
 287         } else if (value instanceof Long) {
 288             return long.class;
 289         } else if (value instanceof Double) {
 290             return double.class;
 291         } else {
 292             return Object.class;
 293         }
 294     }
 295 
 296     private void expectColon() {
 297         skipWhiteSpace();
 298         final int n = next();
 299         if (n != ':') {
 300             throw expectedError(pos - 1, ":", toString(n));
 301         }
 302     }
 303 
 304     private Object parseArray() {
 305         ArrayData arrayData = ArrayData.EMPTY_ARRAY;
 306         int state = STATE_EMPTY;
 307 
 308         assert peek() == '[';
 309         pos++;
 310 
 311         while (pos < length) {
 312             skipWhiteSpace();
 313             final int c = peek();
 314 
 315             switch (c) {
 316             case ',':
 317                 if (state != STATE_ELEMENT_PARSED) {
 318                     throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 319                 }
 320                 state = STATE_COMMA_PARSED;
 321                 pos++;
 322                 break;
 323             case ']':
 324                 if (state == STATE_COMMA_PARSED) {
 325                     throw error(AbstractParser.message("trailing.comma.in.json"), pos);
 326                 }
 327                 pos++;
 328                 return global.wrapAsObject(arrayData);
 329             default:
 330                 if (state == STATE_ELEMENT_PARSED) {
 331                     throw expectedError(pos, ", or ]", toString(c));
 332                 }
 333                 final long index = arrayData.length();
 334                 arrayData = arrayData.ensure(index).set((int) index, parseLiteral(), true);
 335                 state = STATE_ELEMENT_PARSED;
 336                 break;
 337             }
 338         }
 339 
 340         throw expectedError(pos, ", or ]", "eof");
 341     }
 342 
 343     private String parseString() {
 344         // String buffer is only instantiated if string contains escape sequences.
 345         int start = ++pos;
 346         StringBuilder sb = null;
 347 
 348         while (pos < length) {
 349             final int c = next();
 350             if (c <= 0x1f) {
 351                 // Characters < 0x1f are not allowed in JSON strings.
 352                 throw syntaxError(pos, "String contains control character");
 353 
 354             } else if (c == '\\') {
 355                 if (sb == null) {
 356                     sb = new StringBuilder(pos - start + 16);
 357                 }
 358                 sb.append(source, start, pos - 1);
 359                 sb.append(parseEscapeSequence());
 360                 start = pos;
 361 
 362             } else if (c == '"') {
 363                 if (sb != null) {
 364                     sb.append(source, start, pos - 1);
 365                     return sb.toString();
 366                 }
 367                 return source.substring(start, pos - 1);
 368             }
 369         }
 370 
 371         throw error(Lexer.message("missing.close.quote"), pos, length);
 372     }
 373 
 374     private char parseEscapeSequence() {
 375         final int c = next();
 376         switch (c) {
 377         case '"':
 378             return '"';
 379         case '\\':
 380             return '\\';
 381         case '/':
 382             return '/';
 383         case 'b':
 384             return '\b';
 385         case 'f':
 386             return '\f';
 387         case 'n':
 388             return '\n';
 389         case 'r':
 390             return '\r';
 391         case 't':
 392             return '\t';
 393         case 'u':
 394             return parseUnicodeEscape();
 395         default:
 396             throw error(Lexer.message("invalid.escape.char"), pos - 1, length);
 397         }
 398     }
 399 
 400     private char parseUnicodeEscape() {
 401         return (char) (parseHexDigit() << 12 | parseHexDigit() << 8 | parseHexDigit() << 4 | parseHexDigit());
 402     }
 403 
 404     private int parseHexDigit() {
 405         final int c = next();
 406         if (c >= '0' && c <= '9') {
 407             return c - '0';
 408         } else if (c >= 'A' && c <= 'F') {
 409             return c + 10 - 'A';
 410         } else if (c >= 'a' && c <= 'f') {
 411             return c + 10 - 'a';
 412         }
 413         throw error(Lexer.message("invalid.hex"), pos - 1, length);
 414     }
 415 
 416     private boolean isDigit(final int c) {
 417         return c >= '0' && c <= '9';
 418     }
 419 
 420     private void skipDigits() {
 421         while (pos < length) {
 422             final int c = peek();
 423             if (!isDigit(c)) {
 424                 break;
 425             }
 426             pos++;
 427         }
 428     }
 429 
 430     private Number parseNumber() {
 431         final int start = pos;
 432         int c = next();
 433 
 434         if (c == '-') {
 435             c = next();
 436         }
 437         if (!isDigit(c)) {
 438             throw numberError(start);
 439         }
 440         // no more digits allowed after 0
 441         if (c != '0') {
 442             skipDigits();
 443         }
 444 
 445         // fraction
 446         if (peek() == '.') {
 447             pos++;
 448             if (!isDigit(next())) {
 449                 throw numberError(pos - 1);
 450             }
 451             skipDigits();
 452         }
 453 
 454         // exponent
 455         c = peek();
 456         if (c == 'e' || c == 'E') {
 457             pos++;
 458             c = next();
 459             if (c == '-' || c == '+') {
 460                 c = next();
 461             }
 462             if (!isDigit(c)) {
 463                 throw numberError(pos - 1);
 464             }
 465             skipDigits();
 466         }
 467 
 468         final double d = Double.parseDouble(source.substring(start, pos));
 469         if (JSType.isRepresentableAsInt(d)) {
 470             return (int) d;
 471         } else if (JSType.isRepresentableAsLong(d)) {
 472             return (long) d;
 473         }
 474         return d;
 475     }
 476 
 477     private Object parseKeyword(final String keyword, final Object value) {
 478         if (!source.regionMatches(pos, keyword, 0, keyword.length())) {
 479             throw expectedError(pos, "json literal", "ident");
 480         }
 481         pos += keyword.length();
 482         return value;
 483     }
 484 
 485     private int peek() {
 486         if (pos >= length) {
 487             return -1;
 488         }
 489         return source.charAt(pos);
 490     }
 491 
 492     private int next() {
 493         final int next = peek();
 494         pos++;
 495         return next;
 496     }
 497 
 498     private void skipWhiteSpace() {
 499         while (pos < length) {
 500             switch (peek()) {
 501             case '\t':
 502             case '\r':
 503             case '\n':
 504             case ' ':
 505                 pos++;
 506                 break;
 507             default:
 508                 return;
 509             }
 510         }
 511     }
 512 
 513     private static String toString(final int c) {
 514         return c == EOF ? "eof" : String.valueOf((char) c);
 515     }
 516 
 517     ParserException error(final String message, final int start, final int length) throws ParserException {
 518         final long token     = Token.toDesc(STRING, start, length);
 519         final int  pos       = Token.descPosition(token);
 520         final Source src     = Source.sourceFor("<json>", source);
 521         final int  lineNum   = src.getLine(pos);
 522         final int  columnNum = src.getColumn(pos);
 523         final String formatted = ErrorManager.format(message, src, lineNum, columnNum, token);
 524         return new ParserException(JSErrorType.SYNTAX_ERROR, formatted, src, lineNum, columnNum, token);
 525     }
 526 
 527     private ParserException error(final String message, final int start) {
 528         return error(message, start, length);
 529     }
 530 
 531     private ParserException numberError(final int start) {
 532         return error(Lexer.message("json.invalid.number"), start);
 533     }
 534 
 535     private ParserException expectedError(final int start, final String expected, final String found) {
 536         return error(AbstractParser.message("expected", expected, found), start);
 537     }
 538 
 539     private ParserException syntaxError(final int start, final String reason) {
 540         final String message = ECMAErrors.getMessage("syntax.error.invalid.json", reason);
 541         return error(message, start);
 542     }
 543 }