1 /* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import java.util.ArrayList; 29 import java.util.List; 30 import jdk.nashorn.internal.codegen.ObjectClassGenerator; 31 import jdk.nashorn.internal.objects.Global; 32 import jdk.nashorn.internal.runtime.ECMAErrors; 33 import jdk.nashorn.internal.runtime.ErrorManager; 34 import jdk.nashorn.internal.runtime.JSErrorType; 35 import jdk.nashorn.internal.runtime.JSType; 36 import jdk.nashorn.internal.runtime.ParserException; 37 import jdk.nashorn.internal.runtime.Property; 38 import jdk.nashorn.internal.runtime.PropertyMap; 39 import jdk.nashorn.internal.runtime.ScriptObject; 40 import jdk.nashorn.internal.runtime.Source; 41 import jdk.nashorn.internal.runtime.SpillProperty; 42 import jdk.nashorn.internal.runtime.arrays.ArrayData; 43 import jdk.nashorn.internal.runtime.arrays.ArrayIndex; 44 import jdk.nashorn.internal.scripts.JO; 45 46 import static jdk.nashorn.internal.parser.TokenType.STRING; 47 48 /** 49 * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser. 50 * 51 * See: 15.12.1.2 The JSON Syntactic Grammar 52 */ 53 public class JSONParser { 54 55 final private String source; 56 final private Global global; 57 final int length; 58 int pos = 0; 59 60 private static PropertyMap EMPTY_MAP = PropertyMap.newMap(); 61 62 private static final int EOF = -1; 63 64 private static final String TRUE = "true"; 65 private static final String FALSE = "false"; 66 private static final String NULL = "null"; 67 68 private static final int STATE_EMPTY = 0; 69 private static final int STATE_ELEMENT_PARSED = 1; 70 private static final int STATE_COMMA_PARSED = 2; 71 72 /** 73 * Constructor 74 * @param source the source 75 * @param global the global object 76 */ 77 public JSONParser(final String source, final Global global ) { 78 this.source = source; 79 this.global = global; 80 this.length = source.length(); 81 } 82 83 /** 84 * Implementation of the Quote(value) operation as defined in the ECMA script spec 85 * It wraps a String value in double quotes and escapes characters within in 86 * 87 * @param value string to quote 88 * 89 * @return quoted and escaped string 90 */ 91 public static String quote(final String value) { 92 93 final StringBuilder product = new StringBuilder(); 94 95 product.append("\""); 96 97 for (final char ch : value.toCharArray()) { 98 // TODO: should use a table? 99 switch (ch) { 100 case '\\': 101 product.append("\\\\"); 102 break; 103 case '"': 104 product.append("\\\""); 105 break; 106 case '\b': 107 product.append("\\b"); 108 break; 109 case '\f': 110 product.append("\\f"); 111 break; 112 case '\n': 113 product.append("\\n"); 114 break; 115 case '\r': 116 product.append("\\r"); 117 break; 118 case '\t': 119 product.append("\\t"); 120 break; 121 default: 122 if (ch < ' ') { 123 product.append(Lexer.unicodeEscape(ch)); 124 break; 125 } 126 127 product.append(ch); 128 break; 129 } 130 } 131 132 product.append("\""); 133 134 return product.toString(); 135 } 136 137 /** 138 * Public parse method. Parse a string into a JSON object. 139 * 140 * @return the parsed JSON Object 141 */ 142 public Object parse() { 143 final Object value = parseLiteral(); 144 skipWhiteSpace(); 145 if (pos < length) { 146 throw expectedError(pos, "eof", toString(peek())); 147 } 148 return value; 149 } 150 151 private Object parseLiteral() { 152 skipWhiteSpace(); 153 154 final int c = peek(); 155 if (c == EOF) { 156 throw expectedError(pos, "json literal", "eof"); 157 } 158 switch (c) { 159 case '{': 160 return parseObject(); 161 case '[': 162 return parseArray(); 163 case '"': 164 return parseString(); 165 case 'f': 166 return parseKeyword(FALSE, Boolean.FALSE); 167 case 't': 168 return parseKeyword(TRUE, Boolean.TRUE); 169 case 'n': 170 return parseKeyword(NULL, null); 171 default: 172 if (isDigit(c) || c == '-') { 173 return parseNumber(); 174 } else if (c == '.') { 175 throw numberError(pos); 176 } else { 177 throw expectedError(pos, "json literal", toString(c)); 178 } 179 } 180 } 181 182 private Object parseObject() { 183 PropertyMap propertyMap = EMPTY_MAP; 184 ArrayData arrayData = ArrayData.EMPTY_ARRAY; 185 final ArrayList<Object> values = new ArrayList<>(); 186 int state = STATE_EMPTY; 187 188 assert peek() == '{'; 189 pos++; 190 191 while (pos < length) { 192 skipWhiteSpace(); 193 final int c = peek(); 194 195 switch (c) { 196 case '"': 197 if (state == STATE_ELEMENT_PARSED) { 198 throw expectedError(pos - 1, ", or }", toString(c)); 199 } 200 final String id = parseString(); 201 expectColon(); 202 final Object value = parseLiteral(); 203 final int index = ArrayIndex.getArrayIndex(id); 204 if (ArrayIndex.isValidArrayIndex(index)) { 205 arrayData = addArrayElement(arrayData, index, value); 206 } else { 207 propertyMap = addObjectProperty(propertyMap, values, id, value); 208 } 209 state = STATE_ELEMENT_PARSED; 210 break; 211 case ',': 212 if (state != STATE_ELEMENT_PARSED) { 213 throw error(AbstractParser.message("trailing.comma.in.json"), pos); 214 } 215 state = STATE_COMMA_PARSED; 216 pos++; 217 break; 218 case '}': 219 if (state == STATE_COMMA_PARSED) { 220 throw error(AbstractParser.message("trailing.comma.in.json"), pos); 221 } 222 pos++; 223 return createObject(propertyMap, values, arrayData); 224 default: 225 throw expectedError(pos, ", or }", toString(c)); 226 } 227 } 228 throw expectedError(pos, ", or }", "eof"); 229 } 230 231 private static ArrayData addArrayElement(final ArrayData arrayData, final int index, final Object value) { 232 final long oldLength = arrayData.length(); 233 final long longIndex = ArrayIndex.toLongIndex(index); 234 ArrayData newArrayData = arrayData; 235 if (longIndex > oldLength) { 236 if (arrayData.canDelete(oldLength, longIndex - 1, false)) { 237 newArrayData = newArrayData.delete(oldLength, longIndex - 1); 238 } 239 } 240 return newArrayData.ensure(longIndex).set(index, value, false); 241 } 242 243 private static PropertyMap addObjectProperty(final PropertyMap propertyMap, final List<Object> values, 244 final String id, final Object value) { 245 final Property oldProperty = propertyMap.findProperty(id); 246 final Property newProperty; 247 final PropertyMap newMap; 248 final Class<?> type = ObjectClassGenerator.OBJECT_FIELDS_ONLY ? Object.class : getType(value); 249 250 if (oldProperty != null) { 251 values.set(oldProperty.getSlot(), value); 252 newProperty = new SpillProperty(id, 0, oldProperty.getSlot()); 253 newProperty.setType(type); 254 newMap = propertyMap.replaceProperty(oldProperty, newProperty);; 255 } else { 256 values.add(value); 257 newProperty = new SpillProperty(id, 0, propertyMap.size()); 258 newProperty.setType(type); 259 newMap = propertyMap.addProperty(newProperty); 260 } 261 262 return newMap; 263 } 264 265 private Object createObject(final PropertyMap propertyMap, final List<Object> values, final ArrayData arrayData) { 266 final long[] primitiveSpill = new long[values.size()]; 267 final Object[] objectSpill = new Object[values.size()]; 268 269 for (final Property property : propertyMap.getProperties()) { 270 if (property.getType() == Object.class) { 271 objectSpill[property.getSlot()] = values.get(property.getSlot()); 272 } else { 273 primitiveSpill[property.getSlot()] = ObjectClassGenerator.pack((Number) values.get(property.getSlot())); 274 } 275 } 276 277 final ScriptObject object = new JO(propertyMap, primitiveSpill, objectSpill); 278 object.setInitialProto(global.getObjectPrototype()); 279 object.setArray(arrayData); 280 return object; 281 } 282 283 private static Class<?> getType(final Object value) { 284 if (value instanceof Integer) { 285 return int.class; 286 } else if (value instanceof Long) { 287 return long.class; 288 } else if (value instanceof Double) { 289 return double.class; 290 } else { 291 return Object.class; 292 } 293 } 294 295 private void expectColon() { 296 skipWhiteSpace(); 297 final int n = next(); 298 if (n != ':') { 299 throw expectedError(pos - 1, ":", toString(n)); 300 } 301 } 302 303 private Object parseArray() { 304 ArrayData arrayData = ArrayData.EMPTY_ARRAY; 305 int state = STATE_EMPTY; 306 307 assert peek() == '['; 308 pos++; 309 310 while (pos < length) { 311 skipWhiteSpace(); 312 final int c = peek(); 313 314 switch (c) { 315 case ',': 316 if (state != STATE_ELEMENT_PARSED) { 317 throw error(AbstractParser.message("trailing.comma.in.json"), pos); 318 } 319 state = STATE_COMMA_PARSED; 320 pos++; 321 break; 322 case ']': 323 if (state == STATE_COMMA_PARSED) { 324 throw error(AbstractParser.message("trailing.comma.in.json"), pos); 325 } 326 pos++; 327 return global.wrapAsObject(arrayData); 328 default: 329 if (state == STATE_ELEMENT_PARSED) { 330 throw expectedError(pos, ", or ]", toString(c)); 331 } 332 final long index = arrayData.length(); 333 arrayData = arrayData.ensure(index).set((int) index, parseLiteral(), true); 334 state = STATE_ELEMENT_PARSED; 335 break; 336 } 337 } 338 339 throw expectedError(pos, ", or ]", "eof"); 340 } 341 342 private String parseString() { 343 // String buffer is only instantiated if string contains escape sequences. 344 int start = ++pos; 345 StringBuilder sb = null; 346 347 while (pos < length) { 348 final int c = next(); 349 if (c <= 0x1f) { 350 // Characters < 0x1f are not allowed in JSON strings. 351 throw syntaxError(pos, "String contains control character"); 352 353 } else if (c == '\\') { 354 if (sb == null) { 355 sb = new StringBuilder(pos - start + 16); 356 } 357 sb.append(source, start, pos - 1); 358 sb.append(parseEscapeSequence()); 359 start = pos; 360 361 } else if (c == '"') { 362 if (sb != null) { 363 sb.append(source, start, pos - 1); 364 return sb.toString(); 365 } 366 return source.substring(start, pos - 1); 367 } 368 } 369 370 throw error(Lexer.message("missing.close.quote"), pos, length); 371 } 372 373 private char parseEscapeSequence() { 374 final int c = next(); 375 switch (c) { 376 case '"': 377 return '"'; 378 case '\\': 379 return '\\'; 380 case '/': 381 return '/'; 382 case 'b': 383 return '\b'; 384 case 'f': 385 return '\f'; 386 case 'n': 387 return '\n'; 388 case 'r': 389 return '\r'; 390 case 't': 391 return '\t'; 392 case 'u': 393 return parseUnicodeEscape(); 394 default: 395 throw error(Lexer.message("invalid.escape.char"), pos - 1, length); 396 } 397 } 398 399 private char parseUnicodeEscape() { 400 return (char) (parseHexDigit() << 12 | parseHexDigit() << 8 | parseHexDigit() << 4 | parseHexDigit()); 401 } 402 403 private int parseHexDigit() { 404 final int c = next(); 405 if (c >= '0' && c <= '9') { 406 return c - '0'; 407 } else if (c >= 'A' && c <= 'F') { 408 return c + 10 - 'A'; 409 } else if (c >= 'a' && c <= 'f') { 410 return c + 10 - 'a'; 411 } 412 throw error(Lexer.message("invalid.hex"), pos - 1, length); 413 } 414 415 private boolean isDigit(final int c) { 416 return c >= '0' && c <= '9'; 417 } 418 419 private void skipDigits() { 420 while (pos < length) { 421 final int c = peek(); 422 if (!isDigit(c)) { 423 break; 424 } 425 pos++; 426 } 427 } 428 429 private Number parseNumber() { 430 final int start = pos; 431 int c = next(); 432 433 if (c == '-') { 434 c = next(); 435 } 436 if (!isDigit(c)) { 437 throw numberError(start); 438 } 439 // no more digits allowed after 0 440 if (c != '0') { 441 skipDigits(); 442 } 443 444 // fraction 445 if (peek() == '.') { 446 pos++; 447 if (!isDigit(next())) { 448 throw numberError(pos - 1); 449 } 450 skipDigits(); 451 } 452 453 // exponent 454 c = peek(); 455 if (c == 'e' || c == 'E') { 456 pos++; 457 c = next(); 458 if (c == '-' || c == '+') { 459 c = next(); 460 } 461 if (!isDigit(c)) { 462 throw numberError(pos - 1); 463 } 464 skipDigits(); 465 } 466 467 final double d = Double.parseDouble(source.substring(start, pos)); 468 if (JSType.isRepresentableAsInt(d)) { 469 return (int) d; 470 } else if (JSType.isRepresentableAsLong(d)) { 471 return (long) d; 472 } 473 return d; 474 } 475 476 private Object parseKeyword(final String keyword, final Object value) { 477 if (!source.regionMatches(pos, keyword, 0, keyword.length())) { 478 throw expectedError(pos, "json literal", "ident"); 479 } 480 pos += keyword.length(); 481 return value; 482 } 483 484 private int peek() { 485 if (pos >= length) { 486 return -1; 487 } 488 return source.charAt(pos); 489 } 490 491 private int next() { 492 final int next = peek(); 493 pos++; 494 return next; 495 } 496 497 private void skipWhiteSpace() { 498 while (pos < length) { 499 switch (peek()) { 500 case '\t': 501 case '\r': 502 case '\n': 503 case ' ': 504 pos++; 505 break; 506 default: 507 return; 508 } 509 } 510 } 511 512 private static String toString(final int c) { 513 return c == EOF ? "eof" : String.valueOf((char) c); 514 } 515 516 ParserException error(final String message, final int start, final int length) throws ParserException { 517 final long token = Token.toDesc(STRING, start, length); 518 final int pos = Token.descPosition(token); 519 final Source src = Source.sourceFor("<json>", source); 520 final int lineNum = src.getLine(pos); 521 final int columnNum = src.getColumn(pos); 522 final String formatted = ErrorManager.format(message, src, lineNum, columnNum, token); 523 return new ParserException(JSErrorType.SYNTAX_ERROR, formatted, src, lineNum, columnNum, token); 524 } 525 526 private ParserException error(final String message, final int start) { 527 return error(message, start, length); 528 } 529 530 private ParserException numberError(final int start) { 531 return error(Lexer.message("json.invalid.number"), start); 532 } 533 534 private ParserException expectedError(final int start, final String expected, final String found) { 535 return error(AbstractParser.message("expected", expected, found), start); 536 } 537 538 private ParserException syntaxError(final int start, final String reason) { 539 final String message = ECMAErrors.getMessage("syntax.error.invalid.json", reason); 540 return error(message, start); 541 } 542 }