/*
 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import jdk.nashorn.internal.runtime.ECMAErrors;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.JSErrorType;
import jdk.nashorn.internal.runtime.JSType;
import jdk.nashorn.internal.runtime.ParserException;
import jdk.nashorn.internal.runtime.Source;

import static jdk.nashorn.internal.parser.TokenType.STRING;

/**
 * Parses JSON text held in a string and returns the corresponding Java value:
 * a {@link Map} for objects, a {@link List} for arrays, a {@link String}, a
 * {@link Number}, a {@link Boolean}, or {@code null}.
 * This is derived from the objectLiteral production of the main parser.
 *
 * See: 15.12.1.2 The JSON Syntactic Grammar
 */
public class JSONParser {

    /** The complete JSON text being parsed. */
    private final String source;
    /** Cached length of {@link #source}. */
    final int length;
    /** Index into {@link #source} of the next character to be read. */
    int pos = 0;

    /** Value returned by {@link #peek()} and {@link #next()} once the input is exhausted. */
    private static final int EOF = -1;

    private static final String TRUE  = "true";
    private static final String FALSE = "false";
    private static final String NULL  = "null";

    /** Container state: nothing has been parsed since the opening bracket. */
    private static final int STATE_EMPTY          = 0;
    /** Container state: the last thing parsed was an element (or a key/value pair). */
    private static final int STATE_ELEMENT_PARSED = 1;
    /** Container state: the last thing parsed was a separating comma. */
    private static final int STATE_COMMA_PARSED   = 2;

    /**
     * Constructor
     * @param source the source
     */
    public JSONParser(final String source) {
        this.source = source;
        this.length = source.length();
    }

    /**
     * Implementation of the Quote(value) operation as defined in the ECMA script spec
     * It wraps a String value in double quotes and escapes characters within it.
     * Backslash, double quote, backspace, form feed, newline, carriage return and tab
     * get their two-character escapes; any other character below space is passed to
     * {@code Lexer.unicodeEscape}; everything else is copied unchanged.
     *
     * @param value string to quote
     *
     * @return quoted and escaped string
     */
    public static String quote(final String value) {
        final int len = value.length();
        // Reserve room for the two enclosing quotes; escapes grow the buffer on demand.
        final StringBuilder product = new StringBuilder(len + 2);

        product.append('"');

        for (int i = 0; i < len; i++) {
            final char ch = value.charAt(i);
            // TODO: should use a table?
            switch (ch) {
            case '\\':
                product.append("\\\\");
                break;
            case '"':
                product.append("\\\"");
                break;
            case '\b':
                product.append("\\b");
                break;
            case '\f':
                product.append("\\f");
                break;
            case '\n':
                product.append("\\n");
                break;
            case '\r':
                product.append("\\r");
                break;
            case '\t':
                product.append("\\t");
                break;
            default:
                if (ch < ' ') {
                    product.append(Lexer.unicodeEscape(ch));
                } else {
                    product.append(ch);
                }
                break;
            }
        }

        product.append('"');

        return product.toString();
    }

    /**
     * Public parse method. Parse a string into a JSON object.
     * Exactly one JSON value is accepted; anything other than white space after it
     * is reported as an error.
     *
     * @return the parsed JSON Object
     */
    public Object parse() {
        final Object value = parseLiteral();
        skipWhiteSpace();
        if (pos < length) {
            throw expectedError(pos, "eof", toString(peek()));
        }
        return value;
    }

    /**
     * Parses a single JSON value starting at the current position, skipping leading
     * white space, and dispatches on its first character.
     *
     * @return the parsed value
     */
    private Object parseLiteral() {
        skipWhiteSpace();

        final int c = peek();
        if (c == EOF) {
            throw expectedError(pos, "json literal", "eof");
        }
        switch (c) {
        case '{':
            return parseObject();
        case '[':
            return parseArray();
        case '"':
            return parseString();
        case 'f':
            return parseKeyword(FALSE, Boolean.FALSE);
        case 't':
            return parseKeyword(TRUE, Boolean.TRUE);
        case 'n':
            return parseKeyword(NULL, null);
        default:
            if (isDigit(c) || c == '-') {
                return parseNumber();
            } else if (c == '.') {
                // A leading '.' is reported as a malformed number rather than an unknown literal.
                throw numberError(pos);
            } else {
                throw expectedError(pos, "json literal", toString(c));
            }
        }
    }

    /**
     * Parses a JSON object whose opening brace is at the current position.
     * Members are stored in a {@link LinkedHashMap}, so they keep their textual
     * order; a repeated key overwrites the earlier value.
     *
     * @return the map of member names to parsed values
     */
    private Object parseObject() {
        final Map<String, Object> map = new LinkedHashMap<>();
        int state = STATE_EMPTY;

        assert peek() == '{';
        pos++;

        while (pos < length) {
            skipWhiteSpace();
            final int c = peek();

            switch (c) {
            case '"':
                if (state == STATE_ELEMENT_PARSED) {
                    // c was only peeked, so the offending quote is at pos (not pos - 1).
                    throw expectedError(pos, ", or }", toString(c));
                }
                final String id = parseString();
                skipWhiteSpace();
                final int n = next();
                if (n != ':') {
                    // next() already advanced, so the character just read is at pos - 1.
                    throw expectedError(pos - 1, ":", toString(n));
                }
                final Object value = parseLiteral();
                map.put(id, value);
                state = STATE_ELEMENT_PARSED;
                break;
            case ',':
                // A comma is only legal directly after a member.
                if (state != STATE_ELEMENT_PARSED) {
                    throw error(AbstractParser.message("trailing.comma.in.json"), pos);
                }
                state = STATE_COMMA_PARSED;
                pos++;
                break;
            case '}':
                if (state == STATE_COMMA_PARSED) {
                    throw error(AbstractParser.message("trailing.comma.in.json"), pos);
                }
                pos++;
                return map;
            default:
                throw expectedError(pos, ", or }", toString(c));
            }
        }
        throw expectedError(pos, ", or }", "eof");
    }

    /**
     * Parses a JSON array whose opening bracket is at the current position.
     *
     * @return the list of parsed elements, in textual order
     */
    private Object parseArray() {
        final List<Object> list = new ArrayList<>();
        int state = STATE_EMPTY;

        assert peek() == '[';
        pos++;

        while (pos < length) {
            skipWhiteSpace();
            final int c = peek();

            switch (c) {
            case ',':
                // A comma is only legal directly after an element.
                if (state != STATE_ELEMENT_PARSED) {
                    throw error(AbstractParser.message("trailing.comma.in.json"), pos);
                }
                state = STATE_COMMA_PARSED;
                pos++;
                break;
            case ']':
                if (state == STATE_COMMA_PARSED) {
                    throw error(AbstractParser.message("trailing.comma.in.json"), pos);
                }
                pos++;
                return list;
            default:
                if (state == STATE_ELEMENT_PARSED) {
                    throw expectedError(pos, ", or ]", toString(c));
                }
                list.add(parseLiteral());
                state = STATE_ELEMENT_PARSED;
                break;
            }
        }

        throw expectedError(pos, ", or ]", "eof");
    }

    /**
     * Parses a JSON string whose opening quote is at the current position and
     * leaves the position just past the closing quote.
     *
     * @return the string contents with escape sequences resolved
     */
    private String parseString() {
        // String buffer is only instantiated if string contains escape sequences.
        int start = ++pos;
        StringBuilder sb = null;

        while (pos < length) {
            final int c = next();
            if (c <= 0x1f) {
                // Characters <= 0x1f are not allowed in JSON strings.
                // next() already advanced, so the offending character is at pos - 1.
                throw syntaxError(pos - 1, "String contains control character");

            } else if (c == '\\') {
                if (sb == null) {
                    sb = new StringBuilder(Math.max(32, (pos - start) * 2));
                }
                // Flush the literal run that precedes the backslash, then the decoded escape.
                sb.append(source, start, pos - 1);
                sb.append(parseEscapeSequence());
                start = pos;

            } else if (c == '"') {
                if (sb != null) {
                    sb.append(source, start, pos - 1);
                    return sb.toString();
                }
                // No escapes were seen: the result is a plain slice of the source.
                return source.substring(start, pos - 1);
            }
        }

        throw error(Lexer.message("missing.close.quote"), pos, length);
    }

    /**
     * Decodes the escape sequence that follows a backslash; the backslash itself
     * has already been consumed.
     *
     * @return the character the escape sequence stands for
     */
    private char parseEscapeSequence() {
        final int c = next();
        switch (c) {
        case '"':
            return '"';
        case '\\':
            return '\\';
        case '/':
            return '/';
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'u':
            return parseUnicodeEscape();
        default:
            throw error(Lexer.message("invalid.escape.char"), pos - 1, length);
        }
    }

    /**
     * Reads the four hexadecimal digits of a {@code \}{@code uXXXX} escape, most
     * significant digit first.
     *
     * @return the UTF-16 code unit they encode
     */
    private char parseUnicodeEscape() {
        return (char) (parseHexDigit() << 12 | parseHexDigit() << 8 | parseHexDigit() << 4 | parseHexDigit());
    }

    /**
     * Consumes one character and converts it from a hexadecimal digit (either case).
     *
     * @return the digit value, 0 to 15
     */
    private int parseHexDigit() {
        final int c = next();
        if (c >= '0' && c <= '9') {
            return c - '0';
        } else if (c >= 'A' && c <= 'F') {
            return c + 10 - 'A';
        } else if (c >= 'a' && c <= 'f') {
            return c + 10 - 'a';
        }
        throw error(Lexer.message("invalid.hex"), pos - 1, length);
    }

    /**
     * @param c character code, or {@link #EOF}
     * @return true if {@code c} is an ASCII decimal digit
     */
    private boolean isDigit(final int c) {
        return c >= '0' && c <= '9';
    }

    /** Advances the position past any run of decimal digits. */
    private void skipDigits() {
        while (pos < length) {
            final int c = peek();
            if (!isDigit(c)) {
                break;
            }
            pos++;
        }
    }

    /**
     * Parses a JSON number (optional minus, integer part, optional fraction,
     * optional exponent) starting at the current position.
     *
     * @return an {@code Integer} or {@code Long} when {@code JSType} reports the value
     *         as representable in that type, otherwise a {@code Double}
     */
    private Number parseNumber() {
        final int start = pos;
        int c = next();

        if (c == '-') {
            c = next();
        }
        if (!isDigit(c)) {
            throw numberError(start);
        }
        // no more digits allowed after 0
        if (c != '0') {
            skipDigits();
        }

        // fraction
        if (peek() == '.') {
            pos++;
            // At least one digit must follow the decimal point.
            if (!isDigit(next())) {
                throw numberError(pos - 1);
            }
            skipDigits();
        }

        // exponent
        c = peek();
        if (c == 'e' || c == 'E') {
            pos++;
            c = next();
            if (c == '-' || c == '+') {
                c = next();
            }
            // At least one digit must follow the exponent marker and optional sign.
            if (!isDigit(c)) {
                throw numberError(pos - 1);
            }
            skipDigits();
        }

        final double d = Double.parseDouble(source.substring(start, pos));
        if (JSType.isRepresentableAsInt(d)) {
            return (int) d;
        } else if (JSType.isRepresentableAsLong(d)) {
            return (long) d;
        }
        return d;
    }

    /**
     * Consumes {@code keyword} at the current position.
     *
     * @param keyword the literal text expected at the current position
     * @param value   the value to return when the keyword matches
     * @return {@code value}
     */
    private Object parseKeyword(final String keyword, final Object value) {
        if (!source.regionMatches(pos, keyword, 0, keyword.length())) {
            throw expectedError(pos, "json literal", "ident");
        }
        pos += keyword.length();
        return value;
    }

    /**
     * @return the character at the current position without consuming it, or
     *         {@link #EOF} if the input is exhausted
     */
    private int peek() {
        if (pos >= length) {
            return EOF;
        }
        return source.charAt(pos);
    }

    /**
     * Returns the character at the current position and advances past it. The
     * position is advanced even at end of input, so callers reporting an error for
     * the character just read use {@code pos - 1}.
     *
     * @return the character read, or {@link #EOF}
     */
    private int next() {
        final int next = peek();
        pos++;
        return next;
    }

    /** Advances the position past any run of tab, carriage return, line feed or space. */
    private void skipWhiteSpace() {
        while (pos < length) {
            switch (peek()) {
            case '\t':
            case '\r':
            case '\n':
            case ' ':
                pos++;
                break;
            default:
                return;
            }
        }
    }

    /**
     * @param c character code, or {@link #EOF}
     * @return {@code "eof"} for {@link #EOF}, otherwise the character as a string
     */
    private static String toString(final int c) {
        return c == EOF ? "eof" : String.valueOf((char) c);
    }

    /**
     * Builds (but does not throw) a syntax-error exception for the given source range.
     * The message is formatted against a {@code Source} named {@code "<json>"} that
     * wraps the text being parsed.
     *
     * @param message the error message
     * @param start   source index the error refers to
     * @param length  length passed to the token descriptor
     *                (NOTE(review): callers in this class pass the total source
     *                length here, not a token length; confirm this is intended)
     * @return the exception for the caller to throw
     */
    ParserException error(final String message, final int start, final int length) throws ParserException {
        final long   token     = Token.toDesc(STRING, start, length);
        final int    position  = Token.descPosition(token);
        final Source src       = Source.sourceFor("<json>", source);
        final int    lineNum   = src.getLine(position);
        final int    columnNum = src.getColumn(position);
        final String formatted = ErrorManager.format(message, src, lineNum, columnNum, token);
        return new ParserException(JSErrorType.SYNTAX_ERROR, formatted, src, lineNum, columnNum, token);
    }

    /**
     * Builds a syntax-error exception at {@code start}, passing the source length
     * as the length argument.
     */
    private ParserException error(final String message, final int start) {
        return error(message, start, length);
    }

    /** Builds the exception for a malformed number at {@code start}. */
    private ParserException numberError(final int start) {
        return error(Lexer.message("json.invalid.number"), start);
    }

    /** Builds the exception for an "expected X but found Y" error at {@code start}. */
    private ParserException expectedError(final int start, final String expected, final String found) {
        return error(AbstractParser.message("expected", expected, found), start);
    }

    /** Builds the exception for an invalid-JSON error with the given {@code reason} at {@code start}. */
    private ParserException syntaxError(final int start, final String reason) {
        final String message = ECMAErrors.getMessage("syntax.error.invalid.json", reason);
        return error(message, start);
    }
}