/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static jdk.nashorn.internal.parser.TokenType.COLON;
import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
import static jdk.nashorn.internal.parser.TokenType.RBRACE;
import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
import static jdk.nashorn.internal.parser.TokenType.STRING;
import java.util.ArrayList;
import java.util.List;
import jdk.nashorn.internal.ir.Expression;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.ir.Node;
import jdk.nashorn.internal.ir.ObjectNode;
import jdk.nashorn.internal.ir.PropertyNode;
import jdk.nashorn.internal.ir.UnaryNode;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;

/**
 * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
 *
 * See: 15.12.1.2 The JSON Syntactic Grammar
 */
public class JSONParser extends AbstractParser {

    /**
     * Constructor
     * @param source  the source
     * @param errors  the error manager
     */
    public JSONParser(final Source source, final ErrorManager errors) {
        super(source, errors, false, 0);
    }

    /**
     * Implementation of the Quote(value) operation as defined in the ECMA script spec.
     * It wraps a String value in double quotes and escapes characters within it:
     * backslash, double quote, and the control characters below U+0020 (using the
     * short forms {@code \b \f \n \r \t} where they exist and a unicode escape otherwise).
     *
     * @param value string to quote
     *
     * @return quoted and escaped string
     */
    public static String quote(final String value) {
        final int length = value.length();
        // Room for the text plus the two surrounding quotes; grows if escapes are needed.
        final StringBuilder product = new StringBuilder(length + 2);

        product.append("\"");

        for (int i = 0; i < length; i++) {
            final char ch = value.charAt(i);
            // TODO: should use a table?
            switch (ch) {
            case '\\':
                product.append("\\\\");
                break;
            case '"':
                product.append("\\\"");
                break;
            case '\b':
                product.append("\\b");
                break;
            case '\f':
                product.append("\\f");
                break;
            case '\n':
                product.append("\\n");
                break;
            case '\r':
                product.append("\\r");
                break;
            case '\t':
                product.append("\\t");
                break;
            default:
                if (ch < ' ') {
                    // Remaining control characters have no short escape form.
                    product.append(Lexer.unicodeEscape(ch));
                } else {
                    product.append(ch);
                }
                break;
            }
        }

        product.append("\"");

        return product.toString();
    }

    /**
     * Public parse method - start lexing a new token stream for
     * a JSON script
     *
     * @return the JSON literal
     */
    public Node parse() {
        stream = new TokenStream();

        // The lexer is specialised so that it follows the JSON lexical grammar
        // rather than the full JavaScript one.
        lexer = new Lexer(source, stream) {

            @Override
            protected boolean skipComments() {
                return false;
            }

            @Override
            protected boolean isStringDelimiter(final char ch) {
                // Only double quotes delimit strings here.
                return ch == '\"';
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
            @Override
            protected boolean isWhitespace(final char ch) {
                return Lexer.isJsonWhitespace(ch);
            }

            @Override
            protected boolean isEOL(final char ch) {
                return Lexer.isJsonEOL(ch);
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
            @Override
            protected void scanNumber() {
                // Record beginning of number.
                final int startPosition = position;
                // Assume value is a decimal.
                TokenType valueType = TokenType.DECIMAL;

                // floating point can't start with a "." with no leading digit before
                if (ch0 == '.') {
                    error(Lexer.message("json.invalid.number"), STRING, position, limit);
                }

                // First digit of number.
                final int digit = convertDigit(ch0, 10);

                // skip first digit
                skip(1);

                // A leading zero is not followed by further integer digits, so only
                // continue scanning the integer part when the first digit is non-zero.
                if (digit != 0) {
                    // Skip over remaining digits.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                    // Must be a double.
                    if (ch0 == '.') {
                        // Skip period.
                        skip(1);

                        boolean mantissa = false;
                        // Skip mantissa.
                        while (convertDigit(ch0, 10) != -1) {
                            mantissa = true;
                            skip(1);
                        }

                        if (! mantissa) {
                            // no digit after "."
                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
                        }
                    }

                    // Detect exponent.
                    if (ch0 == 'E' || ch0 == 'e') {
                        // Skip E.
                        skip(1);
                        // Detect and skip exponent sign.
                        if (ch0 == '+' || ch0 == '-') {
                            skip(1);
                        }
                        boolean exponent = false;
                        // Skip exponent.
                        while (convertDigit(ch0, 10) != -1) {
                            exponent = true;
                            skip(1);
                        }

                        if (! exponent) {
                            // no digit after "E"
                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
                        }
                    }

                    valueType = TokenType.FLOATING;
                }

                // Add number token.
                add(valueType, startPosition);
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
            @Override
            protected boolean isEscapeCharacter(final char ch) {
                switch (ch) {
                    case '"':
                    case '/':
                    case '\\':
                    case 'b':
                    case 'f':
                    case 'n':
                    case 'r':
                    case 't':
                    // could be unicode escape
                    case 'u':
                        return true;
                    default:
                        return false;
                }
            }
        };

        // NOTE(review): presumably resets the token cursor before the first next() - confirm in AbstractParser.
        k = -1;

        next();

        final Node resultNode = jsonLiteral();
        // Nothing may follow the single top-level JSON value.
        expect(EOF);

        return resultNode;
    }

    /**
     * Fetch the current literal and check that its string value contains no
     * character that is illegal in an unescaped JSON string: a control character
     * (U+0000 to U+001F), a double quote or a backslash.
     *
     * @return the string literal
     */
    private LiteralNode<?> getStringLiteral() {
        final LiteralNode<?> literal = getLiteral();
        final String         str     = (String)literal.getValue();

        for (int i = 0; i < str.length(); i++) {
            final char ch = str.charAt(i);
            if (ch <= 0x001f || ch == '"' || ch == '\\') {
                throw error(AbstractParser.message("unexpected.token", str));
            }
        }

        return literal;
    }

    /**
     * Parse a JSON literal from the token stream
     * @return the JSON literal as a Node
     */
    private Expression jsonLiteral() {
        final long literalToken = token;

        switch (type) {
        case STRING:
            return getStringLiteral();
        case ESCSTRING:
        case DECIMAL:
        case FLOATING:
            return getLiteral();
        case FALSE:
            next();
            return LiteralNode.newInstance(literalToken, finish, false);
        case TRUE:
            next();
            return LiteralNode.newInstance(literalToken, finish, true);
        case NULL:
            next();
            return LiteralNode.newInstance(literalToken, finish);
        case LBRACKET:
            return arrayLiteral();
        case LBRACE:
            return objectLiteral();
        /*
         * A.8.1 JSON Lexical Grammar
         *
         * JSONNumber :: See 15.12.1.1
         *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
         */
        case SUB:
            next();

            final long   realToken = token;
            final Object value     = getValue();

            // A minus sign is only valid when followed by a number; the result is
            // a unary node (keyed on the minus token) wrapping the numeric literal.
            if (value instanceof Number) {
                next();
                return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
            }

            throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
        default:
            break;
        }

        throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
    }

    /**
     * Parse an array literal from the token stream.
     *
     * Elements must be separated by exactly one comma. Leading commas, repeated
     * commas (elision) and a trailing comma are all rejected.
     *
     * @return the array literal as a Node
     */
    private LiteralNode<Expression[]> arrayLiteral() {
        // Unlike JavaScript array literals, elision is not permitted in JSON.

        // Capture LBRACKET token.
        final long arrayToken = token;
        // LBRACKET tested in caller.
        next();

        // Prepare to accumulate elements.
        final List<Expression> elements = new ArrayList<>();

        while (type != RBRACKET) {
            // Add expression element. A comma in element position is not a valid
            // JSON literal, so jsonLiteral() reports it as an error; this is what
            // rejects "[,1]" and "[1,,2]".
            elements.add(jsonLiteral());

            if (type == COMMARIGHT) {
                next();
                // check for trailing comma - not allowed in JSON
                if (type == RBRACKET) {
                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
                }
            } else if (type != RBRACKET) {
                // Comma between array elements is mandatory in JSON.
                throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
            }
        }

        // Consume RBRACKET.
        next();

        return LiteralNode.newInstance(arrayToken, finish, elements);
    }

    /**
     * Parse an object literal from the token stream.
     *
     * Properties must be separated by exactly one comma. Leading commas, repeated
     * commas and a trailing comma are all rejected.
     *
     * @return the object literal as a Node
     */
    private ObjectNode objectLiteral() {
        // Capture LBRACE token.
        final long objectToken = token;
        // LBRACE tested in caller.
        next();

        // Prepare to accumulate elements.
        final List<PropertyNode> elements = new ArrayList<>();

        while (type != RBRACE) {
            // Get and add the next property. A comma in property position is not a
            // string, so propertyAssignment() reports it as an error; this is what
            // rejects "{,"a":1}" and "{"a":1,,"b":2}".
            elements.add(propertyAssignment());

            if (type == COMMARIGHT) {
                next();
                // check for trailing comma - not allowed in JSON
                if (type == RBRACE) {
                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
                }
            } else if (type != RBRACE) {
                // Comma between property assignments is mandatory in JSON.
                throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
            }
        }

        // Consume RBRACE.
        next();

        // Construct new object literal.
        return new ObjectNode(objectToken, finish, elements);
    }

    /**
     * Parse a property assignment from the token stream.
     *
     * The property name must be a string token; it is followed by a colon and
     * a JSON literal for the value.
     *
     * @return the property assignment as a Node
     */
    private PropertyNode propertyAssignment() {
        // Capture firstToken.
        final long propertyToken = token;
        LiteralNode<?> name = null;

        if (type == STRING) {
            name = getStringLiteral();
        } else if (type == ESCSTRING) {
            name = getLiteral();
        }

        if (name != null) {
            expect(COLON);
            final Expression value = jsonLiteral();
            return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
        }

        // Raise an error.
        throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
    }

}