1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static jdk.nashorn.internal.parser.TokenType.COLON; 29 import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT; 30 import static jdk.nashorn.internal.parser.TokenType.EOF; 31 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 32 import static jdk.nashorn.internal.parser.TokenType.RBRACE; 33 import static jdk.nashorn.internal.parser.TokenType.RBRACKET; 34 import static jdk.nashorn.internal.parser.TokenType.STRING; 35 36 import java.util.ArrayList; 37 import java.util.List; 38 import jdk.nashorn.internal.ir.LiteralNode; 39 import jdk.nashorn.internal.ir.Node; 40 import jdk.nashorn.internal.ir.ObjectNode; 41 import jdk.nashorn.internal.ir.PropertyNode; 42 import jdk.nashorn.internal.ir.UnaryNode; 43 import jdk.nashorn.internal.runtime.ErrorManager; 44 import jdk.nashorn.internal.runtime.Source; 45 46 /** 47 * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser. 48 * 49 * See: 15.12.1.2 The JSON Syntactic Grammar 50 */ 51 public class JSONParser extends AbstractParser { 52 53 /** 54 * Constructor 55 * @param source the source 56 * @param errors the error manager 57 */ 58 public JSONParser(final Source source, final ErrorManager errors) { 59 super(source, errors, false); 60 } 61 62 /** 63 * Implementation of the Quote(value) operation as defined in the ECMA script spec 64 * It wraps a String value in double quotes and escapes characters within in 65 * 66 * @param value string to quote 67 * 68 * @return quoted and escaped string 69 */ 70 public static String quote(final String value) { 71 72 final StringBuilder product = new StringBuilder(); 73 74 product.append("\""); 75 76 for (final char ch : value.toCharArray()) { 77 // TODO: should use a table? 78 switch (ch) { 79 case '\\': 80 product.append("\\\\"); 81 break; 82 case '"': 83 product.append("\\\""); 84 break; 85 case '\b': 86 product.append("\\b"); 87 break; 88 case '\f': 89 product.append("\\f"); 90 break; 91 case '\n': 92 product.append("\\n"); 93 break; 94 case '\r': 95 product.append("\\r"); 96 break; 97 case '\t': 98 product.append("\\t"); 99 break; 100 default: 101 if (ch < ' ') { 102 product.append(Lexer.unicodeEscape(ch)); 103 break; 104 } 105 106 product.append(ch); 107 break; 108 } 109 } 110 111 product.append("\""); 112 113 return product.toString(); 114 } 115 116 /** 117 * Public parsed method - start lexing a new token stream for 118 * a JSON script 119 * 120 * @return the JSON literal 121 */ 122 public Node parse() { 123 stream = new TokenStream(); 124 125 lexer = new Lexer(source, stream) { 126 127 @Override 128 protected boolean skipComments() { 129 return false; 130 } 131 132 @Override 133 protected boolean isStringDelimiter(final char ch) { 134 return ch == '\"'; 135 } 136 137 // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace 138 @Override 139 protected boolean isWhitespace(final char ch) { 140 return Lexer.isJsonWhitespace(ch); 141 } 142 143 @Override 144 protected boolean isEOL(final char ch) { 145 return Lexer.isJsonEOL(ch); 146 } 147 148 // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber 149 @Override 150 protected void scanNumber() { 151 // Record beginning of number. 152 final int start = position; 153 // Assume value is a decimal. 154 TokenType type = TokenType.DECIMAL; 155 156 // floating point can't start with a "." with no leading digit before 157 if (ch0 == '.') { 158 error(Lexer.message("json.invalid.number"), STRING, position, limit); 159 } 160 161 // First digit of number. 162 int digit = convertDigit(ch0, 10); 163 164 // If number begins with 0x. 165 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 166 // Skip over 0xN. 167 skip(3); 168 // Skip over remaining digits. 169 while (convertDigit(ch0, 16) != -1) { 170 skip(1); 171 } 172 error(Lexer.message("json.no.hexadecimal.number"), STRING, position, limit); 173 type = TokenType.HEXADECIMAL; 174 } else { 175 // Check for possible octal constant. 176 boolean octal = digit == 0; 177 // Skip first digit if not leading '.'. 178 if (digit != -1) { 179 skip(1); 180 } 181 182 // Skip remaining digits. 183 while (convertDigit(ch0, 10) != -1) { 184 // Skip digit. 185 skip(1); 186 } 187 188 if (octal && position - start > 1) { 189 error(Lexer.message("json.no.octal.number"), STRING, position, limit); 190 type = TokenType.OCTAL; 191 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 192 // Must be a double. 193 if (ch0 == '.') { 194 // Skip period. 195 skip(1); 196 boolean mantissa = false; 197 // Skip mantissa. 198 while (convertDigit(ch0, 10) != -1) { 199 skip(1); 200 mantissa = true; 201 } 202 203 if (! mantissa) { 204 // no digit after "." 205 error(Lexer.message("json.invalid.number"), STRING, position, limit); 206 } 207 } 208 209 // Detect exponent. 210 if (ch0 == 'E' || ch0 == 'e') { 211 // Skip E. 212 skip(1); 213 // Detect and skip exponent sign. 214 if (ch0 == '+' || ch0 == '-') { 215 skip(1); 216 } 217 boolean exponent = false; 218 // Skip exponent. 219 while (convertDigit(ch0, 10) != -1) { 220 exponent = true; 221 skip(1); 222 } 223 224 if (! exponent) { 225 // no digit after "E" 226 error(Lexer.message("json.invalid.number"), STRING, position, limit); 227 } 228 } 229 230 type = TokenType.FLOATING; 231 } 232 } 233 234 // Add number token. 235 add(type, start); 236 } 237 238 // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter 239 @Override 240 protected boolean isEscapeCharacter(final char ch) { 241 switch (ch) { 242 case '"': 243 case '/': 244 case '\\': 245 case 'b': 246 case 'f': 247 case 'n': 248 case 'r': 249 case 't': 250 // could be unicode escape 251 case 'u': 252 return true; 253 default: 254 return false; 255 } 256 } 257 }; 258 259 k = -1; 260 261 next(); 262 263 final Node resultNode = jsonLiteral(); 264 expect(EOF); 265 266 return resultNode; 267 } 268 269 @SuppressWarnings("fallthrough") 270 private LiteralNode<?> getStringLiteral() { 271 final LiteralNode<?> literal = getLiteral(); 272 final String str = (String)literal.getValue(); 273 274 for (int i = 0; i < str.length(); i++) { 275 final char ch = str.charAt(i); 276 switch (ch) { 277 default: 278 if (ch > 0x001f) { 279 break; 280 } 281 case '"': 282 case '\\': 283 throw error(AbstractParser.message("unexpected.token", str)); 284 } 285 } 286 287 return literal; 288 } 289 290 /** 291 * Parse a JSON literal from the token stream 292 * @return the JSON literal as a Node 293 */ 294 private Node jsonLiteral() { 295 final long literalToken = token; 296 297 switch (type) { 298 case STRING: 299 return getStringLiteral(); 300 case ESCSTRING: 301 case DECIMAL: 302 case FLOATING: 303 return getLiteral(); 304 case FALSE: 305 next(); 306 return LiteralNode.newInstance(literalToken, finish, false); 307 case TRUE: 308 next(); 309 return LiteralNode.newInstance(literalToken, finish, true); 310 case NULL: 311 next(); 312 return LiteralNode.newInstance(literalToken, finish); 313 case LBRACKET: 314 return arrayLiteral(); 315 case LBRACE: 316 return objectLiteral(); 317 /* 318 * A.8.1 JSON Lexical Grammar 319 * 320 * JSONNumber :: See 15.12.1.1 321 * -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt 322 */ 323 case SUB: 324 next(); 325 326 final long realToken = token; 327 final Object value = getValue(); 328 329 if (value instanceof Number) { 330 next(); 331 return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value)); 332 } 333 334 throw error(AbstractParser.message("expected", "number", type.getNameOrType())); 335 default: 336 break; 337 } 338 339 throw error(AbstractParser.message("expected", "json literal", type.getNameOrType())); 340 } 341 342 /** 343 * Parse an array literal from the token stream 344 * @return the array literal as a Node 345 */ 346 private Node arrayLiteral() { 347 // Unlike JavaScript array literals, elison is not permitted in JSON. 348 349 // Capture LBRACKET token. 350 final long arrayToken = token; 351 // LBRACKET tested in caller. 352 next(); 353 354 Node result = null; 355 // Prepare to accummulating elements. 356 final List<Node> elements = new ArrayList<>(); 357 358 loop: 359 while (true) { 360 switch (type) { 361 case RBRACKET: 362 next(); 363 result = LiteralNode.newInstance(arrayToken, finish, elements); 364 break loop; 365 366 case COMMARIGHT: 367 next(); 368 break; 369 370 default: 371 // Add expression element. 372 elements.add(jsonLiteral()); 373 // Comma between array elements is mandatory in JSON. 374 if (type != COMMARIGHT && type != RBRACKET) { 375 throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType())); 376 } 377 break; 378 } 379 } 380 381 return result; 382 } 383 384 /** 385 * Parse an object literal from the token stream 386 * @return the object literal as a Node 387 */ 388 private Node objectLiteral() { 389 // Capture LBRACE token. 390 final long objectToken = token; 391 // LBRACE tested in caller. 392 next(); 393 394 // Prepare to accumulate elements. 395 final List<PropertyNode> elements = new ArrayList<>(); 396 397 // Create a block for the object literal. 398 loop: 399 while (true) { 400 switch (type) { 401 case RBRACE: 402 next(); 403 break loop; 404 405 case COMMARIGHT: 406 next(); 407 break; 408 409 default: 410 // Get and add the next property. 411 final PropertyNode property = propertyAssignment(); 412 elements.add(property); 413 414 // Comma between property assigments is mandatory in JSON. 415 if (type != RBRACE && type != COMMARIGHT) { 416 throw error(AbstractParser.message("expected", ", or }", type.getNameOrType())); 417 } 418 break; 419 } 420 } 421 422 // Construct new object literal. 423 return new ObjectNode(objectToken, finish, elements); 424 } 425 426 /** 427 * Parse a property assignment from the token stream 428 * @return the property assignment as a Node 429 */ 430 private PropertyNode propertyAssignment() { 431 // Capture firstToken. 432 final long propertyToken = token; 433 LiteralNode<?> name = null; 434 435 if (type == STRING) { 436 name = getStringLiteral(); 437 } else if (type == ESCSTRING) { 438 name = getLiteral(); 439 } 440 441 if (name != null) { 442 expect(COLON); 443 final Node value = jsonLiteral(); 444 return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null); 445 } 446 447 // Raise an error. 448 throw error(AbstractParser.message("expected", "string", type.getNameOrType())); 449 } 450 451 }