1 /* 2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.internal.util.xml.impl; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import jdk.internal.org.xml.sax.ContentHandler; 31 import jdk.internal.org.xml.sax.DTDHandler; 32 import jdk.internal.org.xml.sax.EntityResolver; 33 import jdk.internal.org.xml.sax.ErrorHandler; 34 import jdk.internal.org.xml.sax.InputSource; 35 import jdk.internal.org.xml.sax.Locator; 36 import jdk.internal.org.xml.sax.SAXException; 37 import jdk.internal.org.xml.sax.SAXParseException; 38 import jdk.internal.org.xml.sax.XMLReader; 39 import jdk.internal.org.xml.sax.helpers.DefaultHandler; 40 41 /** 42 * XML non-validating push parser. 43 * 44 * This non-validating parser conforms to <a href="http://www.w3.org/TR/REC-xml" 45 * >Extensible Markup Language (XML) 1.0</a> and <a 46 * href="http://www.w3.org/TR/REC-xml-names" >"Namespaces in XML"</a> 47 * specifications. The API supported by the parser are <a 48 * href="http://java.sun.com/aboutJava/communityprocess/final/jsr030/index.html">CLDC 49 * 1.0</a> and <a href="http://www.jcp.org/en/jsr/detail?id=280">JSR-280</a>, a 50 * JavaME subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a> 51 * and <a href="http://www.saxproject.org/">SAX2</a>. 52 * 53 * @see org.xml.sax.XMLReader 54 */ 55 56 final class ParserSAX 57 extends Parser implements XMLReader, Locator 58 { 59 public static final String FEATURE_NS = 60 "http://xml.org/sax/features/namespaces"; 61 public static final String FEATURE_PREF = 62 "http://xml.org/sax/features/namespace-prefixes"; 63 // SAX feature flags 64 private boolean mFNamespaces; 65 private boolean mFPrefixes; 66 // SAX handlers 67 private DefaultHandler mHand; // the default handler 68 private ContentHandler mHandCont; // the content handler 69 private DTDHandler mHandDtd; // the DTD handler 70 private ErrorHandler mHandErr; // the error handler 71 private EntityResolver mHandEnt; // the entity resolver 72 73 /** 74 * Constructor. 75 */ 76 public ParserSAX() { 77 super(); 78 79 // SAX feature defaut values 80 mFNamespaces = true; 81 mFPrefixes = false; 82 83 // Default handler which will be used in case the application 84 // do not set one of handlers. 85 mHand = new DefaultHandler(); 86 mHandCont = mHand; 87 mHandDtd = mHand; 88 mHandErr = mHand; 89 mHandEnt = mHand; 90 } 91 92 /** 93 * Return the current content handler. 94 * 95 * @return The current content handler, or null if none has been registered. 96 * @see #setContentHandler 97 */ 98 public ContentHandler getContentHandler() { 99 return (mHandCont != mHand) ? mHandCont : null; 100 } 101 102 /** 103 * Allow an application to register a content event handler. 104 * 105 * <p>If the application does not register a content handler, all content 106 * events reported by the SAX parser will be silently ignored.</p> 107 * 108 * <p>Applications may register a new or different handler in the middle of 109 * a parse, and the SAX parser must begin using the new handler 110 * immediately.</p> 111 * 112 * @param handler The content handler. 113 * @exception java.lang.NullPointerException If the handler argument is 114 * null. 115 * @see #getContentHandler 116 */ 117 public void setContentHandler(ContentHandler handler) { 118 if (handler == null) { 119 throw new NullPointerException(); 120 } 121 mHandCont = handler; 122 } 123 124 /** 125 * Return the current DTD handler. 126 * 127 * @return The current DTD handler, or null if none has been registered. 128 * @see #setDTDHandler 129 */ 130 public DTDHandler getDTDHandler() { 131 return (mHandDtd != mHand) ? mHandDtd : null; 132 } 133 134 /** 135 * Allow an application to register a DTD event handler. 136 * 137 * <p>If the application does not register a DTD handler, all DTD events 138 * reported by the SAX parser will be silently ignored.</p> 139 * 140 * <p>Applications may register a new or different handler in the middle of 141 * a parse, and the SAX parser must begin using the new handler 142 * immediately.</p> 143 * 144 * @param handler The DTD handler. 145 * @exception java.lang.NullPointerException If the handler argument is 146 * null. 147 * @see #getDTDHandler 148 */ 149 public void setDTDHandler(DTDHandler handler) { 150 if (handler == null) { 151 throw new NullPointerException(); 152 } 153 mHandDtd = handler; 154 } 155 156 /** 157 * Return the current error handler. 158 * 159 * @return The current error handler, or null if none has been registered. 160 * @see #setErrorHandler 161 */ 162 public ErrorHandler getErrorHandler() { 163 return (mHandErr != mHand) ? mHandErr : null; 164 } 165 166 /** 167 * Allow an application to register an error event handler. 168 * 169 * <p>If the application does not register an error handler, all error 170 * events reported by the SAX parser will be silently ignored; however, 171 * normal processing may not continue. It is highly recommended that all SAX 172 * applications implement an error handler to avoid unexpected bugs.</p> 173 * 174 * <p>Applications may register a new or different handler in the middle of 175 * a parse, and the SAX parser must begin using the new handler 176 * immediately.</p> 177 * 178 * @param handler The error handler. 179 * @exception java.lang.NullPointerException If the handler argument is 180 * null. 181 * @see #getErrorHandler 182 */ 183 public void setErrorHandler(ErrorHandler handler) { 184 if (handler == null) { 185 throw new NullPointerException(); 186 } 187 mHandErr = handler; 188 } 189 190 /** 191 * Return the current entity resolver. 192 * 193 * @return The current entity resolver, or null if none has been registered. 194 * @see #setEntityResolver 195 */ 196 public EntityResolver getEntityResolver() { 197 return (mHandEnt != mHand) ? mHandEnt : null; 198 } 199 200 /** 201 * Allow an application to register an entity resolver. 202 * 203 * <p>If the application does not register an entity resolver, the XMLReader 204 * will perform its own default resolution.</p> 205 * 206 * <p>Applications may register a new or different resolver in the middle of 207 * a parse, and the SAX parser must begin using the new resolver 208 * immediately.</p> 209 * 210 * @param resolver The entity resolver. 211 * @exception java.lang.NullPointerException If the resolver argument is 212 * null. 213 * @see #getEntityResolver 214 */ 215 public void setEntityResolver(EntityResolver resolver) { 216 if (resolver == null) { 217 throw new NullPointerException(); 218 } 219 mHandEnt = resolver; 220 } 221 222 /** 223 * Return the public identifier for the current document event. 224 * 225 * <p>The return value is the public identifier of the document entity or of 226 * the external parsed entity in which the markup triggering the event 227 * appears.</p> 228 * 229 * @return A string containing the public identifier, or null if none is 230 * available. 231 * 232 * @see #getSystemId 233 */ 234 public String getPublicId() { 235 return (mInp != null) ? mInp.pubid : null; 236 } 237 238 /** 239 * Return the system identifier for the current document event. 240 * 241 * <p>The return value is the system identifier of the document entity or of 242 * the external parsed entity in which the markup triggering the event 243 * appears.</p> 244 * 245 * <p>If the system identifier is a URL, the parser must resolve it fully 246 * before passing it to the application.</p> 247 * 248 * @return A string containing the system identifier, or null if none is 249 * available. 250 * 251 * @see #getPublicId 252 */ 253 public String getSystemId() { 254 return (mInp != null) ? mInp.sysid : null; 255 } 256 257 /** 258 * Return the line number where the current document event ends. 259 * 260 * @return Always returns -1 indicating the line number is not available. 261 * 262 * @see #getColumnNumber 263 */ 264 public int getLineNumber() { 265 return -1; 266 } 267 268 /** 269 * Return the column number where the current document event ends. 270 * 271 * @return Always returns -1 indicating the column number is not available. 272 * 273 * @see #getLineNumber 274 */ 275 public int getColumnNumber() { 276 return -1; 277 } 278 279 /** 280 * Parse an XML document from a system identifier (URI). 281 * 282 * <p>This method is a shortcut for the common case of reading a document 283 * from a system identifier. It is the exact equivalent of the 284 * following:</p> 285 * 286 * <pre> 287 * parse(new InputSource(systemId)); 288 * </pre> 289 * 290 * <p>If the system identifier is a URL, it must be fully resolved by the 291 * application before it is passed to the parser.</p> 292 * 293 * @param systemId The system identifier (URI). 294 * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping 295 * another exception. 296 * @exception java.io.IOException An IO exception from the parser, possibly 297 * from a byte stream or character stream supplied by the application. 298 * @see #parse(org.xml.sax.InputSource) 299 */ 300 public void parse(String systemId) throws IOException, SAXException { 301 parse(new InputSource(systemId)); 302 } 303 304 /** 305 * Parse an XML document. 306 * 307 * <p>The application can use this method to instruct the XML reader to 308 * begin parsing an XML document from any valid input source (a character 309 * stream, a byte stream, or a URI).</p> 310 * 311 * <p>Applications may not invoke this method while a parse is in progress 312 * (they should create a new XMLReader instead for each nested XML 313 * document). Once a parse is complete, an application may reuse the same 314 * XMLReader object, possibly with a different input source.</p> 315 * 316 * <p>During the parse, the XMLReader will provide information about the XML 317 * document through the registered event handlers.</p> 318 * 319 * <p>This method is synchronous: it will not return until parsing has 320 * ended. If a client application wants to terminate parsing early, it 321 * should throw an exception.</p> 322 * 323 * @param is The input source for the top-level of the XML document. 324 * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping 325 * another exception. 326 * @exception java.io.IOException An IO exception from the parser, possibly 327 * from a byte stream or character stream supplied by the application. 328 * @see org.xml.sax.InputSource 329 * @see #parse(java.lang.String) 330 * @see #setEntityResolver 331 * @see #setDTDHandler 332 * @see #setContentHandler 333 * @see #setErrorHandler 334 */ 335 public void parse(InputSource is) throws IOException, SAXException { 336 if (is == null) { 337 throw new IllegalArgumentException(""); 338 } 339 // Set up the document 340 mInp = new Input(BUFFSIZE_READER); 341 mPh = PH_BEFORE_DOC; // before parsing 342 try { 343 setinp(is); 344 } catch (SAXException saxe) { 345 throw saxe; 346 } catch (IOException ioe) { 347 throw ioe; 348 } catch (RuntimeException rte) { 349 throw rte; 350 } catch (Exception e) { 351 panic(e.toString()); 352 } 353 parse(); 354 } 355 356 /** 357 * Parse the content of the given {@link java.io.InputStream} instance as 358 * XML using the specified {@link org.xml.sax.helpers.DefaultHandler}. 359 * 360 * @param src InputStream containing the content to be parsed. 361 * @param handler The SAX DefaultHandler to use. 362 * @exception IOException If any IO errors occur. 363 * @exception IllegalArgumentException If the given InputStream or handler 364 * is null. 365 * @exception SAXException If the underlying parser throws a SAXException 366 * while parsing. 367 * @see org.xml.sax.helpers.DefaultHandler 368 */ 369 public void parse(InputStream src, DefaultHandler handler) 370 throws SAXException, IOException { 371 if ((src == null) || (handler == null)) { 372 throw new IllegalArgumentException(""); 373 } 374 parse(new InputSource(src), handler); 375 } 376 377 /** 378 * Parse the content given {@link org.xml.sax.InputSource} as XML using the 379 * specified {@link org.xml.sax.helpers.DefaultHandler}. 380 * 381 * @param is The InputSource containing the content to be parsed. 382 * @param handler The SAX DefaultHandler to use. 383 * @exception IOException If any IO errors occur. 384 * @exception IllegalArgumentException If the InputSource or handler is 385 * null. 386 * @exception SAXException If the underlying parser throws a SAXException 387 * while parsing. 388 * @see org.xml.sax.helpers.DefaultHandler 389 */ 390 public void parse(InputSource is, DefaultHandler handler) 391 throws SAXException, IOException 392 { 393 if ((is == null) || (handler == null)) { 394 throw new IllegalArgumentException(""); 395 } 396 // Set up the handler 397 mHandCont = handler; 398 mHandDtd = handler; 399 mHandErr = handler; 400 mHandEnt = handler; 401 // Set up the document 402 mInp = new Input(BUFFSIZE_READER); 403 mPh = PH_BEFORE_DOC; // before parsing 404 try { 405 setinp(is); 406 } catch (SAXException | IOException | RuntimeException saxe) { 407 throw saxe; 408 } catch (Exception e) { 409 panic(e.toString()); 410 } 411 parse(); 412 } 413 414 /** 415 * Parse the XML document content using specified handlers and an input 416 * source. 417 * 418 * @exception IOException If any IO errors occur. 419 * @exception SAXException If the underlying parser throws a SAXException 420 * while parsing. 421 */ 422 @SuppressWarnings("fallthrough") 423 private void parse() throws SAXException, IOException { 424 init(); 425 try { 426 mHandCont.setDocumentLocator(this); 427 mHandCont.startDocument(); 428 429 if (mPh != PH_MISC_DTD) { 430 mPh = PH_MISC_DTD; // misc before DTD 431 } 432 int evt = EV_NULL; 433 // XML document prolog 434 do { 435 wsskip(); 436 switch (evt = step()) { 437 case EV_ELM: 438 case EV_ELMS: 439 mPh = PH_DOCELM; 440 break; 441 442 case EV_COMM: 443 case EV_PI: 444 break; 445 446 case EV_DTD: 447 if (mPh >= PH_DTD_MISC) { 448 panic(FAULT); 449 } 450 mPh = PH_DTD_MISC; // misc after DTD 451 break; 452 453 default: 454 panic(FAULT); 455 } 456 } while (mPh < PH_DOCELM); // misc before DTD 457 // XML document starting with document's element 458 do { 459 switch (evt) { 460 case EV_ELM: 461 case EV_ELMS: 462 // Report the element 463 if (mIsNSAware == true) { 464 mHandCont.startElement( 465 mElm.value, 466 mElm.name, 467 "", 468 mAttrs); 469 } else { 470 mHandCont.startElement( 471 "", 472 "", 473 mElm.name, 474 mAttrs); 475 } 476 if (evt == EV_ELMS) { 477 evt = step(); 478 break; 479 } 480 481 case EV_ELME: 482 // Report the end of element 483 if (mIsNSAware == true) { 484 mHandCont.endElement(mElm.value, mElm.name, ""); 485 } else { 486 mHandCont.endElement("", "", mElm.name); 487 } 488 // Restore the top of the prefix stack 489 while (mPref.list == mElm) { 490 mHandCont.endPrefixMapping(mPref.name); 491 mPref = del(mPref); 492 } 493 // Remove the top element tag 494 mElm = del(mElm); 495 if (mElm == null) { 496 mPh = PH_DOCELM_MISC; 497 } else { 498 evt = step(); 499 } 500 break; 501 502 case EV_TEXT: 503 case EV_WSPC: 504 case EV_CDAT: 505 case EV_COMM: 506 case EV_PI: 507 case EV_ENT: 508 evt = step(); 509 break; 510 511 default: 512 panic(FAULT); 513 } 514 } while (mPh == PH_DOCELM); 515 // Misc after document's element 516 do { 517 if (wsskip() == EOS) { 518 break; 519 } 520 521 switch (step()) { 522 case EV_COMM: 523 case EV_PI: 524 break; 525 526 default: 527 panic(FAULT); 528 } 529 } while (mPh == PH_DOCELM_MISC); 530 mPh = PH_AFTER_DOC; // parsing is completed 531 532 } catch (SAXException saxe) { 533 throw saxe; 534 } catch (IOException ioe) { 535 throw ioe; 536 } catch (RuntimeException rte) { 537 throw rte; 538 } catch (Exception e) { 539 panic(e.toString()); 540 } finally { 541 mHandCont.endDocument(); 542 cleanup(); 543 } 544 } 545 546 /** 547 * Reports document type. 548 * 549 * @param name The name of the entity. 550 * @param pubid The public identifier of the entity or <code>null</code>. 551 * @param sysid The system identifier of the entity or <code>null</code>. 552 */ 553 protected void docType(String name, String pubid, String sysid) throws SAXException { 554 mHandDtd.notationDecl(name, pubid, sysid); 555 } 556 557 /** 558 * Reports a comment. 559 * 560 * @param text The comment text starting from first charcater. 561 * @param length The number of characters in comment. 562 */ 563 protected void comm(char[] text, int length) { 564 } 565 566 /** 567 * Reports a processing instruction. 568 * 569 * @param target The processing instruction target name. 570 * @param body The processing instruction body text. 571 */ 572 protected void pi(String target, String body) throws SAXException { 573 mHandCont.processingInstruction(target, body); 574 } 575 576 /** 577 * Reports new namespace prefix. The Namespace prefix ( 578 * <code>mPref.name</code>) being declared and the Namespace URI ( 579 * <code>mPref.value</code>) the prefix is mapped to. An empty string is 580 * used for the default element namespace, which has no prefix. 581 */ 582 protected void newPrefix() throws SAXException { 583 mHandCont.startPrefixMapping(mPref.name, mPref.value); 584 } 585 586 /** 587 * Reports skipped entity name. 588 * 589 * @param name The entity name. 590 */ 591 protected void skippedEnt(String name) throws SAXException { 592 mHandCont.skippedEntity(name); 593 } 594 595 /** 596 * Returns an 597 * <code>InputSource</code> for specified entity or 598 * <code>null</code>. 599 * 600 * @param name The name of the entity. 601 * @param pubid The public identifier of the entity. 602 * @param sysid The system identifier of the entity. 603 */ 604 protected InputSource resolveEnt(String name, String pubid, String sysid) 605 throws SAXException, IOException 606 { 607 return mHandEnt.resolveEntity(pubid, sysid); 608 } 609 610 /** 611 * Reports notation declaration. 612 * 613 * @param name The notation's name. 614 * @param pubid The notation's public identifier, or null if none was given. 615 * @param sysid The notation's system identifier, or null if none was given. 616 */ 617 protected void notDecl(String name, String pubid, String sysid) 618 throws SAXException 619 { 620 mHandDtd.notationDecl(name, pubid, sysid); 621 } 622 623 /** 624 * Reports unparsed entity name. 625 * 626 * @param name The unparsed entity's name. 627 * @param pubid The entity's public identifier, or null if none was given. 628 * @param sysid The entity's system identifier. 629 * @param notation The name of the associated notation. 630 */ 631 protected void unparsedEntDecl(String name, String pubid, String sysid, String notation) 632 throws SAXException 633 { 634 mHandDtd.unparsedEntityDecl(name, pubid, sysid, notation); 635 } 636 637 /** 638 * Notifies the handler about fatal parsing error. 639 * 640 * @param msg The problem description message. 641 */ 642 protected void panic(String msg) throws SAXException { 643 SAXParseException spe = new SAXParseException(msg, this); 644 mHandErr.fatalError(spe); 645 throw spe; // [#1.2] fatal error definition 646 } 647 648 /** 649 * Reports characters and empties the parser's buffer. This method is called 650 * only if parser is going to return control to the main loop. This means 651 * that this method may use parser buffer to report white space without 652 * copying characters to temporary buffer. 653 */ 654 protected void bflash() throws SAXException { 655 if (mBuffIdx >= 0) { 656 // Textual data has been read 657 mHandCont.characters(mBuff, 0, (mBuffIdx + 1)); 658 mBuffIdx = -1; 659 } 660 } 661 662 /** 663 * Reports white space characters and empties the parser's buffer. This 664 * method is called only if parser is going to return control to the main 665 * loop. This means that this method may use parser buffer to report white 666 * space without copying characters to temporary buffer. 667 */ 668 protected void bflash_ws() throws SAXException { 669 if (mBuffIdx >= 0) { 670 // BUG: With additional info from DTD and xml:space attr [#2.10] 671 // the following call can be supported: 672 // mHandCont.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1)); 673 674 // Textual data has been read 675 mHandCont.characters(mBuff, 0, (mBuffIdx + 1)); 676 mBuffIdx = -1; 677 } 678 } 679 680 public boolean getFeature(String name) { 681 throw new UnsupportedOperationException("Not supported yet."); 682 } 683 684 public void setFeature(String name, boolean value) { 685 throw new UnsupportedOperationException("Not supported yet."); 686 } 687 688 public Object getProperty(String name) { 689 throw new UnsupportedOperationException("Not supported yet."); 690 } 691 692 public void setProperty(String name, Object value) { 693 throw new UnsupportedOperationException("Not supported yet."); 694 } 695 }