1 /* 2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.internal.util.xml.impl; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import jdk.internal.org.xml.sax.ContentHandler; 31 import jdk.internal.org.xml.sax.DTDHandler; 32 import jdk.internal.org.xml.sax.EntityResolver; 33 import jdk.internal.org.xml.sax.ErrorHandler; 34 import jdk.internal.org.xml.sax.InputSource; 35 import jdk.internal.org.xml.sax.Locator; 36 import jdk.internal.org.xml.sax.SAXException; 37 import jdk.internal.org.xml.sax.SAXNotRecognizedException; 38 import jdk.internal.org.xml.sax.SAXNotSupportedException; 39 import jdk.internal.org.xml.sax.SAXParseException; 40 import jdk.internal.org.xml.sax.XMLReader; 41 import jdk.internal.org.xml.sax.helpers.DefaultHandler; 42 43 /** 44 * XML non-validating push parser. 45 * 46 * This non-validating parser conforms to <a href="http://www.w3.org/TR/REC-xml" 47 * >Extensible Markup Language (XML) 1.0</a> and <a 48 * href="http://www.w3.org/TR/REC-xml-names" >"Namespaces in XML"</a> 49 * specifications. The API supported by the parser are <a 50 * href="http://java.sun.com/aboutJava/communityprocess/final/jsr030/index.html">CLDC 51 * 1.0</a> and <a href="http://www.jcp.org/en/jsr/detail?id=280">JSR-280</a>, a 52 * JavaME subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a> 53 * and <a href="http://www.saxproject.org/">SAX2</a>. 54 * 55 * @see org.xml.sax.XMLReader 56 */ 57 58 final class ParserSAX 59 extends Parser implements XMLReader, Locator 60 { 61 public final static String FEATURE_NS = 62 "http://xml.org/sax/features/namespaces"; 63 public final static String FEATURE_PREF = 64 "http://xml.org/sax/features/namespace-prefixes"; 65 // SAX feature flags 66 private boolean mFNamespaces; 67 private boolean mFPrefixes; 68 // SAX handlers 69 private DefaultHandler mHand; // the default handler 70 private ContentHandler mHandCont; // the content handler 71 private DTDHandler mHandDtd; // the DTD handler 72 private ErrorHandler mHandErr; // the error handler 73 private EntityResolver mHandEnt; // the entity resolver 74 75 /** 76 * Constructor. 77 */ 78 public ParserSAX() { 79 super(); 80 81 // SAX feature defaut values 82 mFNamespaces = true; 83 mFPrefixes = false; 84 85 // Default handler which will be used in case the application 86 // do not set one of handlers. 87 mHand = new DefaultHandler(); 88 mHandCont = mHand; 89 mHandDtd = mHand; 90 mHandErr = mHand; 91 mHandEnt = mHand; 92 } 93 94 /** 95 * Return the current content handler. 96 * 97 * @return The current content handler, or null if none has been registered. 98 * @see #setContentHandler 99 */ 100 public ContentHandler getContentHandler() { 101 return (mHandCont != mHand) ? mHandCont : null; 102 } 103 104 /** 105 * Allow an application to register a content event handler. 106 * 107 * <p>If the application does not register a content handler, all content 108 * events reported by the SAX parser will be silently ignored.</p> 109 * 110 * <p>Applications may register a new or different handler in the middle of 111 * a parse, and the SAX parser must begin using the new handler 112 * immediately.</p> 113 * 114 * @param handler The content handler. 115 * @exception java.lang.NullPointerException If the handler argument is 116 * null. 117 * @see #getContentHandler 118 */ 119 public void setContentHandler(ContentHandler handler) { 120 if (handler == null) { 121 throw new NullPointerException(); 122 } 123 mHandCont = handler; 124 } 125 126 /** 127 * Return the current DTD handler. 128 * 129 * @return The current DTD handler, or null if none has been registered. 130 * @see #setDTDHandler 131 */ 132 public DTDHandler getDTDHandler() { 133 return (mHandDtd != mHand) ? mHandDtd : null; 134 } 135 136 /** 137 * Allow an application to register a DTD event handler. 138 * 139 * <p>If the application does not register a DTD handler, all DTD events 140 * reported by the SAX parser will be silently ignored.</p> 141 * 142 * <p>Applications may register a new or different handler in the middle of 143 * a parse, and the SAX parser must begin using the new handler 144 * immediately.</p> 145 * 146 * @param handler The DTD handler. 147 * @exception java.lang.NullPointerException If the handler argument is 148 * null. 149 * @see #getDTDHandler 150 */ 151 public void setDTDHandler(DTDHandler handler) { 152 if (handler == null) { 153 throw new NullPointerException(); 154 } 155 mHandDtd = handler; 156 } 157 158 /** 159 * Return the current error handler. 160 * 161 * @return The current error handler, or null if none has been registered. 162 * @see #setErrorHandler 163 */ 164 public ErrorHandler getErrorHandler() { 165 return (mHandErr != mHand) ? mHandErr : null; 166 } 167 168 /** 169 * Allow an application to register an error event handler. 170 * 171 * <p>If the application does not register an error handler, all error 172 * events reported by the SAX parser will be silently ignored; however, 173 * normal processing may not continue. It is highly recommended that all SAX 174 * applications implement an error handler to avoid unexpected bugs.</p> 175 * 176 * <p>Applications may register a new or different handler in the middle of 177 * a parse, and the SAX parser must begin using the new handler 178 * immediately.</p> 179 * 180 * @param handler The error handler. 181 * @exception java.lang.NullPointerException If the handler argument is 182 * null. 183 * @see #getErrorHandler 184 */ 185 public void setErrorHandler(ErrorHandler handler) { 186 if (handler == null) { 187 throw new NullPointerException(); 188 } 189 mHandErr = handler; 190 } 191 192 /** 193 * Return the current entity resolver. 194 * 195 * @return The current entity resolver, or null if none has been registered. 196 * @see #setEntityResolver 197 */ 198 public EntityResolver getEntityResolver() { 199 return (mHandEnt != mHand) ? mHandEnt : null; 200 } 201 202 /** 203 * Allow an application to register an entity resolver. 204 * 205 * <p>If the application does not register an entity resolver, the XMLReader 206 * will perform its own default resolution.</p> 207 * 208 * <p>Applications may register a new or different resolver in the middle of 209 * a parse, and the SAX parser must begin using the new resolver 210 * immediately.</p> 211 * 212 * @param resolver The entity resolver. 213 * @exception java.lang.NullPointerException If the resolver argument is 214 * null. 215 * @see #getEntityResolver 216 */ 217 public void setEntityResolver(EntityResolver resolver) { 218 if (resolver == null) { 219 throw new NullPointerException(); 220 } 221 mHandEnt = resolver; 222 } 223 224 /** 225 * Return the public identifier for the current document event. 226 * 227 * <p>The return value is the public identifier of the document entity or of 228 * the external parsed entity in which the markup triggering the event 229 * appears.</p> 230 * 231 * @return A string containing the public identifier, or null if none is 232 * available. 233 * 234 * @see #getSystemId 235 */ 236 public String getPublicId() { 237 return (mInp != null) ? mInp.pubid : null; 238 } 239 240 /** 241 * Return the system identifier for the current document event. 242 * 243 * <p>The return value is the system identifier of the document entity or of 244 * the external parsed entity in which the markup triggering the event 245 * appears.</p> 246 * 247 * <p>If the system identifier is a URL, the parser must resolve it fully 248 * before passing it to the application.</p> 249 * 250 * @return A string containing the system identifier, or null if none is 251 * available. 252 * 253 * @see #getPublicId 254 */ 255 public String getSystemId() { 256 return (mInp != null) ? mInp.sysid : null; 257 } 258 259 /** 260 * Return the line number where the current document event ends. 261 * 262 * @return Always returns -1 indicating the line number is not available. 263 * 264 * @see #getColumnNumber 265 */ 266 public int getLineNumber() { 267 return -1; 268 } 269 270 /** 271 * Return the column number where the current document event ends. 272 * 273 * @return Always returns -1 indicating the column number is not available. 274 * 275 * @see #getLineNumber 276 */ 277 public int getColumnNumber() { 278 return -1; 279 } 280 281 /** 282 * Parse an XML document from a system identifier (URI). 283 * 284 * <p>This method is a shortcut for the common case of reading a document 285 * from a system identifier. It is the exact equivalent of the 286 * following:</p> 287 * 288 * <pre> 289 * parse(new InputSource(systemId)); 290 * </pre> 291 * 292 * <p>If the system identifier is a URL, it must be fully resolved by the 293 * application before it is passed to the parser.</p> 294 * 295 * @param systemId The system identifier (URI). 296 * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping 297 * another exception. 298 * @exception java.io.IOException An IO exception from the parser, possibly 299 * from a byte stream or character stream supplied by the application. 300 * @see #parse(org.xml.sax.InputSource) 301 */ 302 public void parse(String systemId) throws IOException, SAXException { 303 parse(new InputSource(systemId)); 304 } 305 306 /** 307 * Parse an XML document. 308 * 309 * <p>The application can use this method to instruct the XML reader to 310 * begin parsing an XML document from any valid input source (a character 311 * stream, a byte stream, or a URI).</p> 312 * 313 * <p>Applications may not invoke this method while a parse is in progress 314 * (they should create a new XMLReader instead for each nested XML 315 * document). Once a parse is complete, an application may reuse the same 316 * XMLReader object, possibly with a different input source.</p> 317 * 318 * <p>During the parse, the XMLReader will provide information about the XML 319 * document through the registered event handlers.</p> 320 * 321 * <p>This method is synchronous: it will not return until parsing has 322 * ended. If a client application wants to terminate parsing early, it 323 * should throw an exception.</p> 324 * 325 * @param is The input source for the top-level of the XML document. 326 * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping 327 * another exception. 328 * @exception java.io.IOException An IO exception from the parser, possibly 329 * from a byte stream or character stream supplied by the application. 330 * @see org.xml.sax.InputSource 331 * @see #parse(java.lang.String) 332 * @see #setEntityResolver 333 * @see #setDTDHandler 334 * @see #setContentHandler 335 * @see #setErrorHandler 336 */ 337 public void parse(InputSource is) throws IOException, SAXException { 338 if (is == null) { 339 throw new IllegalArgumentException(""); 340 } 341 // Set up the document 342 mInp = new Input(BUFFSIZE_READER); 343 mPh = PH_BEFORE_DOC; // before parsing 344 try { 345 setinp(is); 346 } catch (SAXException saxe) { 347 throw saxe; 348 } catch (IOException ioe) { 349 throw ioe; 350 } catch (RuntimeException rte) { 351 throw rte; 352 } catch (Exception e) { 353 panic(e.toString()); 354 } 355 parse(); 356 } 357 358 /** 359 * Parse the content of the given {@link java.io.InputStream} instance as 360 * XML using the specified {@link org.xml.sax.helpers.DefaultHandler}. 361 * 362 * @param src InputStream containing the content to be parsed. 363 * @param handler The SAX DefaultHandler to use. 364 * @exception IOException If any IO errors occur. 365 * @exception IllegalArgumentException If the given InputStream or handler 366 * is null. 367 * @exception SAXException If the underlying parser throws a SAXException 368 * while parsing. 369 * @see org.xml.sax.helpers.DefaultHandler 370 */ 371 public void parse(InputStream src, DefaultHandler handler) 372 throws SAXException, IOException { 373 if ((src == null) || (handler == null)) { 374 throw new IllegalArgumentException(""); 375 } 376 parse(new InputSource(src), handler); 377 } 378 379 /** 380 * Parse the content given {@link org.xml.sax.InputSource} as XML using the 381 * specified {@link org.xml.sax.helpers.DefaultHandler}. 382 * 383 * @param is The InputSource containing the content to be parsed. 384 * @param handler The SAX DefaultHandler to use. 385 * @exception IOException If any IO errors occur. 386 * @exception IllegalArgumentException If the InputSource or handler is 387 * null. 388 * @exception SAXException If the underlying parser throws a SAXException 389 * while parsing. 390 * @see org.xml.sax.helpers.DefaultHandler 391 */ 392 public void parse(InputSource is, DefaultHandler handler) 393 throws SAXException, IOException 394 { 395 if ((is == null) || (handler == null)) { 396 throw new IllegalArgumentException(""); 397 } 398 // Set up the handler 399 mHandCont = handler; 400 mHandDtd = handler; 401 mHandErr = handler; 402 mHandEnt = handler; 403 // Set up the document 404 mInp = new Input(BUFFSIZE_READER); 405 mPh = PH_BEFORE_DOC; // before parsing 406 try { 407 setinp(is); 408 } catch (SAXException | IOException | RuntimeException saxe) { 409 throw saxe; 410 } catch (Exception e) { 411 panic(e.toString()); 412 } 413 parse(); 414 } 415 416 /** 417 * Parse the XML document content using specified handlers and an input 418 * source. 419 * 420 * @exception IOException If any IO errors occur. 421 * @exception SAXException If the underlying parser throws a SAXException 422 * while parsing. 423 */ 424 private void parse() throws SAXException, IOException { 425 init(); 426 try { 427 mHandCont.setDocumentLocator(this); 428 mHandCont.startDocument(); 429 430 if (mPh != PH_MISC_DTD) { 431 mPh = PH_MISC_DTD; // misc before DTD 432 } 433 int evt = EV_NULL; 434 // XML document prolog 435 do { 436 wsskip(); 437 switch (evt = step()) { 438 case EV_ELM: 439 case EV_ELMS: 440 mPh = PH_DOCELM; 441 break; 442 443 case EV_COMM: 444 case EV_PI: 445 break; 446 447 case EV_DTD: 448 if (mPh >= PH_DTD_MISC) { 449 panic(FAULT); 450 } 451 mPh = PH_DTD_MISC; // misc after DTD 452 break; 453 454 default: 455 panic(FAULT); 456 } 457 } while (mPh < PH_DOCELM); // misc before DTD 458 // XML document starting with document's element 459 do { 460 switch (evt) { 461 case EV_ELM: 462 case EV_ELMS: 463 // Report the element 464 if (mIsNSAware == true) { 465 mHandCont.startElement( 466 mElm.value, 467 mElm.name, 468 "", 469 mAttrs); 470 } else { 471 mHandCont.startElement( 472 "", 473 "", 474 mElm.name, 475 mAttrs); 476 } 477 if (evt == EV_ELMS) { 478 evt = step(); 479 break; 480 } 481 482 case EV_ELME: 483 // Report the end of element 484 if (mIsNSAware == true) { 485 mHandCont.endElement(mElm.value, mElm.name, ""); 486 } else { 487 mHandCont.endElement("", "", mElm.name); 488 } 489 // Restore the top of the prefix stack 490 while (mPref.list == mElm) { 491 mHandCont.endPrefixMapping(mPref.name); 492 mPref = del(mPref); 493 } 494 // Remove the top element tag 495 mElm = del(mElm); 496 if (mElm == null) { 497 mPh = PH_DOCELM_MISC; 498 } else { 499 evt = step(); 500 } 501 break; 502 503 case EV_TEXT: 504 case EV_WSPC: 505 case EV_CDAT: 506 case EV_COMM: 507 case EV_PI: 508 case EV_ENT: 509 evt = step(); 510 break; 511 512 default: 513 panic(FAULT); 514 } 515 } while (mPh == PH_DOCELM); 516 // Misc after document's element 517 do { 518 if (wsskip() == EOS) { 519 break; 520 } 521 522 switch (step()) { 523 case EV_COMM: 524 case EV_PI: 525 break; 526 527 default: 528 panic(FAULT); 529 } 530 } while (mPh == PH_DOCELM_MISC); 531 mPh = PH_AFTER_DOC; // parsing is completed 532 533 } catch (SAXException saxe) { 534 throw saxe; 535 } catch (IOException ioe) { 536 throw ioe; 537 } catch (RuntimeException rte) { 538 throw rte; 539 } catch (Exception e) { 540 panic(e.toString()); 541 } finally { 542 mHandCont.endDocument(); 543 cleanup(); 544 } 545 } 546 547 /** 548 * Reports document type. 549 * 550 * @param name The name of the entity. 551 * @param pubid The public identifier of the entity or <code>null</code>. 552 * @param sysid The system identifier of the entity or <code>null</code>. 553 */ 554 protected void docType(String name, String pubid, String sysid) throws SAXException { 555 mHandDtd.notationDecl(name, pubid, sysid); 556 } 557 558 /** 559 * Reports a comment. 560 * 561 * @param text The comment text starting from first charcater. 562 * @param length The number of characters in comment. 563 */ 564 protected void comm(char[] text, int length) { 565 } 566 567 /** 568 * Reports a processing instruction. 569 * 570 * @param target The processing instruction target name. 571 * @param body The processing instruction body text. 572 */ 573 protected void pi(String target, String body) throws SAXException { 574 mHandCont.processingInstruction(target, body); 575 } 576 577 /** 578 * Reports new namespace prefix. The Namespace prefix ( 579 * <code>mPref.name</code>) being declared and the Namespace URI ( 580 * <code>mPref.value</code>) the prefix is mapped to. An empty string is 581 * used for the default element namespace, which has no prefix. 582 */ 583 protected void newPrefix() throws SAXException { 584 mHandCont.startPrefixMapping(mPref.name, mPref.value); 585 } 586 587 /** 588 * Reports skipped entity name. 589 * 590 * @param name The entity name. 591 */ 592 protected void skippedEnt(String name) throws SAXException { 593 mHandCont.skippedEntity(name); 594 } 595 596 /** 597 * Returns an 598 * <code>InputSource</code> for specified entity or 599 * <code>null</code>. 600 * 601 * @param name The name of the entity. 602 * @param pubid The public identifier of the entity. 603 * @param sysid The system identifier of the entity. 604 */ 605 protected InputSource resolveEnt(String name, String pubid, String sysid) 606 throws SAXException, IOException 607 { 608 return mHandEnt.resolveEntity(pubid, sysid); 609 } 610 611 /** 612 * Reports notation declaration. 613 * 614 * @param name The notation's name. 615 * @param pubid The notation's public identifier, or null if none was given. 616 * @param sysid The notation's system identifier, or null if none was given. 617 */ 618 protected void notDecl(String name, String pubid, String sysid) 619 throws SAXException 620 { 621 mHandDtd.notationDecl(name, pubid, sysid); 622 } 623 624 /** 625 * Reports unparsed entity name. 626 * 627 * @param name The unparsed entity's name. 628 * @param pubid The entity's public identifier, or null if none was given. 629 * @param sysid The entity's system identifier. 630 * @param notation The name of the associated notation. 631 */ 632 protected void unparsedEntDecl(String name, String pubid, String sysid, String notation) 633 throws SAXException 634 { 635 mHandDtd.unparsedEntityDecl(name, pubid, sysid, notation); 636 } 637 638 /** 639 * Notifies the handler about fatal parsing error. 640 * 641 * @param msg The problem description message. 642 */ 643 protected void panic(String msg) throws SAXException { 644 SAXParseException spe = new SAXParseException(msg, this); 645 mHandErr.fatalError(spe); 646 throw spe; // [#1.2] fatal error definition 647 } 648 649 /** 650 * Reports characters and empties the parser's buffer. This method is called 651 * only if parser is going to return control to the main loop. This means 652 * that this method may use parser buffer to report white space without 653 * copeing characters to temporary buffer. 654 */ 655 protected void bflash() throws SAXException { 656 if (mBuffIdx >= 0) { 657 // Textual data has been read 658 mHandCont.characters(mBuff, 0, (mBuffIdx + 1)); 659 mBuffIdx = -1; 660 } 661 } 662 663 /** 664 * Reports white space characters and empties the parser's buffer. This 665 * method is called only if parser is going to return control to the main 666 * loop. This means that this method may use parser buffer to report white 667 * space without copeing characters to temporary buffer. 668 */ 669 protected void bflash_ws() throws SAXException { 670 if (mBuffIdx >= 0) { 671 // BUG: With additional info from DTD and xml:space attr [#2.10] 672 // the following call can be supported: 673 // mHandCont.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1)); 674 675 // Textual data has been read 676 mHandCont.characters(mBuff, 0, (mBuffIdx + 1)); 677 mBuffIdx = -1; 678 } 679 } 680 681 public boolean getFeature(String name) { 682 throw new UnsupportedOperationException("Not supported yet."); 683 } 684 685 public void setFeature(String name, boolean value) { 686 throw new UnsupportedOperationException("Not supported yet."); 687 } 688 689 public Object getProperty(String name) { 690 throw new UnsupportedOperationException("Not supported yet."); 691 } 692 693 public void setProperty(String name, Object value) { 694 throw new UnsupportedOperationException("Not supported yet."); 695 } 696 }