1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.internal.util.xml.impl;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import jdk.internal.org.xml.sax.ContentHandler;
  31 import jdk.internal.org.xml.sax.DTDHandler;
  32 import jdk.internal.org.xml.sax.EntityResolver;
  33 import jdk.internal.org.xml.sax.ErrorHandler;
  34 import jdk.internal.org.xml.sax.InputSource;
  35 import jdk.internal.org.xml.sax.Locator;
  36 import jdk.internal.org.xml.sax.SAXException;
  37 import jdk.internal.org.xml.sax.SAXNotRecognizedException;
  38 import jdk.internal.org.xml.sax.SAXNotSupportedException;
  39 import jdk.internal.org.xml.sax.SAXParseException;
  40 import jdk.internal.org.xml.sax.XMLReader;
  41 import jdk.internal.org.xml.sax.helpers.DefaultHandler;
  42 
/**
 * XML non-validating push parser.
 *
 * This non-validating parser conforms to the <a href="http://www.w3.org/TR/REC-xml"
 * >Extensible Markup Language (XML) 1.0</a> and <a
 * href="http://www.w3.org/TR/REC-xml-names" >"Namespaces in XML"</a>
 * specifications. The APIs supported by the parser are <a
 * href="http://java.sun.com/aboutJava/communityprocess/final/jsr030/index.html">CLDC
 * 1.0</a> and <a href="http://www.jcp.org/en/jsr/detail?id=280">JSR-280</a>, a
 * JavaME subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a>
 * and <a href="http://www.saxproject.org/">SAX2</a>.
 *
 * @see jdk.internal.org.xml.sax.XMLReader
 */
  57 
  58 /* pkg */ final class ParserSAX
  59         extends Parser
  60         implements XMLReader, Locator {
  61 
  62     public final static String FEATURE_NS =
  63             "http://xml.org/sax/features/namespaces";
  64     public final static String FEATURE_PREF =
  65             "http://xml.org/sax/features/namespace-prefixes";
  66     //          SAX feature flags
  67     private boolean mFNamespaces;
  68     private boolean mFPrefixes;
  69     //          SAX handlers
  70     private DefaultHandler mHand;      // the default handler
  71     private ContentHandler mHandCont;  // the content handler
  72     private DTDHandler mHandDtd;   // the DTD handler
  73     private ErrorHandler mHandErr;   // the error handler
  74     private EntityResolver mHandEnt;   // the entity resolver
  75 
  76     /**
  77      * Constructor.
  78      */
  79     public ParserSAX() {
  80         super();
  81 
  82         //              SAX feature defaut values
  83         mFNamespaces = true;
  84         mFPrefixes = false;
  85 
  86         //              Default handler which will be used in case the application
  87         //              do not set one of handlers.
  88         mHand = new DefaultHandler();
  89         mHandCont = mHand;
  90         mHandDtd = mHand;
  91         mHandErr = mHand;
  92         mHandEnt = mHand;
  93     }
  94 
  95     /**
  96      * Return the current content handler.
  97      *
  98      * @return The current content handler, or null if none has been registered.
  99      * @see #setContentHandler
 100      */
 101     public ContentHandler getContentHandler() {
 102         return (mHandCont != mHand) ? mHandCont : null;
 103     }
 104 
 105     /**
 106      * Allow an application to register a content event handler.
 107      *
 108      * <p>If the application does not register a content handler, all content
 109      * events reported by the SAX parser will be silently ignored.</p>
 110      *
 111      * <p>Applications may register a new or different handler in the middle of
 112      * a parse, and the SAX parser must begin using the new handler
 113      * immediately.</p>
 114      *
 115      * @param handler The content handler.
 116      * @exception java.lang.NullPointerException If the handler argument is
 117      * null.
 118      * @see #getContentHandler
 119      */
 120     public void setContentHandler(ContentHandler handler) {
 121         if (handler == null) {
 122             throw new NullPointerException();
 123         }
 124         mHandCont = handler;
 125     }
 126 
 127     /**
 128      * Return the current DTD handler.
 129      *
 130      * @return The current DTD handler, or null if none has been registered.
 131      * @see #setDTDHandler
 132      */
 133     public DTDHandler getDTDHandler() {
 134         return (mHandDtd != mHand) ? mHandDtd : null;
 135     }
 136 
 137     /**
 138      * Allow an application to register a DTD event handler.
 139      *
 140      * <p>If the application does not register a DTD handler, all DTD events
 141      * reported by the SAX parser will be silently ignored.</p>
 142      *
 143      * <p>Applications may register a new or different handler in the middle of
 144      * a parse, and the SAX parser must begin using the new handler
 145      * immediately.</p>
 146      *
 147      * @param handler The DTD handler.
 148      * @exception java.lang.NullPointerException If the handler argument is
 149      * null.
 150      * @see #getDTDHandler
 151      */
 152     public void setDTDHandler(DTDHandler handler) {
 153         if (handler == null) {
 154             throw new NullPointerException();
 155         }
 156         mHandDtd = handler;
 157     }
 158 
 159     /**
 160      * Return the current error handler.
 161      *
 162      * @return The current error handler, or null if none has been registered.
 163      * @see #setErrorHandler
 164      */
 165     public ErrorHandler getErrorHandler() {
 166         return (mHandErr != mHand) ? mHandErr : null;
 167     }
 168 
 169     /**
 170      * Allow an application to register an error event handler.
 171      *
 172      * <p>If the application does not register an error handler, all error
 173      * events reported by the SAX parser will be silently ignored; however,
 174      * normal processing may not continue. It is highly recommended that all SAX
 175      * applications implement an error handler to avoid unexpected bugs.</p>
 176      *
 177      * <p>Applications may register a new or different handler in the middle of
 178      * a parse, and the SAX parser must begin using the new handler
 179      * immediately.</p>
 180      *
 181      * @param handler The error handler.
 182      * @exception java.lang.NullPointerException If the handler argument is
 183      * null.
 184      * @see #getErrorHandler
 185      */
 186     public void setErrorHandler(ErrorHandler handler) {
 187         if (handler == null) {
 188             throw new NullPointerException();
 189         }
 190         mHandErr = handler;
 191     }
 192 
 193     /**
 194      * Return the current entity resolver.
 195      *
 196      * @return The current entity resolver, or null if none has been registered.
 197      * @see #setEntityResolver
 198      */
 199     public EntityResolver getEntityResolver() {
 200         return (mHandEnt != mHand) ? mHandEnt : null;
 201     }
 202 
 203     /**
 204      * Allow an application to register an entity resolver.
 205      *
 206      * <p>If the application does not register an entity resolver, the XMLReader
 207      * will perform its own default resolution.</p>
 208      *
 209      * <p>Applications may register a new or different resolver in the middle of
 210      * a parse, and the SAX parser must begin using the new resolver
 211      * immediately.</p>
 212      *
 213      * @param resolver The entity resolver.
 214      * @exception java.lang.NullPointerException If the resolver argument is
 215      * null.
 216      * @see #getEntityResolver
 217      */
 218     public void setEntityResolver(EntityResolver resolver) {
 219         if (resolver == null) {
 220             throw new NullPointerException();
 221         }
 222         mHandEnt = resolver;
 223     }
 224 
 225     /**
 226      * Return the public identifier for the current document event.
 227      *
 228      * <p>The return value is the public identifier of the document entity or of
 229      * the external parsed entity in which the markup triggering the event
 230      * appears.</p>
 231      *
 232      * @return A string containing the public identifier, or null if none is
 233      * available.
 234      *
 235      * @see #getSystemId
 236      */
 237     public String getPublicId() {
 238         return (mInp != null) ? mInp.pubid : null;
 239     }
 240 
 241     /**
 242      * Return the system identifier for the current document event.
 243      *
 244      * <p>The return value is the system identifier of the document entity or of
 245      * the external parsed entity in which the markup triggering the event
 246      * appears.</p>
 247      *
 248      * <p>If the system identifier is a URL, the parser must resolve it fully
 249      * before passing it to the application.</p>
 250      *
 251      * @return A string containing the system identifier, or null if none is
 252      * available.
 253      *
 254      * @see #getPublicId
 255      */
 256     public String getSystemId() {
 257         return (mInp != null) ? mInp.sysid : null;
 258     }
 259 
 260     /**
 261      * Return the line number where the current document event ends.
 262      *
 263      * @return Always returns -1 indicating the line number is not available.
 264      *
 265      * @see #getColumnNumber
 266      */
 267     public int getLineNumber() {
 268         return -1;
 269     }
 270 
 271     /**
 272      * Return the column number where the current document event ends.
 273      *
 274      * @return Always returns -1 indicating the column number is not available.
 275      *
 276      * @see #getLineNumber
 277      */
 278     public int getColumnNumber() {
 279         return -1;
 280     }
 281 
 282     /**
 283      * Parse an XML document from a system identifier (URI).
 284      *
 285      * <p>This method is a shortcut for the common case of reading a document
 286      * from a system identifier. It is the exact equivalent of the
 287      * following:</p>
 288      *
 289      * <pre>
 290      * parse(new InputSource(systemId));
 291      * </pre>
 292      *
 293      * <p>If the system identifier is a URL, it must be fully resolved by the
 294      * application before it is passed to the parser.</p>
 295      *
 296      * @param systemId The system identifier (URI).
 297      * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping
 298      * another exception.
 299      * @exception java.io.IOException An IO exception from the parser, possibly
 300      * from a byte stream or character stream supplied by the application.
 301      * @see #parse(org.xml.sax.InputSource)
 302      */
 303     public void parse(String systemId)
 304             throws IOException, SAXException {
 305         parse(new InputSource(systemId));
 306     }
 307 
 308     /**
 309      * Parse an XML document.
 310      *
 311      * <p>The application can use this method to instruct the XML reader to
 312      * begin parsing an XML document from any valid input source (a character
 313      * stream, a byte stream, or a URI).</p>
 314      *
 315      * <p>Applications may not invoke this method while a parse is in progress
 316      * (they should create a new XMLReader instead for each nested XML
 317      * document). Once a parse is complete, an application may reuse the same
 318      * XMLReader object, possibly with a different input source.</p>
 319      *
 320      * <p>During the parse, the XMLReader will provide information about the XML
 321      * document through the registered event handlers.</p>
 322      *
 323      * <p>This method is synchronous: it will not return until parsing has
 324      * ended. If a client application wants to terminate parsing early, it
 325      * should throw an exception.</p>
 326      *
 327      * @param is The input source for the top-level of the XML document.
 328      * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping
 329      * another exception.
 330      * @exception java.io.IOException An IO exception from the parser, possibly
 331      * from a byte stream or character stream supplied by the application.
 332      * @see org.xml.sax.InputSource
 333      * @see #parse(java.lang.String)
 334      * @see #setEntityResolver
 335      * @see #setDTDHandler
 336      * @see #setContentHandler
 337      * @see #setErrorHandler
 338      */
 339     public void parse(InputSource is)
 340             throws IOException, SAXException {
 341         if (is == null) {
 342             throw new IllegalArgumentException("");
 343         }
 344         //              Set up the document
 345         mInp = new Input(BUFFSIZE_READER);
 346         mPh = PH_BEFORE_DOC;  // before parsing
 347         try {
 348             setinp(is);
 349         } catch (SAXException saxe) {
 350             throw saxe;
 351         } catch (IOException ioe) {
 352             throw ioe;
 353         } catch (RuntimeException rte) {
 354             throw rte;
 355         } catch (Exception e) {
 356             panic(e.toString());
 357         }
 358         parse();
 359     }
 360 
 361     /**
 362      * Parse the content of the given {@link java.io.InputStream} instance as
 363      * XML using the specified {@link org.xml.sax.helpers.DefaultHandler}.
 364      *
 365      * @param src InputStream containing the content to be parsed.
 366      * @param handler The SAX DefaultHandler to use.
 367      * @exception IOException If any IO errors occur.
 368      * @exception IllegalArgumentException If the given InputStream or handler
 369      * is null.
 370      * @exception SAXException If the underlying parser throws a SAXException
 371      * while parsing.
 372      * @see org.xml.sax.helpers.DefaultHandler
 373      */
 374     public void parse(InputStream src, DefaultHandler handler)
 375             throws SAXException, IOException {
 376         if ((src == null) || (handler == null)) {
 377             throw new IllegalArgumentException("");
 378         }
 379         parse(new InputSource(src), handler);
 380     }
 381 
 382     /**
 383      * Parse the content given {@link org.xml.sax.InputSource} as XML using the
 384      * specified {@link org.xml.sax.helpers.DefaultHandler}.
 385      *
 386      * @param is The InputSource containing the content to be parsed.
 387      * @param handler The SAX DefaultHandler to use.
 388      * @exception IOException If any IO errors occur.
 389      * @exception IllegalArgumentException If the InputSource or handler is
 390      * null.
 391      * @exception SAXException If the underlying parser throws a SAXException
 392      * while parsing.
 393      * @see org.xml.sax.helpers.DefaultHandler
 394      */
 395     public void parse(InputSource is, DefaultHandler handler)
 396             throws SAXException, IOException {
 397         if ((is == null) || (handler == null)) {
 398             throw new IllegalArgumentException("");
 399         }
 400         //              Set up the handler
 401         mHandCont = handler;
 402         mHandDtd = handler;
 403         mHandErr = handler;
 404         mHandEnt = handler;
 405         //              Set up the document
 406         mInp = new Input(BUFFSIZE_READER);
 407         mPh = PH_BEFORE_DOC;  // before parsing
 408         try {
 409             setinp(is);
 410         } catch (SAXException saxe) {
 411             throw saxe;
 412         } catch (IOException ioe) {
 413             throw ioe;
 414         } catch (RuntimeException rte) {
 415             throw rte;
 416         } catch (Exception e) {
 417             panic(e.toString());
 418         }
 419         parse();
 420     }
 421 
 422     /**
 423      * Parse the XML document content using specified handlers and an input
 424      * source.
 425      *
 426      * @exception IOException If any IO errors occur.
 427      * @exception SAXException If the underlying parser throws a SAXException
 428      * while parsing.
 429      */
 430     private void parse()
 431             throws SAXException, IOException {
 432 
 433         init();
 434         try {
 435             mHandCont.setDocumentLocator(this);
 436             mHandCont.startDocument();
 437 
 438             if (mPh != PH_MISC_DTD) {
 439                 mPh = PH_MISC_DTD;  // misc before DTD
 440             }
 441             int evt = EV_NULL;
 442             //          XML document prolog
 443             do {
 444                 wsskip();
 445                 switch (evt = step()) {
 446                     case EV_ELM:
 447                     case EV_ELMS:
 448                         mPh = PH_DOCELM;
 449                         break;
 450 
 451                     case EV_COMM:
 452                     case EV_PI:
 453                         break;
 454 
 455                     case EV_DTD:
 456                         if (mPh >= PH_DTD_MISC) {
 457                             panic(FAULT);
 458                         }
 459                         mPh = PH_DTD_MISC;  // misc after DTD
 460                         break;
 461 
 462                     default:
 463                         panic(FAULT);
 464                 }
 465             } while (mPh < PH_DOCELM);  // misc before DTD
 466             //          XML document starting with document's element
 467             do {
 468                 switch (evt) {
 469                     case EV_ELM:
 470                     case EV_ELMS:
 471                         //              Report the element
 472                         if (mIsNSAware == true) {
 473                             mHandCont.startElement(
 474                                     mElm.value,
 475                                     mElm.name,
 476                                     "",
 477                                     mAttrs);
 478                         } else {
 479                             mHandCont.startElement(
 480                                     "",
 481                                     "",
 482                                     mElm.name,
 483                                     mAttrs);
 484                         }
 485                         if (evt == EV_ELMS) {
 486                             evt = step();
 487                             break;
 488                         }
 489 
 490                     case EV_ELME:
 491                         //              Report the end of element
 492                         if (mIsNSAware == true) {
 493                             mHandCont.endElement(mElm.value, mElm.name, "");
 494                         } else {
 495                             mHandCont.endElement("", "", mElm.name);
 496                         }
 497                         //              Restore the top of the prefix stack
 498                         while (mPref.list == mElm) {
 499                             mHandCont.endPrefixMapping(mPref.name);
 500                             mPref = del(mPref);
 501                         }
 502                         //              Remove the top element tag
 503                         mElm = del(mElm);
 504                         if (mElm == null) {
 505                             mPh = PH_DOCELM_MISC;
 506                         } else {
 507                             evt = step();
 508                         }
 509                         break;
 510 
 511                     case EV_TEXT:
 512                     case EV_WSPC:
 513                     case EV_CDAT:
 514                     case EV_COMM:
 515                     case EV_PI:
 516                     case EV_ENT:
 517                         evt = step();
 518                         break;
 519 
 520                     default:
 521                         panic(FAULT);
 522                 }
 523             } while (mPh == PH_DOCELM);
 524             //          Misc after document's element
 525             do {
 526                 if (wsskip() == EOS) {
 527                     break;
 528                 }
 529 
 530                 switch (step()) {
 531                     case EV_COMM:
 532                     case EV_PI:
 533                         break;
 534 
 535                     default:
 536                         panic(FAULT);
 537                 }
 538             } while (mPh == PH_DOCELM_MISC);
 539             mPh = PH_AFTER_DOC;  // parsing is completed
 540 
 541         } catch (SAXException saxe) {
 542             throw saxe;
 543         } catch (IOException ioe) {
 544             throw ioe;
 545         } catch (RuntimeException rte) {
 546             throw rte;
 547         } catch (Exception e) {
 548             panic(e.toString());
 549         } finally {
 550             mHandCont.endDocument();
 551             cleanup();
 552         }
 553     }
 554 
 555     /**
 556      * Reports document type.
 557      *
 558      * @param name The name of the entity.
 559      * @param pubid The public identifier of the entity or <code>null</code>.
 560      * @param sysid The system identifier of the entity or <code>null</code>.
 561      */
 562     protected void docType(String name, String pubid, String sysid)
 563             throws SAXException {
 564         mHandDtd.notationDecl(name, pubid, sysid);
 565     }
 566 
 567     /**
 568      * Reports a comment.
 569      *
 570      * @param text The comment text starting from first charcater.
 571      * @param length The number of characters in comment.
 572      */
 573     protected void comm(char[] text, int length) {
 574     }
 575 
 576     /**
 577      * Reports a processing instruction.
 578      *
 579      * @param target The processing instruction target name.
 580      * @param body The processing instruction body text.
 581      */
 582     protected void pi(String target, String body)
 583             throws SAXException {
 584         mHandCont.processingInstruction(target, body);
 585     }
 586 
 587     /**
 588      * Reports new namespace prefix. The Namespace prefix (
 589      * <code>mPref.name</code>) being declared and the Namespace URI (
 590      * <code>mPref.value</code>) the prefix is mapped to. An empty string is
 591      * used for the default element namespace, which has no prefix.
 592      */
 593     protected void newPrefix()
 594             throws SAXException {
 595         mHandCont.startPrefixMapping(mPref.name, mPref.value);
 596     }
 597 
 598     /**
 599      * Reports skipped entity name.
 600      *
 601      * @param name The entity name.
 602      */
 603     protected void skippedEnt(String name)
 604             throws SAXException {
 605         mHandCont.skippedEntity(name);
 606     }
 607 
 608     /**
 609      * Returns an
 610      * <code>InputSource</code> for specified entity or
 611      * <code>null</code>.
 612      *
 613      * @param name The name of the entity.
 614      * @param pubid The public identifier of the entity.
 615      * @param sysid The system identifier of the entity.
 616      */
 617     protected InputSource resolveEnt(String name, String pubid, String sysid)
 618             throws SAXException, IOException {
 619         return mHandEnt.resolveEntity(pubid, sysid);
 620     }
 621 
 622     /**
 623      * Reports notation declaration.
 624      *
 625      * @param name The notation's name.
 626      * @param pubid The notation's public identifier, or null if none was given.
 627      * @param sysid The notation's system identifier, or null if none was given.
 628      */
 629     protected void notDecl(String name, String pubid, String sysid)
 630             throws SAXException {
 631         mHandDtd.notationDecl(name, pubid, sysid);
 632     }
 633 
 634     /**
 635      * Reports unparsed entity name.
 636      *
 637      * @param name The unparsed entity's name.
 638      * @param pubid The entity's public identifier, or null if none was given.
 639      * @param sysid The entity's system identifier.
 640      * @param notation The name of the associated notation.
 641      */
 642     protected void unparsedEntDecl(
 643             String name, String pubid, String sysid, String notation)
 644             throws SAXException {
 645         mHandDtd.unparsedEntityDecl(name, pubid, sysid, notation);
 646     }
 647 
 648     /**
 649      * Notifies the handler about fatal parsing error.
 650      *
 651      * @param msg The problem description message.
 652      */
 653     protected void panic(String msg)
 654             throws SAXException {
 655         SAXParseException spe = new SAXParseException(msg, this);
 656         mHandErr.fatalError(spe);
 657         throw spe;  // [#1.2] fatal error definition
 658     }
 659 
 660     /**
 661      * Reports characters and empties the parser's buffer. This method is called
 662      * only if parser is going to return control to the main loop. This means
 663      * that this method may use parser buffer to report white space without
 664      * copeing characters to temporary buffer.
 665      */
 666     protected void bflash()
 667             throws SAXException {
 668         if (mBuffIdx >= 0) {
 669             //          Textual data has been read
 670             mHandCont.characters(mBuff, 0, (mBuffIdx + 1));
 671             mBuffIdx = -1;
 672         }
 673     }
 674 
 675     /**
 676      * Reports white space characters and empties the parser's buffer. This
 677      * method is called only if parser is going to return control to the main
 678      * loop. This means that this method may use parser buffer to report white
 679      * space without copeing characters to temporary buffer.
 680      */
 681     protected void bflash_ws()
 682             throws SAXException {
 683         if (mBuffIdx >= 0) {
 684             // BUG: With additional info from DTD and xml:space attr [#2.10]
 685             // the following call can be supported:
 686             // mHandCont.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1));
 687 
 688             //          Textual data has been read
 689             mHandCont.characters(mBuff, 0, (mBuffIdx + 1));
 690             mBuffIdx = -1;
 691         }
 692     }
 693 
 694     public boolean getFeature(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
 695         throw new UnsupportedOperationException("Not supported yet.");
 696     }
 697 
 698     public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
 699         throw new UnsupportedOperationException("Not supported yet.");
 700     }
 701 
 702     public Object getProperty(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
 703         throw new UnsupportedOperationException("Not supported yet.");
 704     }
 705 
 706     public void setProperty(String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
 707         throw new UnsupportedOperationException("Not supported yet.");
 708     }
 709 }