1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.internal.util.xml.impl;
  27 
import java.io.IOException;
import java.io.InputStream;
import java.util.Objects;
import jdk.internal.org.xml.sax.ContentHandler;
import jdk.internal.org.xml.sax.DTDHandler;
import jdk.internal.org.xml.sax.EntityResolver;
import jdk.internal.org.xml.sax.ErrorHandler;
import jdk.internal.org.xml.sax.InputSource;
import jdk.internal.org.xml.sax.Locator;
import jdk.internal.org.xml.sax.SAXException;
import jdk.internal.org.xml.sax.SAXNotRecognizedException;
import jdk.internal.org.xml.sax.SAXNotSupportedException;
import jdk.internal.org.xml.sax.SAXParseException;
import jdk.internal.org.xml.sax.XMLReader;
import jdk.internal.org.xml.sax.helpers.DefaultHandler;
  42 
  43 /**
  44  * XML non-validating push parser.
  45  *
  46  * This non-validating parser conforms to <a href="http://www.w3.org/TR/REC-xml"
  47  * >Extensible Markup Language (XML) 1.0</a> and <a
  48  * href="http://www.w3.org/TR/REC-xml-names" >"Namespaces in XML"</a>
 * specifications. The APIs supported by the parser are <a
  50  * href="http://java.sun.com/aboutJava/communityprocess/final/jsr030/index.html">CLDC
  51  * 1.0</a> and <a href="http://www.jcp.org/en/jsr/detail?id=280">JSR-280</a>, a
  52  * JavaME subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a>
  53  * and <a href="http://www.saxproject.org/">SAX2</a>.
  54  *
  55  * @see org.xml.sax.XMLReader
  56  */
  57 
  58 final class ParserSAX
  59     extends Parser implements XMLReader, Locator
  60 {
  61     public final static String FEATURE_NS =
  62             "http://xml.org/sax/features/namespaces";
  63     public final static String FEATURE_PREF =
  64             "http://xml.org/sax/features/namespace-prefixes";
  65     //          SAX feature flags
  66     private boolean mFNamespaces;
  67     private boolean mFPrefixes;
  68     //          SAX handlers
  69     private DefaultHandler mHand;      // the default handler
  70     private ContentHandler mHandCont;  // the content handler
  71     private DTDHandler mHandDtd;   // the DTD handler
  72     private ErrorHandler mHandErr;   // the error handler
  73     private EntityResolver mHandEnt;   // the entity resolver
  74 
  75     /**
  76      * Constructor.
  77      */
  78     public ParserSAX() {
  79         super();
  80 
  81         //              SAX feature defaut values
  82         mFNamespaces = true;
  83         mFPrefixes = false;
  84 
  85         //              Default handler which will be used in case the application
  86         //              do not set one of handlers.
  87         mHand = new DefaultHandler();
  88         mHandCont = mHand;
  89         mHandDtd = mHand;
  90         mHandErr = mHand;
  91         mHandEnt = mHand;
  92     }
  93 
  94     /**
  95      * Return the current content handler.
  96      *
  97      * @return The current content handler, or null if none has been registered.
  98      * @see #setContentHandler
  99      */
 100     public ContentHandler getContentHandler() {
 101         return (mHandCont != mHand) ? mHandCont : null;
 102     }
 103 
 104     /**
 105      * Allow an application to register a content event handler.
 106      *
 107      * <p>If the application does not register a content handler, all content
 108      * events reported by the SAX parser will be silently ignored.</p>
 109      *
 110      * <p>Applications may register a new or different handler in the middle of
 111      * a parse, and the SAX parser must begin using the new handler
 112      * immediately.</p>
 113      *
 114      * @param handler The content handler.
 115      * @exception java.lang.NullPointerException If the handler argument is
 116      * null.
 117      * @see #getContentHandler
 118      */
 119     public void setContentHandler(ContentHandler handler) {
 120         if (handler == null) {
 121             throw new NullPointerException();
 122         }
 123         mHandCont = handler;
 124     }
 125 
 126     /**
 127      * Return the current DTD handler.
 128      *
 129      * @return The current DTD handler, or null if none has been registered.
 130      * @see #setDTDHandler
 131      */
 132     public DTDHandler getDTDHandler() {
 133         return (mHandDtd != mHand) ? mHandDtd : null;
 134     }
 135 
 136     /**
 137      * Allow an application to register a DTD event handler.
 138      *
 139      * <p>If the application does not register a DTD handler, all DTD events
 140      * reported by the SAX parser will be silently ignored.</p>
 141      *
 142      * <p>Applications may register a new or different handler in the middle of
 143      * a parse, and the SAX parser must begin using the new handler
 144      * immediately.</p>
 145      *
 146      * @param handler The DTD handler.
 147      * @exception java.lang.NullPointerException If the handler argument is
 148      * null.
 149      * @see #getDTDHandler
 150      */
 151     public void setDTDHandler(DTDHandler handler) {
 152         if (handler == null) {
 153             throw new NullPointerException();
 154         }
 155         mHandDtd = handler;
 156     }
 157 
 158     /**
 159      * Return the current error handler.
 160      *
 161      * @return The current error handler, or null if none has been registered.
 162      * @see #setErrorHandler
 163      */
 164     public ErrorHandler getErrorHandler() {
 165         return (mHandErr != mHand) ? mHandErr : null;
 166     }
 167 
 168     /**
 169      * Allow an application to register an error event handler.
 170      *
 171      * <p>If the application does not register an error handler, all error
 172      * events reported by the SAX parser will be silently ignored; however,
 173      * normal processing may not continue. It is highly recommended that all SAX
 174      * applications implement an error handler to avoid unexpected bugs.</p>
 175      *
 176      * <p>Applications may register a new or different handler in the middle of
 177      * a parse, and the SAX parser must begin using the new handler
 178      * immediately.</p>
 179      *
 180      * @param handler The error handler.
 181      * @exception java.lang.NullPointerException If the handler argument is
 182      * null.
 183      * @see #getErrorHandler
 184      */
 185     public void setErrorHandler(ErrorHandler handler) {
 186         if (handler == null) {
 187             throw new NullPointerException();
 188         }
 189         mHandErr = handler;
 190     }
 191 
 192     /**
 193      * Return the current entity resolver.
 194      *
 195      * @return The current entity resolver, or null if none has been registered.
 196      * @see #setEntityResolver
 197      */
 198     public EntityResolver getEntityResolver() {
 199         return (mHandEnt != mHand) ? mHandEnt : null;
 200     }
 201 
 202     /**
 203      * Allow an application to register an entity resolver.
 204      *
 205      * <p>If the application does not register an entity resolver, the XMLReader
 206      * will perform its own default resolution.</p>
 207      *
 208      * <p>Applications may register a new or different resolver in the middle of
 209      * a parse, and the SAX parser must begin using the new resolver
 210      * immediately.</p>
 211      *
 212      * @param resolver The entity resolver.
 213      * @exception java.lang.NullPointerException If the resolver argument is
 214      * null.
 215      * @see #getEntityResolver
 216      */
 217     public void setEntityResolver(EntityResolver resolver) {
 218         if (resolver == null) {
 219             throw new NullPointerException();
 220         }
 221         mHandEnt = resolver;
 222     }
 223 
 224     /**
 225      * Return the public identifier for the current document event.
 226      *
 227      * <p>The return value is the public identifier of the document entity or of
 228      * the external parsed entity in which the markup triggering the event
 229      * appears.</p>
 230      *
 231      * @return A string containing the public identifier, or null if none is
 232      * available.
 233      *
 234      * @see #getSystemId
 235      */
 236     public String getPublicId() {
 237         return (mInp != null) ? mInp.pubid : null;
 238     }
 239 
 240     /**
 241      * Return the system identifier for the current document event.
 242      *
 243      * <p>The return value is the system identifier of the document entity or of
 244      * the external parsed entity in which the markup triggering the event
 245      * appears.</p>
 246      *
 247      * <p>If the system identifier is a URL, the parser must resolve it fully
 248      * before passing it to the application.</p>
 249      *
 250      * @return A string containing the system identifier, or null if none is
 251      * available.
 252      *
 253      * @see #getPublicId
 254      */
 255     public String getSystemId() {
 256         return (mInp != null) ? mInp.sysid : null;
 257     }
 258 
 259     /**
 260      * Return the line number where the current document event ends.
 261      *
 262      * @return Always returns -1 indicating the line number is not available.
 263      *
 264      * @see #getColumnNumber
 265      */
 266     public int getLineNumber() {
 267         return -1;
 268     }
 269 
 270     /**
 271      * Return the column number where the current document event ends.
 272      *
 273      * @return Always returns -1 indicating the column number is not available.
 274      *
 275      * @see #getLineNumber
 276      */
 277     public int getColumnNumber() {
 278         return -1;
 279     }
 280 
 281     /**
 282      * Parse an XML document from a system identifier (URI).
 283      *
 284      * <p>This method is a shortcut for the common case of reading a document
 285      * from a system identifier. It is the exact equivalent of the
 286      * following:</p>
 287      *
 288      * <pre>
 289      * parse(new InputSource(systemId));
 290      * </pre>
 291      *
 292      * <p>If the system identifier is a URL, it must be fully resolved by the
 293      * application before it is passed to the parser.</p>
 294      *
 295      * @param systemId The system identifier (URI).
 296      * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping
 297      * another exception.
 298      * @exception java.io.IOException An IO exception from the parser, possibly
 299      * from a byte stream or character stream supplied by the application.
 300      * @see #parse(org.xml.sax.InputSource)
 301      */
 302     public void parse(String systemId) throws IOException, SAXException {
 303         parse(new InputSource(systemId));
 304     }
 305 
 306     /**
 307      * Parse an XML document.
 308      *
 309      * <p>The application can use this method to instruct the XML reader to
 310      * begin parsing an XML document from any valid input source (a character
 311      * stream, a byte stream, or a URI).</p>
 312      *
 313      * <p>Applications may not invoke this method while a parse is in progress
 314      * (they should create a new XMLReader instead for each nested XML
 315      * document). Once a parse is complete, an application may reuse the same
 316      * XMLReader object, possibly with a different input source.</p>
 317      *
 318      * <p>During the parse, the XMLReader will provide information about the XML
 319      * document through the registered event handlers.</p>
 320      *
 321      * <p>This method is synchronous: it will not return until parsing has
 322      * ended. If a client application wants to terminate parsing early, it
 323      * should throw an exception.</p>
 324      *
 325      * @param is The input source for the top-level of the XML document.
 326      * @exception org.xml.sax.SAXException Any SAX exception, possibly wrapping
 327      * another exception.
 328      * @exception java.io.IOException An IO exception from the parser, possibly
 329      * from a byte stream or character stream supplied by the application.
 330      * @see org.xml.sax.InputSource
 331      * @see #parse(java.lang.String)
 332      * @see #setEntityResolver
 333      * @see #setDTDHandler
 334      * @see #setContentHandler
 335      * @see #setErrorHandler
 336      */
 337     public void parse(InputSource is) throws IOException, SAXException {
 338         if (is == null) {
 339             throw new IllegalArgumentException("");
 340         }
 341         //              Set up the document
 342         mInp = new Input(BUFFSIZE_READER);
 343         mPh = PH_BEFORE_DOC;  // before parsing
 344         try {
 345             setinp(is);
 346         } catch (SAXException saxe) {
 347             throw saxe;
 348         } catch (IOException ioe) {
 349             throw ioe;
 350         } catch (RuntimeException rte) {
 351             throw rte;
 352         } catch (Exception e) {
 353             panic(e.toString());
 354         }
 355         parse();
 356     }
 357 
 358     /**
 359      * Parse the content of the given {@link java.io.InputStream} instance as
 360      * XML using the specified {@link org.xml.sax.helpers.DefaultHandler}.
 361      *
 362      * @param src InputStream containing the content to be parsed.
 363      * @param handler The SAX DefaultHandler to use.
 364      * @exception IOException If any IO errors occur.
 365      * @exception IllegalArgumentException If the given InputStream or handler
 366      * is null.
 367      * @exception SAXException If the underlying parser throws a SAXException
 368      * while parsing.
 369      * @see org.xml.sax.helpers.DefaultHandler
 370      */
 371     public void parse(InputStream src, DefaultHandler handler)
 372             throws SAXException, IOException {
 373         if ((src == null) || (handler == null)) {
 374             throw new IllegalArgumentException("");
 375         }
 376         parse(new InputSource(src), handler);
 377     }
 378 
 379     /**
 380      * Parse the content given {@link org.xml.sax.InputSource} as XML using the
 381      * specified {@link org.xml.sax.helpers.DefaultHandler}.
 382      *
 383      * @param is The InputSource containing the content to be parsed.
 384      * @param handler The SAX DefaultHandler to use.
 385      * @exception IOException If any IO errors occur.
 386      * @exception IllegalArgumentException If the InputSource or handler is
 387      * null.
 388      * @exception SAXException If the underlying parser throws a SAXException
 389      * while parsing.
 390      * @see org.xml.sax.helpers.DefaultHandler
 391      */
 392     public void parse(InputSource is, DefaultHandler handler)
 393         throws SAXException, IOException 
 394     {
 395         if ((is == null) || (handler == null)) {
 396             throw new IllegalArgumentException("");
 397         }
 398         //              Set up the handler
 399         mHandCont = handler;
 400         mHandDtd = handler;
 401         mHandErr = handler;
 402         mHandEnt = handler;
 403         //              Set up the document
 404         mInp = new Input(BUFFSIZE_READER);
 405         mPh = PH_BEFORE_DOC;  // before parsing
 406         try {
 407             setinp(is);
 408         } catch (SAXException | IOException | RuntimeException saxe) {
 409             throw saxe;
 410         } catch (Exception e) {
 411             panic(e.toString());
 412         }
 413         parse();
 414     }
 415 
 416     /**
 417      * Parse the XML document content using specified handlers and an input
 418      * source.
 419      *
 420      * @exception IOException If any IO errors occur.
 421      * @exception SAXException If the underlying parser throws a SAXException
 422      * while parsing.
 423      */
 424     private void parse() throws SAXException, IOException {
 425         init();
 426         try {
 427             mHandCont.setDocumentLocator(this);
 428             mHandCont.startDocument();
 429 
 430             if (mPh != PH_MISC_DTD) {
 431                 mPh = PH_MISC_DTD;  // misc before DTD
 432             }
 433             int evt = EV_NULL;
 434             //          XML document prolog
 435             do {
 436                 wsskip();
 437                 switch (evt = step()) {
 438                     case EV_ELM:
 439                     case EV_ELMS:
 440                         mPh = PH_DOCELM;
 441                         break;
 442 
 443                     case EV_COMM:
 444                     case EV_PI:
 445                         break;
 446 
 447                     case EV_DTD:
 448                         if (mPh >= PH_DTD_MISC) {
 449                             panic(FAULT);
 450                         }
 451                         mPh = PH_DTD_MISC;  // misc after DTD
 452                         break;
 453 
 454                     default:
 455                         panic(FAULT);
 456                 }
 457             } while (mPh < PH_DOCELM);  // misc before DTD
 458             //          XML document starting with document's element
 459             do {
 460                 switch (evt) {
 461                     case EV_ELM:
 462                     case EV_ELMS:
 463                         //              Report the element
 464                         if (mIsNSAware == true) {
 465                             mHandCont.startElement(
 466                                     mElm.value,
 467                                     mElm.name,
 468                                     "",
 469                                     mAttrs);
 470                         } else {
 471                             mHandCont.startElement(
 472                                     "",
 473                                     "",
 474                                     mElm.name,
 475                                     mAttrs);
 476                         }
 477                         if (evt == EV_ELMS) {
 478                             evt = step();
 479                             break;
 480                         }
 481 
 482                     case EV_ELME:
 483                         //              Report the end of element
 484                         if (mIsNSAware == true) {
 485                             mHandCont.endElement(mElm.value, mElm.name, "");
 486                         } else {
 487                             mHandCont.endElement("", "", mElm.name);
 488                         }
 489                         //              Restore the top of the prefix stack
 490                         while (mPref.list == mElm) {
 491                             mHandCont.endPrefixMapping(mPref.name);
 492                             mPref = del(mPref);
 493                         }
 494                         //              Remove the top element tag
 495                         mElm = del(mElm);
 496                         if (mElm == null) {
 497                             mPh = PH_DOCELM_MISC;
 498                         } else {
 499                             evt = step();
 500                         }
 501                         break;
 502 
 503                     case EV_TEXT:
 504                     case EV_WSPC:
 505                     case EV_CDAT:
 506                     case EV_COMM:
 507                     case EV_PI:
 508                     case EV_ENT:
 509                         evt = step();
 510                         break;
 511 
 512                     default:
 513                         panic(FAULT);
 514                 }
 515             } while (mPh == PH_DOCELM);
 516             //          Misc after document's element
 517             do {
 518                 if (wsskip() == EOS) {
 519                     break;
 520                 }
 521 
 522                 switch (step()) {
 523                     case EV_COMM:
 524                     case EV_PI:
 525                         break;
 526 
 527                     default:
 528                         panic(FAULT);
 529                 }
 530             } while (mPh == PH_DOCELM_MISC);
 531             mPh = PH_AFTER_DOC;  // parsing is completed
 532 
 533         } catch (SAXException saxe) {
 534             throw saxe;
 535         } catch (IOException ioe) {
 536             throw ioe;
 537         } catch (RuntimeException rte) {
 538             throw rte;
 539         } catch (Exception e) {
 540             panic(e.toString());
 541         } finally {
 542             mHandCont.endDocument();
 543             cleanup();
 544         }
 545     }
 546 
 547     /**
 548      * Reports document type.
 549      *
 550      * @param name The name of the entity.
 551      * @param pubid The public identifier of the entity or <code>null</code>.
 552      * @param sysid The system identifier of the entity or <code>null</code>.
 553      */
 554     protected void docType(String name, String pubid, String sysid) throws SAXException {
 555         mHandDtd.notationDecl(name, pubid, sysid);
 556     }
 557 
 558     /**
 559      * Reports a comment.
 560      *
 561      * @param text The comment text starting from first charcater.
 562      * @param length The number of characters in comment.
 563      */
 564     protected void comm(char[] text, int length) {
 565     }
 566 
 567     /**
 568      * Reports a processing instruction.
 569      *
 570      * @param target The processing instruction target name.
 571      * @param body The processing instruction body text.
 572      */
 573     protected void pi(String target, String body) throws SAXException {
 574         mHandCont.processingInstruction(target, body);
 575     }
 576 
 577     /**
 578      * Reports new namespace prefix. The Namespace prefix (
 579      * <code>mPref.name</code>) being declared and the Namespace URI (
 580      * <code>mPref.value</code>) the prefix is mapped to. An empty string is
 581      * used for the default element namespace, which has no prefix.
 582      */
 583     protected void newPrefix() throws SAXException {
 584         mHandCont.startPrefixMapping(mPref.name, mPref.value);
 585     }
 586 
 587     /**
 588      * Reports skipped entity name.
 589      *
 590      * @param name The entity name.
 591      */
 592     protected void skippedEnt(String name) throws SAXException {
 593         mHandCont.skippedEntity(name);
 594     }
 595 
 596     /**
 597      * Returns an
 598      * <code>InputSource</code> for specified entity or
 599      * <code>null</code>.
 600      *
 601      * @param name The name of the entity.
 602      * @param pubid The public identifier of the entity.
 603      * @param sysid The system identifier of the entity.
 604      */
 605     protected InputSource resolveEnt(String name, String pubid, String sysid)
 606         throws SAXException, IOException 
 607     {
 608         return mHandEnt.resolveEntity(pubid, sysid);
 609     }
 610 
 611     /**
 612      * Reports notation declaration.
 613      *
 614      * @param name The notation's name.
 615      * @param pubid The notation's public identifier, or null if none was given.
 616      * @param sysid The notation's system identifier, or null if none was given.
 617      */
 618     protected void notDecl(String name, String pubid, String sysid)
 619         throws SAXException 
 620     {
 621         mHandDtd.notationDecl(name, pubid, sysid);
 622     }
 623 
 624     /**
 625      * Reports unparsed entity name.
 626      *
 627      * @param name The unparsed entity's name.
 628      * @param pubid The entity's public identifier, or null if none was given.
 629      * @param sysid The entity's system identifier.
 630      * @param notation The name of the associated notation.
 631      */
 632     protected void unparsedEntDecl(String name, String pubid, String sysid, String notation)
 633         throws SAXException 
 634     {
 635         mHandDtd.unparsedEntityDecl(name, pubid, sysid, notation);
 636     }
 637 
 638     /**
 639      * Notifies the handler about fatal parsing error.
 640      *
 641      * @param msg The problem description message.
 642      */
 643     protected void panic(String msg) throws SAXException {
 644         SAXParseException spe = new SAXParseException(msg, this);
 645         mHandErr.fatalError(spe);
 646         throw spe;  // [#1.2] fatal error definition
 647     }
 648 
 649     /**
 650      * Reports characters and empties the parser's buffer. This method is called
 651      * only if parser is going to return control to the main loop. This means
 652      * that this method may use parser buffer to report white space without
 653      * copeing characters to temporary buffer.
 654      */
 655     protected void bflash() throws SAXException {
 656         if (mBuffIdx >= 0) {
 657             //          Textual data has been read
 658             mHandCont.characters(mBuff, 0, (mBuffIdx + 1));
 659             mBuffIdx = -1;
 660         }
 661     }
 662 
 663     /**
 664      * Reports white space characters and empties the parser's buffer. This
 665      * method is called only if parser is going to return control to the main
 666      * loop. This means that this method may use parser buffer to report white
 667      * space without copeing characters to temporary buffer.
 668      */
 669     protected void bflash_ws() throws SAXException {
 670         if (mBuffIdx >= 0) {
 671             // BUG: With additional info from DTD and xml:space attr [#2.10]
 672             // the following call can be supported:
 673             // mHandCont.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1));
 674 
 675             //          Textual data has been read
 676             mHandCont.characters(mBuff, 0, (mBuffIdx + 1));
 677             mBuffIdx = -1;
 678         }
 679     }
 680 
 681     public boolean getFeature(String name) {
 682         throw new UnsupportedOperationException("Not supported yet.");
 683     }
 684 
 685     public void setFeature(String name, boolean value) {
 686         throw new UnsupportedOperationException("Not supported yet.");
 687     }
 688 
 689     public Object getProperty(String name) {
 690         throw new UnsupportedOperationException("Not supported yet.");
 691     }
 692 
 693     public void setProperty(String name, Object value) {
 694         throw new UnsupportedOperationException("Not supported yet.");
 695     }
 696 }