/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  25 
  26 package com.sun.xml.internal.bind.v2.runtime.unmarshaller;
  27 
  28 import javax.xml.namespace.NamespaceContext;
  29 
  30 import org.xml.sax.SAXException;
  31 
  32 /**
  33  * Walks the XML document structure.
  34  *
  35  * Implemented by the unmarshaller and called by the API-specific connectors.
  36  *
  37  * <h2>Event Call Sequence</h2>
  38  *
  39  * The {@link XmlVisitor} expects the event callbacks in the following order:
  40  * <pre>
  41  * CALL SEQUENCE := startDocument ELEMENT endDocument
  42  * ELEMENT       := startPrefixMapping ELEMENT endPrefixMapping
  43  *               |  startElement BODY endElement
  44  * BODY          := text? (ELEMENT text?)*
  45  * </pre>
  46  * Note in particular that text events may not be called in a row;
  47  * consecutive characters (even those separated by PIs and comments)
  48  * must be reported as one event, unlike SAX.
  49  *
  50  * <p>
  51  * All namespace URIs, local names, and prefixes of element and attribute
  52  * names must be interned. qnames need not be interned.
  53  *
  54  *
  55  * <h2>Typed PCDATA</h2>
  56  * For efficiency, JAXB RI defines a few {@link CharSequence} implementations
  57  * that can be used as a parameter to the {@link #text(CharSequence)} method.
  58  * For example, see {@link Base64Data}.
  59  *
  60  * <h2>Error Handling</h2>
  61  * The visitor may throw {@link SAXException} to abort the unmarshalling process
  62  * in the middle.
  63  *
  64  * @author Kohsuke Kawaguchi
  65  */
  66 public interface XmlVisitor {
  67     /**
  68      * Notifies a start of the document.
  69      *
  70      * @param locator
  71      *      This live object returns the location information as the parsing progresses.
  72      *      must not be null.
  73      * @param nsContext
  74      *      Some broken XML APIs can't iterate all the in-scope namespace bindings,
  75      *      which makes it impossible to emulate {@link #startPrefixMapping(String, String)} correctly
  76      *      when unmarshalling a subtree. Connectors that use such an API can
  77      *      pass in additional {@link NamespaceContext} object that knows about the
  78      *      in-scope namespace bindings. Otherwise (and normally) it is null.
  79      *
  80      *      <p>
  81      *      Ideally this object should be immutable and only represent the namespace URI bindings
  82      *      in the context (those done above the element that JAXB started unmarshalling),
  83      *      but it can also work even if it changes as the parsing progress (to include
  84      *      namespaces declared on the current element being parsed.)
  85      */
  86     void startDocument(LocatorEx locator, NamespaceContext nsContext) throws SAXException;
  87     void endDocument() throws SAXException;
  88 
  89     /**
  90      * Notifies a start tag of a new element.
  91      *
  92      * namespace URIs and local names must be interned.
  93      */
  94     void startElement(TagName tagName) throws SAXException;
  95     void endElement(TagName tagName) throws SAXException;
  96 
  97     /**
  98      * Called before {@link #startElement} event to notify a new namespace binding.
  99      */
 100     void startPrefixMapping( String prefix, String nsUri ) throws SAXException;
 101     /**
 102      * Called after {@link #endElement} event to notify the end of a binding.
 103      */
 104     void endPrefixMapping( String prefix ) throws SAXException;
 105 
 106     /**
 107      * Text events.
 108      *
 109      * <p>
 110      * The caller should consult {@link TextPredictor} to see
 111      * if the unmarshaller is expecting any PCDATA. If the above is returning
 112      * false, the caller is OK to skip any text in XML. The net effect is
 113      * that we can ignore whitespaces quickly.
 114      *
 115      * @param pcdata
 116      *      represents character data. This object can be mutable
 117      *      (such as {@link StringBuilder}); it only needs to be fixed
 118      *      while this method is executing.
 119      */
 120     void text( CharSequence pcdata ) throws SAXException;
 121 
 122     /**
 123      * Returns the {@link UnmarshallingContext} at the end of the chain.
 124      *
 125      * @return
 126      *      always return the same object, so caching the result is recommended.
 127      */
 128     UnmarshallingContext getContext();
 129 
 130     /**
 131      * Gets the predictor that can be used for the caller to avoid
 132      * calling {@link #text(CharSequence)} unnecessarily.
 133      */
 134     TextPredictor getPredictor();
 135 
 136     interface TextPredictor {
 137         /**
 138          * Returns true if the visitor is expecting a text event as the next event.
 139          *
 140          * <p>
 141          * This is primarily intended to be used for optimization to avoid buffering
 142          * characters unnecessarily. If this method returns false and the connector
 143          * sees whitespace it can safely skip it.
 144          *
 145          * <p>
 146          * If this method returns true, all the whitespaces are considered significant
 147          * and thus need to be reported as a {@link XmlVisitor#text} event. Furthermore,
 148          * if the element has no children (like &lt;foo/>), then it has to be reported
 149          * an empty {@link XmlVisitor#text} event.
 150          */
 151         boolean expectText();
 152     }
 153 }