1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 
  23 // Sep 14, 2000:
//  Fixed serializer to report IO exceptions directly, instead of at
//  the end of document processing.
  26 //  Reported by Patrick Higgins <phiggins@transzap.com>
  27 
  28 
  29 package com.sun.org.apache.xml.internal.serialize;
  30 
  31 
  32 import java.io.IOException;
  33 
  34 import org.w3c.dom.Element;
  35 import org.w3c.dom.Node;
  36 import org.xml.sax.AttributeList;
  37 import org.xml.sax.Attributes;
  38 import org.xml.sax.SAXException;
  39 
  40 
  41 /**
  42  * Implements a text serializer supporting both DOM and SAX
  43  * serializing. For usage instructions see {@link Serializer}.
  44  * <p>
  45  * If an output stream is used, the encoding is taken from the
  46  * output format (defaults to <tt>UTF-8</tt>). If a writer is
  47  * used, make sure the writer uses the same encoding (if applies)
  48  * as specified in the output format.
  49  * <p>
  50  * The serializer supports both DOM and SAX. DOM serializing is done
  51  * by calling {@link #serialize} and SAX serializing is done by firing
  52  * SAX events and using the serializer as a document handler.
  53  * <p>
  54  * If an I/O exception occurs while serializing, the serializer
  55  * will not throw an exception directly, but only throw it
  56  * at the end of serializing (either DOM or SAX's {@link
  57  * org.xml.sax.DocumentHandler#endDocument}.
  58  *
  59  *
  60  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  61  * @see Serializer
  62  *
  63  * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  64  * is replaced by that of Xalan. Main class
  65  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  66  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  67  */
  68 public class TextSerializer
  69     extends BaseMarkupSerializer
  70 {
  71 
  72 
  73     /**
  74      * Constructs a new serializer. The serializer cannot be used without
  75      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  76      * first.
  77      */
  78     public TextSerializer()
  79     {
  80         super( new OutputFormat( Method.TEXT, null, false ) );
  81     }
  82 
  83 
  84     public void setOutputFormat( OutputFormat format )
  85     {
  86         super.setOutputFormat( format != null ? format : new OutputFormat( Method.TEXT, null, false ) );
  87     }
  88 
  89 
  90     //-----------------------------------------//
  91     // SAX content handler serializing methods //
  92     //-----------------------------------------//
  93 
  94 
  95     public void startElement( String namespaceURI, String localName,
  96                               String rawName, Attributes attrs )
  97         throws SAXException
  98     {
  99         startElement( rawName == null ? localName : rawName, null );
 100     }
 101 
 102 
 103     public void endElement( String namespaceURI, String localName,
 104                             String rawName )
 105         throws SAXException
 106     {
 107         endElement( rawName == null ? localName : rawName );
 108     }
 109 
 110 
 111     //------------------------------------------//
 112     // SAX document handler serializing methods //
 113     //------------------------------000---------//
 114 
 115 
 116     public void startElement( String tagName, AttributeList attrs )
 117         throws SAXException
 118     {
 119         boolean      preserveSpace;
 120         ElementState state;
 121 
 122         try {
 123             state = getElementState();
 124             if ( isDocumentState() ) {
 125                 // If this is the root element handle it differently.
 126                 // If the first root element in the document, serialize
 127                 // the document's DOCTYPE. Space preserving defaults
 128                 // to that of the output format.
 129                 if ( ! _started )
 130                     startDocument( tagName );
 131             }
 132             // For any other element, if first in parent, then
 133             // use the parnet's space preserving.
 134             preserveSpace = state.preserveSpace;
 135 
 136             // Do not change the current element state yet.
 137             // This only happens in endElement().
 138 
 139             // Ignore all other attributes of the element, only printing
 140             // its contents.
 141 
 142             // Now it's time to enter a new element state
 143             // with the tag name and space preserving.
 144             // We still do not change the curent element state.
 145             state = enterElementState( null, null, tagName, preserveSpace );
 146         } catch ( IOException except ) {
 147             throw new SAXException( except );
 148         }
 149     }
 150 
 151 
 152     public void endElement( String tagName )
 153         throws SAXException
 154     {
 155         try {
 156             endElementIO( tagName );
 157         } catch ( IOException except ) {
 158             throw new SAXException( except );
 159         }
 160     }
 161 
 162 
 163     public void endElementIO( String tagName )
 164         throws IOException
 165     {
 166         ElementState state;
 167 
 168         // Works much like content() with additions for closing
 169         // an element. Note the different checks for the closed
 170         // element's state and the parent element's state.
 171         state = getElementState();
 172         // Leave the element state and update that of the parent
 173         // (if we're not root) to not empty and after element.
 174         state = leaveElementState();
 175         state.afterElement = true;
 176         state.empty = false;
 177         if ( isDocumentState() )
 178             _printer.flush();
 179     }
 180 
 181 
 182     public void processingInstructionIO( String target, String code ) throws IOException
 183     {
 184     }
 185 
 186 
 187     public void comment( String text )
 188     {
 189     }
 190 
 191 
 192     public void comment( char[] chars, int start, int length )
 193     {
 194     }
 195 
 196 
 197     public void characters( char[] chars, int start, int length )
 198         throws SAXException
 199     {
 200         ElementState state;
 201 
 202         try {
 203             state = content();
 204             state.doCData = state.inCData = false;
 205             printText( chars, start, length, true, true );
 206         } catch ( IOException except ) {
 207             throw new SAXException( except );
 208         }
 209     }
 210 
 211 
 212     protected void characters( String text, boolean unescaped )
 213         throws IOException
 214     {
 215         ElementState state;
 216 
 217         state = content();
 218         state.doCData = state.inCData = false;
 219         printText( text, true, true );
 220     }
 221 
 222 
 223     //------------------------------------------//
 224     // Generic node serializing methods methods //
 225     //------------------------------------------//
 226 
 227 
 228     /**
 229      * Called to serialize the document's DOCTYPE by the root element.
 230      * <p>
 231      * This method will check if it has not been called before ({@link #_started}),
 232      * will serialize the document type declaration, and will serialize all
 233      * pre-root comments and PIs that were accumulated in the document
 234      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
 235      * this is not the first root element of the document.
 236      */
 237     protected void startDocument( String rootTagName )
 238         throws IOException
 239     {
 240         // Required to stop processing the DTD, even though the DTD
 241         // is not printed.
 242         _printer.leaveDTD();
 243 
 244         _started = true;
 245         // Always serialize these, even if not te first root element.
 246         serializePreRoot();
 247     }
 248 
 249 
 250     /**
 251      * Called to serialize a DOM element. Equivalent to calling {@link
 252      * #startElement}, {@link #endElement} and serializing everything
 253      * inbetween, but better optimized.
 254      */
 255     protected void serializeElement( Element elem )
 256         throws IOException
 257     {
 258         Node         child;
 259         ElementState state;
 260         boolean      preserveSpace;
 261         String       tagName;
 262 
 263         tagName = elem.getTagName();
 264         state = getElementState();
 265         if ( isDocumentState() ) {
 266             // If this is the root element handle it differently.
 267             // If the first root element in the document, serialize
 268             // the document's DOCTYPE. Space preserving defaults
 269             // to that of the output format.
 270             if ( ! _started )
 271                 startDocument( tagName );
 272         }
 273         // For any other element, if first in parent, then
 274         // use the parnet's space preserving.
 275         preserveSpace = state.preserveSpace;
 276 
 277         // Do not change the current element state yet.
 278         // This only happens in endElement().
 279 
 280         // Ignore all other attributes of the element, only printing
 281         // its contents.
 282 
 283         // If element has children, then serialize them, otherwise
 284         // serialize en empty tag.
 285         if ( elem.hasChildNodes() ) {
 286             // Enter an element state, and serialize the children
 287             // one by one. Finally, end the element.
 288             state = enterElementState( null, null, tagName, preserveSpace );
 289             child = elem.getFirstChild();
 290             while ( child != null ) {
 291                 serializeNode( child );
 292                 child = child.getNextSibling();
 293             }
 294             endElementIO( tagName );
 295         } else {
 296             if ( ! isDocumentState() ) {
 297                 // After element but parent element is no longer empty.
 298                 state.afterElement = true;
 299                 state.empty = false;
 300             }
 301         }
 302     }
 303 
 304 
 305     /**
 306      * Serialize the DOM node. This method is unique to the Text serializer.
 307      *
 308      * @param node The node to serialize
 309      */
 310     protected void serializeNode( Node node )
 311         throws IOException
 312     {
 313         // Based on the node type call the suitable SAX handler.
 314         // Only comments entities and documents which are not
 315         // handled by SAX are serialized directly.
 316         switch ( node.getNodeType() ) {
 317         case Node.TEXT_NODE : {
 318             String text;
 319 
 320             text = node.getNodeValue();
 321             if ( text != null )
 322                 characters( node.getNodeValue(), true );
 323             break;
 324         }
 325 
 326         case Node.CDATA_SECTION_NODE : {
 327             String text;
 328 
 329             text = node.getNodeValue();
 330             if ( text != null )
 331                 characters( node.getNodeValue(), true );
 332             break;
 333         }
 334 
 335         case Node.COMMENT_NODE :
 336             break;
 337 
 338         case Node.ENTITY_REFERENCE_NODE :
 339             // Ignore.
 340             break;
 341 
 342         case Node.PROCESSING_INSTRUCTION_NODE :
 343             break;
 344 
 345         case Node.ELEMENT_NODE :
 346             serializeElement( (Element) node );
 347             break;
 348 
 349         case Node.DOCUMENT_NODE :
 350             // !!! Fall through
 351         case Node.DOCUMENT_FRAGMENT_NODE : {
 352             Node         child;
 353 
 354             // By definition this will happen if the node is a document,
 355             // document fragment, etc. Just serialize its contents. It will
 356             // work well for other nodes that we do not know how to serialize.
 357             child = node.getFirstChild();
 358             while ( child != null ) {
 359                 serializeNode( child );
 360                 child = child.getNextSibling();
 361             }
 362             break;
 363         }
 364 
 365         default:
 366             break;
 367         }
 368     }
 369 
 370 
 371     protected ElementState content()
 372     {
 373         ElementState state;
 374 
 375         state = getElementState();
 376         if ( ! isDocumentState() ) {
 377             // If this is the first content in the element,
 378             // change the state to not-empty.
 379             if ( state.empty )
 380                 state.empty = false;
 381             // Except for one content type, all of them
 382             // are not last element. That one content
 383             // type will take care of itself.
 384             state.afterElement = false;
 385         }
 386         return state;
 387     }
 388 
 389 
 390     protected String getEntityRef( int ch )
 391     {
 392         return null;
 393     }
 394 
 395 
 396 }