1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 23 // Sep 14, 2000: 24 // Fixed serializer to report IO exception directly, instead at 25 // the end of document processing. 26 // Reported by Patrick Higgins <phiggins@transzap.com> 27 28 29 package com.sun.org.apache.xml.internal.serialize; 30 31 32 import java.io.IOException; 33 34 import org.w3c.dom.Element; 35 import org.w3c.dom.Node; 36 import org.xml.sax.AttributeList; 37 import org.xml.sax.Attributes; 38 import org.xml.sax.SAXException; 39 40 41 /** 42 * Implements a text serializer supporting both DOM and SAX 43 * serializing. For usage instructions see {@link Serializer}. 44 * <p> 45 * If an output stream is used, the encoding is taken from the 46 * output format (defaults to <tt>UTF-8</tt>). If a writer is 47 * used, make sure the writer uses the same encoding (if applies) 48 * as specified in the output format. 49 * <p> 50 * The serializer supports both DOM and SAX. DOM serializing is done 51 * by calling {@link #serialize} and SAX serializing is done by firing 52 * SAX events and using the serializer as a document handler. 53 * <p> 54 * If an I/O exception occurs while serializing, the serializer 55 * will not throw an exception directly, but only throw it 56 * at the end of serializing (either DOM or SAX's {@link 57 * org.xml.sax.DocumentHandler#endDocument}. 58 * 59 * 60 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 61 * @see Serializer 62 * 63 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation 64 * is replaced by that of Xalan. Main class 65 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced 66 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}. 67 */ 68 public class TextSerializer 69 extends BaseMarkupSerializer 70 { 71 72 73 /** 74 * Constructs a new serializer. The serializer cannot be used without 75 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 76 * first. 77 */ 78 public TextSerializer() 79 { 80 super( new OutputFormat( Method.TEXT, null, false ) ); 81 } 82 83 84 public void setOutputFormat( OutputFormat format ) 85 { 86 super.setOutputFormat( format != null ? format : new OutputFormat( Method.TEXT, null, false ) ); 87 } 88 89 90 //-----------------------------------------// 91 // SAX content handler serializing methods // 92 //-----------------------------------------// 93 94 95 public void startElement( String namespaceURI, String localName, 96 String rawName, Attributes attrs ) 97 throws SAXException 98 { 99 startElement( rawName == null ? localName : rawName, null ); 100 } 101 102 103 public void endElement( String namespaceURI, String localName, 104 String rawName ) 105 throws SAXException 106 { 107 endElement( rawName == null ? localName : rawName ); 108 } 109 110 111 //------------------------------------------// 112 // SAX document handler serializing methods // 113 //------------------------------000---------// 114 115 116 public void startElement( String tagName, AttributeList attrs ) 117 throws SAXException 118 { 119 boolean preserveSpace; 120 ElementState state; 121 122 try { 123 state = getElementState(); 124 if ( isDocumentState() ) { 125 // If this is the root element handle it differently. 126 // If the first root element in the document, serialize 127 // the document's DOCTYPE. Space preserving defaults 128 // to that of the output format. 129 if ( ! _started ) 130 startDocument( tagName ); 131 } 132 // For any other element, if first in parent, then 133 // use the parnet's space preserving. 134 preserveSpace = state.preserveSpace; 135 136 // Do not change the current element state yet. 137 // This only happens in endElement(). 138 139 // Ignore all other attributes of the element, only printing 140 // its contents. 141 142 // Now it's time to enter a new element state 143 // with the tag name and space preserving. 144 // We still do not change the curent element state. 145 state = enterElementState( null, null, tagName, preserveSpace ); 146 } catch ( IOException except ) { 147 throw new SAXException( except ); 148 } 149 } 150 151 152 public void endElement( String tagName ) 153 throws SAXException 154 { 155 try { 156 endElementIO( tagName ); 157 } catch ( IOException except ) { 158 throw new SAXException( except ); 159 } 160 } 161 162 163 public void endElementIO( String tagName ) 164 throws IOException 165 { 166 ElementState state; 167 168 // Works much like content() with additions for closing 169 // an element. Note the different checks for the closed 170 // element's state and the parent element's state. 171 state = getElementState(); 172 // Leave the element state and update that of the parent 173 // (if we're not root) to not empty and after element. 174 state = leaveElementState(); 175 state.afterElement = true; 176 state.empty = false; 177 if ( isDocumentState() ) 178 _printer.flush(); 179 } 180 181 182 public void processingInstructionIO( String target, String code ) throws IOException 183 { 184 } 185 186 187 public void comment( String text ) 188 { 189 } 190 191 192 public void comment( char[] chars, int start, int length ) 193 { 194 } 195 196 197 public void characters( char[] chars, int start, int length ) 198 throws SAXException 199 { 200 ElementState state; 201 202 try { 203 state = content(); 204 state.doCData = state.inCData = false; 205 printText( chars, start, length, true, true ); 206 } catch ( IOException except ) { 207 throw new SAXException( except ); 208 } 209 } 210 211 212 protected void characters( String text, boolean unescaped ) 213 throws IOException 214 { 215 ElementState state; 216 217 state = content(); 218 state.doCData = state.inCData = false; 219 printText( text, true, true ); 220 } 221 222 223 //------------------------------------------// 224 // Generic node serializing methods methods // 225 //------------------------------------------// 226 227 228 /** 229 * Called to serialize the document's DOCTYPE by the root element. 230 * <p> 231 * This method will check if it has not been called before ({@link #_started}), 232 * will serialize the document type declaration, and will serialize all 233 * pre-root comments and PIs that were accumulated in the document 234 * (see {@link #serializePreRoot}). Pre-root will be serialized even if 235 * this is not the first root element of the document. 236 */ 237 protected void startDocument( String rootTagName ) 238 throws IOException 239 { 240 // Required to stop processing the DTD, even though the DTD 241 // is not printed. 242 _printer.leaveDTD(); 243 244 _started = true; 245 // Always serialize these, even if not te first root element. 246 serializePreRoot(); 247 } 248 249 250 /** 251 * Called to serialize a DOM element. Equivalent to calling {@link 252 * #startElement}, {@link #endElement} and serializing everything 253 * inbetween, but better optimized. 254 */ 255 protected void serializeElement( Element elem ) 256 throws IOException 257 { 258 Node child; 259 ElementState state; 260 boolean preserveSpace; 261 String tagName; 262 263 tagName = elem.getTagName(); 264 state = getElementState(); 265 if ( isDocumentState() ) { 266 // If this is the root element handle it differently. 267 // If the first root element in the document, serialize 268 // the document's DOCTYPE. Space preserving defaults 269 // to that of the output format. 270 if ( ! _started ) 271 startDocument( tagName ); 272 } 273 // For any other element, if first in parent, then 274 // use the parnet's space preserving. 275 preserveSpace = state.preserveSpace; 276 277 // Do not change the current element state yet. 278 // This only happens in endElement(). 279 280 // Ignore all other attributes of the element, only printing 281 // its contents. 282 283 // If element has children, then serialize them, otherwise 284 // serialize en empty tag. 285 if ( elem.hasChildNodes() ) { 286 // Enter an element state, and serialize the children 287 // one by one. Finally, end the element. 288 state = enterElementState( null, null, tagName, preserveSpace ); 289 child = elem.getFirstChild(); 290 while ( child != null ) { 291 serializeNode( child ); 292 child = child.getNextSibling(); 293 } 294 endElementIO( tagName ); 295 } else { 296 if ( ! isDocumentState() ) { 297 // After element but parent element is no longer empty. 298 state.afterElement = true; 299 state.empty = false; 300 } 301 } 302 } 303 304 305 /** 306 * Serialize the DOM node. This method is unique to the Text serializer. 307 * 308 * @param node The node to serialize 309 */ 310 protected void serializeNode( Node node ) 311 throws IOException 312 { 313 // Based on the node type call the suitable SAX handler. 314 // Only comments entities and documents which are not 315 // handled by SAX are serialized directly. 316 switch ( node.getNodeType() ) { 317 case Node.TEXT_NODE : { 318 String text; 319 320 text = node.getNodeValue(); 321 if ( text != null ) 322 characters( node.getNodeValue(), true ); 323 break; 324 } 325 326 case Node.CDATA_SECTION_NODE : { 327 String text; 328 329 text = node.getNodeValue(); 330 if ( text != null ) 331 characters( node.getNodeValue(), true ); 332 break; 333 } 334 335 case Node.COMMENT_NODE : 336 break; 337 338 case Node.ENTITY_REFERENCE_NODE : 339 // Ignore. 340 break; 341 342 case Node.PROCESSING_INSTRUCTION_NODE : 343 break; 344 345 case Node.ELEMENT_NODE : 346 serializeElement( (Element) node ); 347 break; 348 349 case Node.DOCUMENT_NODE : 350 // !!! Fall through 351 case Node.DOCUMENT_FRAGMENT_NODE : { 352 Node child; 353 354 // By definition this will happen if the node is a document, 355 // document fragment, etc. Just serialize its contents. It will 356 // work well for other nodes that we do not know how to serialize. 357 child = node.getFirstChild(); 358 while ( child != null ) { 359 serializeNode( child ); 360 child = child.getNextSibling(); 361 } 362 break; 363 } 364 365 default: 366 break; 367 } 368 } 369 370 371 protected ElementState content() 372 { 373 ElementState state; 374 375 state = getElementState(); 376 if ( ! isDocumentState() ) { 377 // If this is the first content in the element, 378 // change the state to not-empty. 379 if ( state.empty ) 380 state.empty = false; 381 // Except for one content type, all of them 382 // are not last element. That one content 383 // type will take care of itself. 384 state.afterElement = false; 385 } 386 return state; 387 } 388 389 390 protected String getEntityRef( int ch ) 391 { 392 return null; 393 } 394 395 396 }