/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Sep 14, 2000:
//  Fixed problem with namespace handling. Contributed by
//  David Blondeau <blondeau@intalio.com>
// Sep 14, 2000:
//  Fixed serializer to report IO exception directly, instead at
//  the end of document processing.
//  Reported by Patrick Higgins <phiggins@transzap.com>
// Aug 21, 2000:
//  Fixed bug in startDocument not calling prepare.
//  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
// Aug 21, 2000:
//  Added ability to omit DOCTYPE declaration.

package com.sun.org.apache.xml.internal.serialize;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;

import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
import com.sun.org.apache.xerces.internal.util.SymbolTable;
import com.sun.org.apache.xerces.internal.util.XML11Char;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import org.w3c.dom.DOMError;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

/**
 * Implements an XML serializer supporting both DOM and SAX pretty
 * serializing. For usage instructions see {@link Serializer}.
 * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to <tt>UTF-8</tt>). If a writer is
 * used, make sure the writer uses the same encoding (if applies)
 * as specified in the output format.
 * <p>
 * The serializer supports both DOM and SAX. SAX serializing is done by firing
 * SAX events and using the serializer as a document handler. DOM serializing is done
 * by calling {@link #serialize(Document)} or by using DOM Level 3
 * {@link org.w3c.dom.ls.LSSerializer} and
 * serializing with {@link org.w3c.dom.ls.LSSerializer#write},
 * {@link org.w3c.dom.ls.LSSerializer#writeToString}.
 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at beginning of line will be stripped.
 *
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
 * @author Elena Litani IBM
 * @see Serializer
 *
 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
 * is replaced by that of Xalan. Main class
 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
 */
public class XML11Serializer
extends XMLSerializer {

    //
    // constants
    //

    protected static final boolean DEBUG = false;

    //
    // data
    //

    //
    // DOM Level 3 implementation: variables initialized in DOMSerializerImpl
    //

    /** stores namespaces in scope */
    protected NamespaceSupport fNSBinder;

    /** stores all namespace bindings on the current element */
    protected NamespaceSupport fLocalNSBinder;

    /** symbol table for serialization */
    protected SymbolTable fSymbolTable;

    // is node dom level 1 node?
    protected boolean fDOML1 = false;
    // counter for new prefix names
    protected int fNamespaceCounter = 1;
    protected final static String PREFIX = "NS";

    /**
     * Controls whether namespace fixup should be performed during
     * the serialization.
     * NOTE: if this field is set to true the following
     * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
     * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
     */
    protected boolean fNamespaces = false;

    /**
     * Constructs a new serializer. The serializer cannot be used without
     * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
     * first. The version of the output format in use is set to "1.1".
     */
    public XML11Serializer() {
        super( );
        _format.setVersion("1.1");
    }


    /**
     * Constructs a new serializer using the specified output format.
     * The serializer cannot be used without
     * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
     * first. The version of the output format in use is set to "1.1".
     *
     * @param format The output format to use
     */
    public XML11Serializer( OutputFormat format ) {
        super( format );
        _format.setVersion("1.1");
    }


    /**
     * Constructs a new serializer that writes to the specified writer
     * using the specified output format. If <tt>format</tt> is null,
     * will use a default output format. The version of the output format
     * in use is set to "1.1".
     *
     * @param writer The writer to use
     * @param format The output format to use, null for the default
     */
    public XML11Serializer( Writer writer, OutputFormat format ) {
        super( writer, format );
        _format.setVersion("1.1");
    }


    /**
     * Constructs a new serializer that writes to the specified output
     * stream using the specified output format. If <tt>format</tt>
     * is null, will use a default output format. The version of the
     * output format in use is set to "1.1".
     *
     * @param output The output stream to use
     * @param format The output format to use, null for the default
     */
    public XML11Serializer( OutputStream output, OutputFormat format ) {
        super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
        _format.setVersion("1.1");
    }

    //-----------------------------------------//
    // SAX content handler serializing methods //
    //-----------------------------------------//


    /**
     * Serializes a run of character data using the XML 1.1 character rules.
     * <p>
     * When the current element state is (or requests) a CDATA section the
     * text is written inside the section: every <tt>]]&gt;</tt> sequence
     * and every character that cannot be written literally splits the
     * section. Otherwise the text is handed to
     * {@link #printText(char[], int, int, boolean, boolean)}.
     *
     * @param chars  buffer holding the characters
     * @param start  index of the first character to serialize
     * @param length number of characters to serialize
     * @throws SAXException wrapping any {@link IOException} raised while printing
     */
    public void characters( char[] chars, int start, int length )
        throws SAXException
    {
        ElementState state;

        try {
            state = content();

            // Check if text should be printed as CDATA section or unescaped
            // based on elements listed in the output format (the element
            // state) or whether we are inside a CDATA section or entity.

            if ( state.inCData || state.doCData ) {
                int saveIndent;

                // Print a CDATA section. The text is not escaped, but ']]>'
                // appearing in the code must be identified and dealt with.
                // The contents of a text node is considered space preserving.
                if ( ! state.inCData ) {
                    _printer.printText( "<![CDATA[" );
                    state.inCData = true;
                }
                saveIndent = _printer.getNextIndent();
                _printer.setNextIndent( 0 );
                char ch;
                final int end = start + length;
                for ( int index = start; index < end; ++index ) {
                    ch = chars[index];
                    if ( ch == ']' && index + 2 < end &&
                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
                        // Close the section after "]]" and reopen it before ">"
                        // so the terminator never appears inside one section.
                        _printer.printText("]]]]><![CDATA[>");
                        index +=2;
                        continue;
                    }
                    if (!XML11Char.isXML11Valid(ch)) {
                        // check if it is surrogate
                        // Note: the next code unit is always consumed here as the
                        // presumed low surrogate, whatever ch turns out to be.
                        if (++index < end) {
                            surrogates(ch, chars[index], true);
                        }
                        else {
                            fatalError("The character '"+ch+"' is an invalid XML character");
                        }
                        continue;
                    }
                    if ( _encodingInfo.isPrintable(ch) && XML11Char.isXML11ValidLiteral(ch)) {
                        _printer.printText(ch);
                    }
                    else {
                        // The character is not printable -- split CDATA section
                        _printer.printText("]]>&#x");
                        _printer.printText(Integer.toHexString(ch));
                        _printer.printText(";<![CDATA[");
                    }
                }
                _printer.setNextIndent( saveIndent );

            }
            else {

                int saveIndent;

                if ( state.preserveSpace ) {
                    // If preserving space then hold off indentation so no
                    // excessive spaces are printed at line breaks, escape
                    // the text content without replacing spaces and print
                    // the text breaking only at line breaks.
                    saveIndent = _printer.getNextIndent();
                    _printer.setNextIndent( 0 );
                    printText( chars, start, length, true, state.unescaped );
                    _printer.setNextIndent( saveIndent );
                }
                else {
                    printText( chars, start, length, false, state.unescaped );
                }
            }
        }
        catch ( IOException except ) {
            throw new SAXException( except );
        }
    }

    //
    // overwrite printing functions to make sure serializer prints out valid XML
    //

    /**
     * Prints <tt>source</tt> with markup-significant and non-literal
     * characters escaped.
     * <p>
     * Line feed, carriage return, tab, U+0085 and U+2028 are written as
     * hexadecimal character references; <tt>&lt;</tt>, <tt>&amp;</tt> and the
     * double quote are written as the predefined entity references
     * <tt>&amp;lt;</tt>, <tt>&amp;amp;</tt> and <tt>&amp;quot;</tt>. Any other
     * character at or above the space character that the encoding can print
     * is written as is; everything else becomes a hexadecimal character
     * reference.
     *
     * @param source the text to escape and print
     * @throws IOException if printing fails or an invalid character is reported
     *                     through <tt>fatalError</tt>
     */
    protected void printEscaped( String source ) throws IOException {
        int length = source.length();
        for ( int i = 0 ; i < length ; ++i ) {
            int ch = source.charAt(i);
            if (!XML11Char.isXML11Valid(ch)) {
                // Presumably a surrogate pair: the next code unit is always
                // consumed as the low half.
                if (++i <length) {
                    surrogates(ch, source.charAt(i), false);
                }
                else {
                    fatalError("The character '"+(char)ch+"' is an invalid XML character");
                }
                continue;
            }
            if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028) {
                printHex(ch);
            }
            else if (ch == '<') {
                // Must be an entity reference: a raw '<' would start markup.
                _printer.printText("&lt;");
            }
            else if (ch == '&') {
                // Must be an entity reference: a raw '&' would start a reference.
                _printer.printText("&amp;");
            }
            else if (ch == '"') {
                _printer.printText("&quot;");
            }
            else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
                _printer.printText((char) ch);
            }
            else {
                printHex(ch);
            }
        }
    }

    /**
     * Prints the content of a CDATA section.
     * <p>
     * A <tt>]]&gt;</tt> sequence inside <tt>text</tt> splits the section in
     * two. When a DOM error handler is registered, the split is first
     * reported: as a fatal error if neither the split-cdata-sections nor the
     * well-formed feature bit is set, otherwise as a warning. Characters that
     * cannot be written literally are emitted as a hexadecimal character
     * reference placed between two CDATA sections.
     *
     * @param text the CDATA content
     * @throws IOException if printing fails, or if the error handler asks to
     *                     stop after the fatal error
     */
    protected final void printCDATAText(String text) throws IOException {
        int length = text.length();
        char ch;

        for (int index = 0; index < length; ++index) {
            ch = text.charAt(index);

            if (ch == ']'
                && index + 2 < length
                && text.charAt(index + 1) == ']'
                && text.charAt(index + 2) == '>') { // check for ']]>'
                if (fDOMErrorHandler != null){
                    // REVISIT: this means that if DOM Error handler is not registered we don't report any
                    // fatal errors and might serialize not wellformed document
                    if ((features & DOMSerializerImpl.SPLITCDATA) == 0
                        && (features & DOMSerializerImpl.WELLFORMED) == 0) {
                        // issue fatal error
                        String msg =
                            DOMMessageFormatter.formatMessage(
                                DOMMessageFormatter.SERIALIZER_DOMAIN,
                                "EndingCDATA",
                                null);
                        modifyDOMError(
                            msg,
                            DOMError.SEVERITY_FATAL_ERROR,
                            null, fCurrentNode);
                        boolean continueProcess =
                            fDOMErrorHandler.handleError(fDOMError);
                        if (!continueProcess) {
                            throw new IOException();
                        }
                    } else {
                        // issue warning
                        String msg =
                            DOMMessageFormatter.formatMessage(
                                DOMMessageFormatter.SERIALIZER_DOMAIN,
                                "SplittingCDATA",
                                null);
                        modifyDOMError(
                            msg,
                            DOMError.SEVERITY_WARNING,
                            null, fCurrentNode);
                        fDOMErrorHandler.handleError(fDOMError);
                    }
                }
                // split CDATA section
                _printer.printText("]]]]><![CDATA[>");
                index += 2;
                continue;
            }

            if (!XML11Char.isXML11Valid(ch)) {
                // check if it is surrogate
                if (++index < length) {
                    surrogates(ch, text.charAt(index), true);
                }
                else {
                    fatalError("The character '" + ch + "' is an invalid XML character");
                }
                continue;
            }
            if (_encodingInfo.isPrintable(ch)
                && XML11Char.isXML11ValidLiteral(ch)) {
                _printer.printText(ch);
            }
            else {
                // The character is not printable -- split CDATA section
                _printer.printText("]]>&#x");
                _printer.printText(Integer.toHexString(ch));
                _printer.printText(";<![CDATA[");
            }
        }
    }

    // note that this "int" should, in all cases, be a char.
    // REVISIT: make it a char...
    /**
     * Prints a single character of element content, escaping it if needed.
     * <p>
     * Carriage return, U+0085 and U+2028 are written as hexadecimal character
     * references; <tt>&lt;</tt>, <tt>&amp;</tt> and <tt>&gt;</tt> are written as
     * <tt>&amp;lt;</tt>, <tt>&amp;amp;</tt> and <tt>&amp;gt;</tt>. Unlike
     * {@link #printEscaped(String)}, line feed and tab get no special
     * treatment here. Any other character is written literally when the
     * encoding can print it and it is a valid XML 1.1 literal, and as a
     * hexadecimal character reference otherwise.
     *
     * @param ch the character to print
     * @throws IOException if printing fails
     */
    protected final void printXMLChar( int ch ) throws IOException {

        if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
            printHex(ch);
        }
        else if ( ch == '<') {
            _printer.printText("&lt;");
        }
        else if (ch == '&') {
            _printer.printText("&amp;");
        }
        else if (ch == '>'){
            // character sequence "]]>" can't appear in content, therefore
            // we should escape '>'
            _printer.printText("&gt;");
        }
        else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
            _printer.printText((char)ch);
        }
        else {
            printHex(ch);
        }
    }



    /**
     * Prints the supplemental character formed by a surrogate pair, or
     * reports a fatal error when the two code units do not form a valid one.
     * <p>
     * Inside a CDATA section (only checked when <tt>inContent</tt> is true)
     * the character is written as a hexadecimal character reference placed
     * between two CDATA sections; otherwise it is passed to
     * <tt>printHex</tt>.
     *
     * @param high      the code unit expected to be a high surrogate
     * @param low       the code unit expected to be a low surrogate
     * @param inContent whether the pair occurs in element content, in which
     *                  case the current CDATA state is consulted
     * @throws IOException if printing fails or a fatal error is raised
     */
    protected final void surrogates(int high, int low, boolean inContent) throws IOException{
        if (XMLChar.isHighSurrogate(high)) {
            if (!XMLChar.isLowSurrogate(low)) {
                //Invalid XML
                fatalError("The character '"+(char)low+"' is an invalid XML character");
            }
            else {
                int supplemental = XMLChar.supplemental((char)high, (char)low);
                if (!XML11Char.isXML11Valid(supplemental)) {
                    //Invalid XML
                    fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
                }
                else {
                    if (inContent && content().inCData) {
                        _printer.printText("]]>&#x");
                        _printer.printText(Integer.toHexString(supplemental));
                        _printer.printText(";<![CDATA[");
                    }
                    else {
                        printHex(supplemental);
                    }
                }
            }
        }
        else {
            fatalError("The character '"+(char)high+"' is an invalid XML character");
        }

    }


    /**
     * Prints element text one character at a time.
     * <p>
     * Each character is written literally when <tt>unescaped</tt> is true and
     * it is a valid XML 1.1 literal; otherwise it goes through
     * {@link #printXMLChar(int)}. Characters that are not valid XML 1.1
     * characters are handled as surrogate pairs.
     *
     * @param text          the text to print
     * @param preserveSpace accepted for signature compatibility; this
     *                      implementation prints the characters the same way
     *                      whatever its value
     * @param unescaped     true to write valid literal characters without escaping
     * @throws IOException if printing fails or a fatal error is raised
     */
    protected void printText( String text, boolean preserveSpace, boolean unescaped )
        throws IOException {
        final int length = text.length();
        // The space-preserving and non-preserving paths perform exactly the
        // same per-character work, so a single loop serves both.
        for ( int index = 0 ; index < length ; ++index ) {
            char ch = text.charAt( index );
            if (!XML11Char.isXML11Valid(ch)) {
                // check if it is surrogate
                if (++index <length) {
                    surrogates(ch, text.charAt(index), true);
                } else {
                    fatalError("The character '"+ch+"' is an invalid XML character");
                }
                continue;
            }
            if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
                _printer.printText( ch );
            }
            else {
                printXMLChar( ch );
            }
        }
    }

    /**
     * Prints element text from a character buffer one character at a time.
     * <p>
     * Each character is written literally when <tt>unescaped</tt> is true and
     * it is a valid XML 1.1 literal; otherwise it goes through
     * {@link #printXMLChar(int)}. Characters that are not valid XML 1.1
     * characters are handled as surrogate pairs.
     *
     * @param chars         buffer holding the characters
     * @param start         index of the first character to print
     * @param length        number of characters to print
     * @param preserveSpace accepted for signature compatibility; this
     *                      implementation prints the characters the same way
     *                      whatever its value
     * @param unescaped     true to write valid literal characters without escaping
     * @throws IOException if printing fails or a fatal error is raised
     */
    protected void printText( char[] chars, int start, int length,
                              boolean preserveSpace, boolean unescaped ) throws IOException {

        // The space-preserving and non-preserving paths perform exactly the
        // same per-character work, so a single loop serves both.
        while ( length-- > 0 ) {
            char ch = chars[start++];
            if (!XML11Char.isXML11Valid(ch)) {
                // check if it is surrogate
                if ( length-- > 0) {
                    surrogates(ch, chars[start++], true);
                } else {
                    fatalError("The character '"+ch+"' is an invalid XML character");
                }
                continue;
            }
            if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
                _printer.printText( ch );
            }
            else {
                printXMLChar( ch );
            }
        }
    }

    /**
     * Resets the serializer by delegating to the superclass.
     *
     * @return the result reported by the superclass reset
     */
    public boolean reset() {
        // Propagate the superclass result instead of discarding it.
        return super.reset();
    }

}