New jaxp/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serialize/XML11Serializer.java

   1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 // Sep 14, 2000:
  23 //  Fixed problem with namespace handling. Contributed by
  24 //  David Blondeau <blondeau@intalio.com>
  25 // Sep 14, 2000:
  26 //  Fixed serializer to report IO exception directly, instead at
  27 //  the end of document processing.
  28 //  Reported by Patrick Higgins <phiggins@transzap.com>
  29 // Aug 21, 2000:
  30 //  Fixed bug in startDocument not calling prepare.
  31 //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
  32 // Aug 21, 2000:
  33 //  Added ability to omit DOCTYPE declaration.
  34 
  35 package com.sun.org.apache.xml.internal.serialize;
  36 
  37 import java.io.IOException;
  38 import java.io.OutputStream;
  39 import java.io.Writer;
  40 
  41 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
  42 import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
  43 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  44 import com.sun.org.apache.xerces.internal.util.XML11Char;
  45 import com.sun.org.apache.xerces.internal.util.XMLChar;
  46 import org.w3c.dom.DOMError;
  47 import org.w3c.dom.Document;
  48 import org.xml.sax.SAXException;
  49 
  50 /**
  51  * Implements an XML serializer supporting both DOM and SAX pretty
  52  * serializing. For usage instructions see {@link Serializer}.
  53  * <p>
  54  * If an output stream is used, the encoding is taken from the
  55  * output format (defaults to <tt>UTF-8</tt>). If a writer is
  56  * used, make sure the writer uses the same encoding (if applies)
  57  * as specified in the output format.
  58  * <p>
  59  * The serializer supports both DOM and SAX. SAX serializing is done by firing
  60  * SAX events and using the serializer as a document handler. DOM serializing is done
  61  * by calling {@link #serialize(Document)} or by using DOM Level 3
  62  * {@link org.w3c.dom.ls.LSSerializer} and
  63  * serializing with {@link org.w3c.dom.ls.LSSerializer#write},
  64  * {@link org.w3c.dom.ls.LSSerializer#writeToString}.
  65  * <p>
  66  * If an I/O exception occurs while serializing, the serializer
  67  * will not throw an exception directly, but only throw it
  68  * at the end of serializing (either DOM or SAX's {@link
  69  * org.xml.sax.DocumentHandler#endDocument}.
  70  * <p>
  71  * For elements that are not specified as whitespace preserving,
  72  * the serializer will potentially break long text lines at space
  73  * boundaries, indent lines, and serialize elements on separate
  74  * lines. Line terminators will be regarded as spaces, and
  75  * spaces at beginning of line will be stripped.
  76  *
  77  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  78  * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
  79  * @author Elena Litani IBM
  80  * @see Serializer
  81  *
  82  * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  83  * is replaced by that of Xalan. Main class
  84  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  85  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  86  */
  87 public class XML11Serializer
  88 extends XMLSerializer {
  89 
  90     //
  91     // constants
  92     //
  93 
  94     protected static final boolean DEBUG = false;
  95 
  96     //
  97     // data
  98     //
  99 
 100     //
 101     // DOM Level 3 implementation: variables intialized in DOMSerializerImpl
 102     //
 103 
 104     /** stores namespaces in scope */
 105     protected NamespaceSupport fNSBinder;
 106 
 107     /** stores all namespace bindings on the current element */
 108     protected NamespaceSupport fLocalNSBinder;
 109 
 110     /** symbol table for serialization */
 111     protected SymbolTable fSymbolTable;
 112 
 113     // is node dom level 1 node?
 114     protected boolean fDOML1 = false;
 115     // counter for new prefix names
 116     protected int fNamespaceCounter = 1;
 117     protected final static String PREFIX = "NS";
 118 
 119     /**
 120      * Controls whether namespace fixup should be performed during
 121      * the serialization.
 122      * NOTE: if this field is set to true the following
 123      * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
 124      * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
 125      */
 126     protected boolean fNamespaces = false;
 127 
 128     /**
 129      * Constructs a new serializer. The serializer cannot be used without
 130      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 131      * first.
 132      */
 133     public XML11Serializer() {
 134         super( );
 135         _format.setVersion("1.1");
 136     }
 137 
 138 
 139     /**
 140      * Constructs a new serializer. The serializer cannot be used without
 141      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 142      * first.
 143      */
 144     public XML11Serializer( OutputFormat format ) {
 145         super( format );
 146         _format.setVersion("1.1");
 147     }
 148 
 149 
 150     /**
 151      * Constructs a new serializer that writes to the specified writer
 152      * using the specified output format. If <tt>format</tt> is null,
 153      * will use a default output format.
 154      *
 155      * @param writer The writer to use
 156      * @param format The output format to use, null for the default
 157      */
 158     public XML11Serializer( Writer writer, OutputFormat format ) {
 159         super( writer, format );
 160         _format.setVersion("1.1");
 161     }
 162 
 163 
 164     /**
 165      * Constructs a new serializer that writes to the specified output
 166      * stream using the specified output format. If <tt>format</tt>
 167      * is null, will use a default output format.
 168      *
 169      * @param output The output stream to use
 170      * @param format The output format to use, null for the default
 171      */
 172     public XML11Serializer( OutputStream output, OutputFormat format ) {
 173         super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
 174         _format.setVersion("1.1");
 175     }
 176 
 177     //-----------------------------------------//
 178     // SAX content handler serializing methods //
 179     //-----------------------------------------//
 180 
 181 
 182     public void characters( char[] chars, int start, int length )
 183         throws SAXException
 184     {
 185         ElementState state;
 186 
 187         try {
 188             state = content();
 189 
 190             // Check if text should be print as CDATA section or unescaped
 191             // based on elements listed in the output format (the element
 192             // state) or whether we are inside a CDATA section or entity.
 193 
 194             if ( state.inCData || state.doCData ) {
 195                 int          saveIndent;
 196 
 197                 // Print a CDATA section. The text is not escaped, but ']]>'
 198                 // appearing in the code must be identified and dealt with.
 199                 // The contents of a text node is considered space preserving.
 200                 if ( ! state.inCData ) {
 201                     _printer.printText( "<![CDATA[" );
 202                     state.inCData = true;
 203                 }
 204                 saveIndent = _printer.getNextIndent();
 205                 _printer.setNextIndent( 0 );
 206                 char ch;
 207                 final int end = start + length;
 208                 for ( int index = start; index < end; ++index ) {
 209                     ch = chars[index];
 210                     if ( ch == ']' && index + 2 < end &&
 211                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
 212                         _printer.printText("]]]]><![CDATA[>");
 213                         index +=2;
 214                         continue;
 215                     }
 216                     if (!XML11Char.isXML11Valid(ch)) {
 217                         // check if it is surrogate
 218                         if (++index < end) {
 219                             surrogates(ch, chars[index], true);
 220                         }
 221                         else {
 222                             fatalError("The character '"+ch+"' is an invalid XML character");
 223                         }
 224                         continue;
 225                     }
 226                     if ( _encodingInfo.isPrintable(ch) && XML11Char.isXML11ValidLiteral(ch)) {
 227                         _printer.printText(ch);
 228                     }
 229                     else {
 230                         // The character is not printable -- split CDATA section
 231                         _printer.printText("]]>&#x");
 232                         _printer.printText(Integer.toHexString(ch));
 233                         _printer.printText(";<![CDATA[");
 234                     }
 235                 }
 236                 _printer.setNextIndent( saveIndent );
 237 
 238             }
 239             else {
 240 
 241                 int saveIndent;
 242 
 243                 if ( state.preserveSpace ) {
 244                     // If preserving space then hold of indentation so no
 245                     // excessive spaces are printed at line breaks, escape
 246                     // the text content without replacing spaces and print
 247                     // the text breaking only at line breaks.
 248                     saveIndent = _printer.getNextIndent();
 249                     _printer.setNextIndent( 0 );
 250                     printText( chars, start, length, true, state.unescaped );
 251                     _printer.setNextIndent( saveIndent );
 252                 }
 253                 else {
 254                     printText( chars, start, length, false, state.unescaped );
 255                 }
 256             }
 257         }
 258         catch ( IOException except ) {
 259             throw new SAXException( except );
 260         }
 261     }
 262 
 263     //
 264     // overwrite printing functions to make sure serializer prints out valid XML
 265     //
 266     protected void printEscaped( String source ) throws IOException {
 267         int length = source.length();
 268         for ( int i = 0 ; i < length ; ++i ) {
 269             int ch = source.charAt(i);
 270             if (!XML11Char.isXML11Valid(ch)) {
 271                 if (++i <length) {
 272                     surrogates(ch, source.charAt(i), false);
 273                 }
 274                 else {
 275                     fatalError("The character '"+(char)ch+"' is an invalid XML character");
 276                 }
 277                 continue;
 278             }
 279             if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028) {
 280                 printHex(ch);
 281             }
 282             else if (ch == '<') {
 283                 _printer.printText("&lt;");
 284             }
 285             else if (ch == '&') {
 286                 _printer.printText("&amp;");
 287             }
 288             else if (ch == '"') {
 289                 _printer.printText("&quot;");
 290             }
 291             else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
 292                 _printer.printText((char) ch);
 293             }
 294             else {
 295                 printHex(ch);
 296             }
 297         }
 298     }
 299 
 300     protected final void printCDATAText(String text) throws IOException {
 301         int length = text.length();
 302         char ch;
 303 
 304         for (int index = 0; index < length; ++index) {
 305             ch = text.charAt(index);
 306 
 307             if (ch == ']'
 308                 && index + 2 < length
 309                 && text.charAt(index + 1) == ']'
 310                 && text.charAt(index + 2) == '>') { // check for ']]>'
 311                 if (fDOMErrorHandler != null){
 312                     // REVISIT: this means that if DOM Error handler is not registered we don't report any
 313                     // fatal errors and might serialize not wellformed document
 314                 if ((features & DOMSerializerImpl.SPLITCDATA) == 0
 315                     && (features & DOMSerializerImpl.WELLFORMED) == 0) {
 316                     // issue fatal error
 317                     String msg =
 318                         DOMMessageFormatter.formatMessage(
 319                             DOMMessageFormatter.SERIALIZER_DOMAIN,
 320                             "EndingCDATA",
 321                             null);
 322                     modifyDOMError(
 323                         msg,
 324                         DOMError.SEVERITY_FATAL_ERROR,
 325                         null, fCurrentNode);
 326                     boolean continueProcess =
 327                         fDOMErrorHandler.handleError(fDOMError);
 328                     if (!continueProcess) {
 329                         throw new IOException();
 330                     }
 331                 } else {
 332                     // issue warning
 333                     String msg =
 334                         DOMMessageFormatter.formatMessage(
 335                             DOMMessageFormatter.SERIALIZER_DOMAIN,
 336                             "SplittingCDATA",
 337                             null);
 338                     modifyDOMError(
 339                         msg,
 340                         DOMError.SEVERITY_WARNING,
 341                         null, fCurrentNode);
 342                     fDOMErrorHandler.handleError(fDOMError);
 343                 }
 344                 }
 345                 // split CDATA section
 346                 _printer.printText("]]]]><![CDATA[>");
 347                 index += 2;
 348                 continue;
 349             }
 350 
 351             if (!XML11Char.isXML11Valid(ch)) {
 352                 // check if it is surrogate
 353                 if (++index < length) {
 354                     surrogates(ch, text.charAt(index), true);
 355                 }
 356                 else {
 357                     fatalError("The character '" + ch + "' is an invalid XML character");
 358                 }
 359                 continue;
 360             }
 361             if (_encodingInfo.isPrintable(ch)
 362                 && XML11Char.isXML11ValidLiteral(ch)) {
 363                 _printer.printText(ch);
 364             }
 365             else {
 366                 // The character is not printable -- split CDATA section
 367                 _printer.printText("]]>&#x");
 368                 _printer.printText(Integer.toHexString(ch));
 369                 _printer.printText(";<![CDATA[");
 370             }
 371         }
 372     }
 373 
 374     // note that this "int" should, in all cases, be a char.
 375     // REVISIT:  make it a char...
 376     protected final void printXMLChar( int ch ) throws IOException {
 377 
 378         if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
 379             printHex(ch);
 380         }
 381         else if ( ch == '<') {
 382             _printer.printText("&lt;");
 383         }
 384         else if (ch == '&') {
 385             _printer.printText("&amp;");
 386         }
 387         else if (ch == '>'){
 388             // character sequence "]]>" can't appear in content, therefore
 389             // we should escape '>'
 390             _printer.printText("&gt;");
 391         }
 392         else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
 393             _printer.printText((char)ch);
 394         }
 395         else {
 396             printHex(ch);
 397         }
 398     }
 399 
 400 
 401 
 402     protected final void surrogates(int high, int low, boolean inContent) throws IOException{
 403         if (XMLChar.isHighSurrogate(high)) {
 404             if (!XMLChar.isLowSurrogate(low)) {
 405                 //Invalid XML
 406                 fatalError("The character '"+(char)low+"' is an invalid XML character");
 407             }
 408             else {
 409                 int supplemental = XMLChar.supplemental((char)high, (char)low);
 410                 if (!XML11Char.isXML11Valid(supplemental)) {
 411                     //Invalid XML
 412                     fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
 413                 }
 414                 else {
 415                     if (inContent && content().inCData) {
 416                         _printer.printText("]]>&#x");
 417                         _printer.printText(Integer.toHexString(supplemental));
 418                         _printer.printText(";<![CDATA[");
 419                     }
 420                     else {
 421                                                 printHex(supplemental);
 422                     }
 423                 }
 424             }
 425         }
 426         else {
 427             fatalError("The character '"+(char)high+"' is an invalid XML character");
 428         }
 429 
 430     }
 431 
 432 
 433     protected void printText( String text, boolean preserveSpace, boolean unescaped )
 434     throws IOException {
 435         int index;
 436         char ch;
 437         int length = text.length();
 438         if ( preserveSpace ) {
 439             // Preserving spaces: the text must print exactly as it is,
 440             // without breaking when spaces appear in the text and without
 441             // consolidating spaces. If a line terminator is used, a line
 442             // break will occur.
 443             for ( index = 0 ; index < length ; ++index ) {
 444                 ch = text.charAt( index );
 445                 if (!XML11Char.isXML11Valid(ch)) {
 446                     // check if it is surrogate
 447                     if (++index <length) {
 448                         surrogates(ch, text.charAt(index), true);
 449                     } else {
 450                         fatalError("The character '"+ch+"' is an invalid XML character");
 451                     }
 452                     continue;
 453                 }
 454                 if ( unescaped  && XML11Char.isXML11ValidLiteral(ch)) {
 455                     _printer.printText( ch );
 456                 }
 457                 else {
 458                     printXMLChar( ch );
 459                 }
 460             }
 461         }
 462         else {
 463             // Not preserving spaces: print one part at a time, and
 464             // use spaces between parts to break them into different
 465             // lines. Spaces at beginning of line will be stripped
 466             // by printing mechanism. Line terminator is treated
 467             // no different than other text part.
 468             for ( index = 0 ; index < length ; ++index ) {
 469                 ch = text.charAt( index );
 470                 if (!XML11Char.isXML11Valid(ch)) {
 471                     // check if it is surrogate
 472                     if (++index <length) {
 473                         surrogates(ch, text.charAt(index), true);
 474                     } else {
 475                         fatalError("The character '"+ch+"' is an invalid XML character");
 476                     }
 477                     continue;
 478                 }
 479 
 480                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch) ) {
 481                     _printer.printText( ch );
 482                 }
 483                 else {
 484                     printXMLChar( ch );
 485                 }
 486             }
 487         }
 488     }
 489 
 490     protected void printText( char[] chars, int start, int length,
 491                               boolean preserveSpace, boolean unescaped ) throws IOException {
 492 
 493         if ( preserveSpace ) {
 494             // Preserving spaces: the text must print exactly as it is,
 495             // without breaking when spaces appear in the text and without
 496             // consolidating spaces. If a line terminator is used, a line
 497             // break will occur.
 498             while ( length-- > 0 ) {
 499                 char ch = chars[start++];
 500                 if (!XML11Char.isXML11Valid(ch)) {
 501                     // check if it is surrogate
 502                     if ( length-- > 0) {
 503                         surrogates(ch, chars[start++], true);
 504                     } else {
 505                         fatalError("The character '"+ch+"' is an invalid XML character");
 506                     }
 507                     continue;
 508                 }
 509                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
 510                     _printer.printText( ch );
 511                 }
 512                 else {
 513                     printXMLChar( ch );
 514                 }
 515             }
 516         }
 517         else {
 518             // Not preserving spaces: print one part at a time, and
 519             // use spaces between parts to break them into different
 520             // lines. Spaces at beginning of line will be stripped
 521             // by printing mechanism. Line terminator is treated
 522             // no different than other text part.
 523             while ( length-- > 0 ) {
 524                 char ch = chars[start++];
 525                 if (!XML11Char.isXML11Valid(ch)) {
 526                     // check if it is surrogate
 527                     if ( length-- > 0) {
 528                         surrogates(ch, chars[start++], true);
 529                     } else {
 530                         fatalError("The character '"+ch+"' is an invalid XML character");
 531                     }
 532                     continue;
 533                 }
 534 
 535                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
 536                     _printer.printText( ch );
 537                 }
 538                 else {
 539                     printXMLChar( ch );
 540                 }
 541             }
 542         }
 543     }
 544 
 545     public boolean reset() {
 546         super.reset();
 547         return true;
 548     }
 549 
 550 }