1 /*
   2  * Copyright (c) 2015, 2017 Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 
  22 // Sep 14, 2000:
  23 //  Fixed serializer to report IO exception directly, instead at
  24 //  the end of document processing.
  25 //  Reported by Patrick Higgins <phiggins@transzap.com>
  26 // Aug 21, 2000:
  27 //  Fixed bug in startDocument not calling prepare.
  28 //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
  29 // Aug 21, 2000:
  30 //  Added ability to omit DOCTYPE declaration.
  31 // Sep 1, 2000:
  32 //   If no output format is provided the serializer now defaults
  33 //   to ISO-8859-1 encoding. Reported by Mikael Staldal
  34 //   <d96-mst@d.kth.se>
  35 
  36 
  37 package com.sun.org.apache.xml.internal.serialize;
  38 
  39 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
  40 import java.io.IOException;
  41 import java.io.OutputStream;
  42 import java.io.Writer;
  43 import java.util.Enumeration;
  44 import java.util.Locale;
  45 import java.util.Map;
  46 import org.w3c.dom.Attr;
  47 import org.w3c.dom.Element;
  48 import org.w3c.dom.NamedNodeMap;
  49 import org.w3c.dom.Node;
  50 import org.xml.sax.AttributeList;
  51 import org.xml.sax.Attributes;
  52 import org.xml.sax.SAXException;
  53 
  54 
  55 /**
  56  * Implements an HTML/XHTML serializer supporting both DOM and SAX
  57  * pretty serializing. HTML/XHTML mode is determined in the
  58  * constructor.  For usage instructions see {@link Serializer}.
  59  * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to <tt>UTF-8</tt>). If no output format is
 * supplied at all, the serializer uses an HTML output format with
 * <tt>ISO-8859-1</tt> encoding. If a writer is used, make sure the
 * writer uses the same encoding (if applicable) as specified in the
 * output format.
  64  * <p>
  65  * The serializer supports both DOM and SAX. DOM serializing is done
  66  * by calling {@link #serialize} and SAX serializing is done by firing
  67  * SAX events and using the serializer as a document handler.
  68  * <p>
 * If an I/O exception occurs while serializing, it is reported
 * directly rather than at the end of serializing: the SAX event
 * methods rethrow it wrapped in a {@link org.xml.sax.SAXException},
 * and DOM element serializing throws the {@link java.io.IOException}
 * itself.
  73  * <p>
  74  * For elements that are not specified as whitespace preserving,
  75  * the serializer will potentially break long text lines at space
  76  * boundaries, indent lines, and serialize elements on separate
  77  * lines. Line terminators will be regarded as spaces, and
  78  * spaces at beginning of line will be stripped.
  79  * <p>
  80  * XHTML is slightly different than HTML:
  81  * <ul>
  82  * <li>Element/attribute names are lower case and case matters
  83  * <li>Attributes must specify value, even if empty string
  84  * <li>Empty elements must have '/' in empty tag
  85  * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
  86  * </ul>
  87  *
  88  * @deprecated This class was deprecated in Xerces 2.6.2. It is
  89  * recommended that new applications use JAXP's Transformation API
  90  * for XML (TrAX) for serializing HTML. See the Xerces documentation
  91  * for more information.
  92  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  93  * @see Serializer
  94  */
  95 @Deprecated
  96 public class HTMLSerializer
  97     extends BaseMarkupSerializer
  98 {
  99 
 100 
 101     /**
 102      * True if serializing in XHTML format.
 103      */
 104     private boolean _xhtml;
 105 
 106 
 107     public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";
 108 
 109     // for users to override XHTMLNamespace if need be.
 110     private String fUserXHTMLNamespace = null;
 111 
 112 
 113     /**
 114      * Constructs a new HTML/XHTML serializer depending on the value of
 115      * <tt>xhtml</tt>. The serializer cannot be used without calling
 116      * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 117      *
 118      * @param xhtml True if XHTML serializing
 119      */
 120     protected HTMLSerializer( boolean xhtml, OutputFormat format )
 121     {
 122         super( format );
 123         _xhtml = xhtml;
 124     }
 125 
 126 
 127     /**
 128      * Constructs a new serializer. The serializer cannot be used without
 129      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 130      * first.
 131      */
 132     public HTMLSerializer()
 133     {
 134         this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
 135     }
 136 
 137 
 138     /**
 139      * Constructs a new serializer. The serializer cannot be used without
 140      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 141      * first.
 142      */
 143     public HTMLSerializer( OutputFormat format )
 144     {
 145         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
 146     }
 147 
 148 
 149 
 150     /**
 151      * Constructs a new serializer that writes to the specified writer
 152      * using the specified output format. If <tt>format</tt> is null,
 153      * will use a default output format.
 154      *
 155      * @param writer The writer to use
 156      * @param format The output format to use, null for the default
 157      */
 158     public HTMLSerializer( Writer writer, OutputFormat format )
 159     {
 160         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
 161         setOutputCharStream( writer );
 162     }
 163 
 164 
 165     /**
 166      * Constructs a new serializer that writes to the specified output
 167      * stream using the specified output format. If <tt>format</tt>
 168      * is null, will use a default output format.
 169      *
 170      * @param output The output stream to use
 171      * @param format The output format to use, null for the default
 172      */
 173     public HTMLSerializer( OutputStream output, OutputFormat format )
 174     {
 175         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
 176         setOutputByteStream( output );
 177     }
 178 
 179 
 180     public void setOutputFormat( OutputFormat format )
 181     {
 182         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
 183     }
 184 
 185     // Set  value for alternate XHTML namespace.
 186     public void setXHTMLNamespace(String newNamespace) {
 187         fUserXHTMLNamespace = newNamespace;
 188     } // setXHTMLNamespace(String)
 189 
 190     //-----------------------------------------//
 191     // SAX content handler serializing methods //
 192     //-----------------------------------------//
 193 
 194 
 195     public void startElement( String namespaceURI, String localName,
 196                               String rawName, Attributes attrs )
 197         throws SAXException
 198     {
 199         int          i;
 200         boolean      preserveSpace;
 201         ElementState state;
 202         String       name;
 203         String       value;
 204         String       htmlName;
 205         boolean      addNSAttr = false;
 206 
 207         try {
 208             if ( _printer == null )
 209                 throw new IllegalStateException(
 210                                     DOMMessageFormatter.formatMessage(
 211                                     DOMMessageFormatter.SERIALIZER_DOMAIN,
 212                     "NoWriterSupplied", null));
 213 
 214             state = getElementState();
 215             if ( isDocumentState() ) {
 216                 // If this is the root element handle it differently.
 217                 // If the first root element in the document, serialize
 218                 // the document's DOCTYPE. Space preserving defaults
 219                 // to that of the output format.
 220                 if ( ! _started )
 221                     startDocument( (localName == null || localName.length() == 0)
 222                         ? rawName : localName );
 223             } else {
 224                 // For any other element, if first in parent, then
 225                 // close parent's opening tag and use the parnet's
 226                 // space preserving.
 227                 if ( state.empty )
 228                     _printer.printText( '>' );
 229                 // Indent this element on a new line if the first
 230                 // content of the parent element or immediately
 231                 // following an element.
 232                 if ( _indenting && ! state.preserveSpace &&
 233                      ( state.empty || state.afterElement ) )
 234                     _printer.breakLine();
 235             }
 236             preserveSpace = state.preserveSpace;
 237 
 238             // Do not change the current element state yet.
 239             // This only happens in endElement().
 240 
 241             // As per SAX2, the namespace URI is an empty string if the element has no
 242             // namespace URI, or namespaces is turned off. The check against null protects
 243             // against broken SAX implementations, so I've left it there. - mrglavas
 244             boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
 245 
 246             // SAX2: rawName (QName) could be empty string if
 247             // namespace-prefixes property is false.
 248             if ( rawName == null || rawName.length() == 0) {
 249                 rawName = localName;
 250                 if ( hasNamespaceURI ) {
 251                     String prefix;
 252                     prefix = getPrefix( namespaceURI );
 253                     if ( prefix != null && prefix.length() != 0 )
 254                         rawName = prefix + ":" + localName;
 255                 }
 256                 addNSAttr = true;
 257             }
 258             if ( !hasNamespaceURI )
 259                 htmlName = rawName;
 260             else {
 261                 if ( namespaceURI.equals( XHTMLNamespace ) ||
 262                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
 263                     htmlName = localName;
 264                 else
 265                     htmlName = null;
 266             }
 267 
 268             // XHTML: element names are lower case, DOM will be different
 269             _printer.printText( '<' );
 270             if ( _xhtml )
 271                 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
 272             else
 273                 _printer.printText( rawName );
 274             _printer.indent();
 275 
 276             // For each attribute serialize it's name and value as one part,
 277             // separated with a space so the element can be broken on
 278             // multiple lines.
 279             if ( attrs != null ) {
 280                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
 281                     _printer.printSpace();
 282                     name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
 283                     value = attrs.getValue( i );
 284                     if ( _xhtml || hasNamespaceURI ) {
 285                         // XHTML: print empty string for null values.
 286                         if ( value == null ) {
 287                             _printer.printText( name );
 288                             _printer.printText( "=\"\"" );
 289                         } else {
 290                             _printer.printText( name );
 291                             _printer.printText( "=\"" );
 292                             printEscaped( value );
 293                             _printer.printText( '"' );
 294                         }
 295                     } else {
 296                         // HTML: Empty values print as attribute name, no value.
 297                         // HTML: URI attributes will print unescaped
 298                         if ( value == null ) {
 299                             value = "";
 300                         }
 301                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
 302                             _printer.printText( name );
 303                         else if ( HTMLdtd.isURI( rawName, name ) ) {
 304                             _printer.printText( name );
 305                             _printer.printText( "=\"" );
 306                             _printer.printText( escapeURI( value ) );
 307                             _printer.printText( '"' );
 308                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
 309                             _printer.printText( name );
 310                         else {
 311                             _printer.printText( name );
 312                             _printer.printText( "=\"" );
 313                             printEscaped( value );
 314                             _printer.printText( '"' );
 315                         }
 316                     }
 317                 }
 318             }
 319             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
 320                 preserveSpace = true;
 321 
 322             if ( addNSAttr ) {
 323                 for (Map.Entry<String, String> entry : _prefixes.entrySet()) {
 324                     _printer.printSpace();
 325                     value = entry.getKey(); //The prefixes map uses the URI value as key.
 326                     name = entry.getValue(); //and prefix name as value
 327                     if ( name.length() == 0 ) {
 328                         _printer.printText( "xmlns=\"" );
 329                         printEscaped( value );
 330                         _printer.printText( '"' );
 331                     } else {
 332                         _printer.printText( "xmlns:" );
 333                         _printer.printText( name );
 334                         _printer.printText( "=\"" );
 335                         printEscaped( value );
 336                         _printer.printText( '"' );
 337                     }
 338                 }
 339             }
 340 
 341             // Now it's time to enter a new element state
 342             // with the tag name and space preserving.
 343             // We still do not change the curent element state.
 344             state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
 345 
 346             // Prevents line breaks inside A/TD
 347 
 348             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
 349                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
 350                 state.empty = false;
 351                 _printer.printText( '>' );
 352             }
 353 
 354             // Handle SCRIPT and STYLE specifically by changing the
 355             // state of the current element to CDATA (XHTML) or
 356             // unescaped (HTML).
 357             if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
 358                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
 359                 if ( _xhtml ) {
 360                     // XHTML: Print contents as CDATA section
 361                     state.doCData = true;
 362                 } else {
 363                     // HTML: Print contents unescaped
 364                     state.unescaped = true;
 365                 }
 366             }
 367         } catch ( IOException except ) {
 368             throw new SAXException( except );
 369         }
 370     }
 371 
 372 
 373     public void endElement( String namespaceURI, String localName,
 374                             String rawName )
 375         throws SAXException
 376     {
 377         try {
 378             endElementIO( namespaceURI, localName, rawName );
 379         } catch ( IOException except ) {
 380             throw new SAXException( except );
 381         }
 382     }
 383 
 384 
 385     public void endElementIO( String namespaceURI, String localName,
 386                               String rawName )
 387         throws IOException
 388     {
 389         ElementState state;
 390         String       htmlName;
 391 
 392         // Works much like content() with additions for closing
 393         // an element. Note the different checks for the closed
 394         // element's state and the parent element's state.
 395         _printer.unindent();
 396         state = getElementState();
 397 
 398         if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
 399             htmlName = state.rawName;
 400         else {
 401             if ( state.namespaceURI.equals( XHTMLNamespace ) ||
 402                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
 403                 htmlName = state.localName;
 404             else
 405                 htmlName = null;
 406         }
 407 
 408         if ( _xhtml) {
 409             if ( state.empty ) {
 410                 _printer.printText( " />" );
 411             } else {
 412                 // Must leave CData section first
 413                 if ( state.inCData )
 414                     _printer.printText( "]]>" );
 415                 // XHTML: element names are lower case, DOM will be different
 416                 _printer.printText( "</" );
 417                 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
 418                 _printer.printText( '>' );
 419             }
 420         } else {
 421             if ( state.empty )
 422                 _printer.printText( '>' );
 423             // This element is not empty and that last content was
 424             // another element, so print a line break before that
 425             // last element and this element's closing tag.
 426             // [keith] Provided this is not an anchor.
 427             // HTML: some elements do not print closing tag (e.g. LI)
 428             if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
 429                 if ( _indenting && ! state.preserveSpace && state.afterElement )
 430                     _printer.breakLine();
 431                 // Must leave CData section first (Illegal in HTML, but still)
 432                 if ( state.inCData )
 433                     _printer.printText( "]]>" );
 434                 _printer.printText( "</" );
 435                 _printer.printText( state.rawName );
 436                 _printer.printText( '>' );
 437             }
 438         }
 439         // Leave the element state and update that of the parent
 440         // (if we're not root) to not empty and after element.
 441         state = leaveElementState();
 442         // Temporary hack to prevent line breaks inside A/TD
 443         if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
 444                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
 445 
 446             state.afterElement = true;
 447         state.empty = false;
 448         if ( isDocumentState() )
 449             _printer.flush();
 450     }
 451 
 452 
 453     //------------------------------------------//
 454     // SAX document handler serializing methods //
 455     //------------------------------------------//
 456 
 457 
 458     public void characters( char[] chars, int start, int length )
 459         throws SAXException
 460     {
 461         ElementState state;
 462 
 463         try {
 464             // HTML: no CDATA section
 465             state = content();
 466             state.doCData = false;
 467             super.characters( chars, start, length );
 468         } catch ( IOException except ) {
 469             throw new SAXException( except );
 470         }
 471     }
 472 
 473 
 474     public void startElement( String tagName, AttributeList attrs )
 475         throws SAXException
 476     {
 477         int          i;
 478         boolean      preserveSpace;
 479         ElementState state;
 480         String       name;
 481         String       value;
 482 
 483         try {
 484             if ( _printer == null )
 485                 throw new IllegalStateException(
 486                                     DOMMessageFormatter.formatMessage(
 487                                     DOMMessageFormatter.SERIALIZER_DOMAIN,
 488                     "NoWriterSupplied", null));
 489 
 490 
 491             state = getElementState();
 492             if ( isDocumentState() ) {
 493                 // If this is the root element handle it differently.
 494                 // If the first root element in the document, serialize
 495                 // the document's DOCTYPE. Space preserving defaults
 496                 // to that of the output format.
 497                 if ( ! _started )
 498                     startDocument( tagName );
 499             } else {
 500                 // For any other element, if first in parent, then
 501                 // close parent's opening tag and use the parnet's
 502                 // space preserving.
 503                 if ( state.empty )
 504                     _printer.printText( '>' );
 505                 // Indent this element on a new line if the first
 506                 // content of the parent element or immediately
 507                 // following an element.
 508                 if ( _indenting && ! state.preserveSpace &&
 509                      ( state.empty || state.afterElement ) )
 510                     _printer.breakLine();
 511             }
 512             preserveSpace = state.preserveSpace;
 513 
 514             // Do not change the current element state yet.
 515             // This only happens in endElement().
 516 
 517             // XHTML: element names are lower case, DOM will be different
 518             _printer.printText( '<' );
 519             if ( _xhtml )
 520                 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
 521             else
 522                 _printer.printText( tagName );
 523             _printer.indent();
 524 
 525             // For each attribute serialize it's name and value as one part,
 526             // separated with a space so the element can be broken on
 527             // multiple lines.
 528             if ( attrs != null ) {
 529                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
 530                     _printer.printSpace();
 531                     name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
 532                     value = attrs.getValue( i );
 533                     if ( _xhtml ) {
 534                         // XHTML: print empty string for null values.
 535                         if ( value == null ) {
 536                             _printer.printText( name );
 537                             _printer.printText( "=\"\"" );
 538                         } else {
 539                             _printer.printText( name );
 540                             _printer.printText( "=\"" );
 541                             printEscaped( value );
 542                             _printer.printText( '"' );
 543                         }
 544                     } else {
 545                         // HTML: Empty values print as attribute name, no value.
 546                         // HTML: URI attributes will print unescaped
 547                         if ( value == null ) {
 548                             value = "";
 549                         }
 550                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
 551                             _printer.printText( name );
 552                         else if ( HTMLdtd.isURI( tagName, name ) ) {
 553                             _printer.printText( name );
 554                             _printer.printText( "=\"" );
 555                             _printer.printText( escapeURI( value ) );
 556                             _printer.printText( '"' );
 557                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
 558                             _printer.printText( name );
 559                         else {
 560                             _printer.printText( name );
 561                             _printer.printText( "=\"" );
 562                             printEscaped( value );
 563                             _printer.printText( '"' );
 564                         }
 565                     }
 566                 }
 567             }
 568             if ( HTMLdtd.isPreserveSpace( tagName ) )
 569                 preserveSpace = true;
 570 
 571             // Now it's time to enter a new element state
 572             // with the tag name and space preserving.
 573             // We still do not change the curent element state.
 574             state = enterElementState( null, null, tagName, preserveSpace );
 575 
 576             // Prevents line breaks inside A/TD
 577             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
 578                 state.empty = false;
 579                 _printer.printText( '>' );
 580             }
 581 
 582             // Handle SCRIPT and STYLE specifically by changing the
 583             // state of the current element to CDATA (XHTML) or
 584             // unescaped (HTML).
 585             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
 586                  tagName.equalsIgnoreCase( "STYLE" ) ) {
 587                 if ( _xhtml ) {
 588                     // XHTML: Print contents as CDATA section
 589                     state.doCData = true;
 590                 } else {
 591                     // HTML: Print contents unescaped
 592                     state.unescaped = true;
 593                 }
 594             }
 595         } catch ( IOException except ) {
 596             throw new SAXException( except );
 597         }
 598     }
 599 
 600 
 601     public void endElement( String tagName )
 602         throws SAXException
 603     {
 604         endElement( null, null, tagName );
 605     }
 606 
 607 
 608     //------------------------------------------//
    // Generic node serializing methods         //
 610     //------------------------------------------//
 611 
 612 
 613     /**
 614      * Called to serialize the document's DOCTYPE by the root element.
 615      * The document type declaration must name the root element,
 616      * but the root element is only known when that element is serialized,
 617      * and not at the start of the document.
 618      * <p>
 619      * This method will check if it has not been called before ({@link #_started}),
 620      * will serialize the document type declaration, and will serialize all
 621      * pre-root comments and PIs that were accumulated in the document
 622      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
 623      * this is not the first root element of the document.
 624      */
 625     protected void startDocument( String rootTagName )
 626         throws IOException
 627     {
 628         StringBuffer buffer;
 629 
 630         // Not supported in HTML/XHTML, but we still have to switch
 631         // out of DTD mode.
 632         _printer.leaveDTD();
 633         if ( ! _started ) {
 634             // If the public and system identifiers were not specified
 635             // in the output format, use the appropriate ones for HTML
 636             // or XHTML.
 637             if ( _docTypePublicId == null && _docTypeSystemId == null ) {
 638                 if ( _xhtml ) {
 639                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
 640                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
 641                 } else {
 642                     _docTypePublicId = HTMLdtd.HTMLPublicId;
 643                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
 644                 }
 645             }
 646 
 647             if ( ! _format.getOmitDocumentType() ) {
 648                 // XHTML: If public identifier and system identifier
 649                 //  specified, print them, else print just system identifier
 650                 // HTML: If public identifier specified, print it with
 651                 //  system identifier, if specified.
 652                 // XHTML requires that all element names are lower case, so the
 653                 // root on the DOCTYPE must be 'html'. - mrglavas
 654                 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null )  ) {
 655                     if (_xhtml) {
 656                         _printer.printText( "<!DOCTYPE html PUBLIC " );
 657                     }
 658                     else {
 659                         _printer.printText( "<!DOCTYPE HTML PUBLIC " );
 660                     }
 661                     printDoctypeURL( _docTypePublicId );
 662                     if ( _docTypeSystemId != null ) {
 663                         if ( _indenting ) {
 664                             _printer.breakLine();
 665                             _printer.printText( "                      " );
 666                         } else
 667                         _printer.printText( ' ' );
 668                         printDoctypeURL( _docTypeSystemId );
 669                     }
 670                     _printer.printText( '>' );
 671                     _printer.breakLine();
 672                 } else if ( _docTypeSystemId != null ) {
 673                     if (_xhtml) {
 674                         _printer.printText( "<!DOCTYPE html SYSTEM " );
 675                     }
 676                     else {
 677                         _printer.printText( "<!DOCTYPE HTML SYSTEM " );
 678                     }
 679                     printDoctypeURL( _docTypeSystemId );
 680                     _printer.printText( '>' );
 681                     _printer.breakLine();
 682                 }
 683             }
 684         }
 685 
 686         _started = true;
 687         // Always serialize these, even if not te first root element.
 688         serializePreRoot();
 689     }
 690 
 691 
 692     /**
 693      * Called to serialize a DOM element. Equivalent to calling {@link
 694      * #startElement}, {@link #endElement} and serializing everything
 695      * inbetween, but better optimized.
 696      */
 697     protected void serializeElement( Element elem )
 698         throws IOException
 699     {
 700         Attr         attr;
 701         NamedNodeMap attrMap;
 702         int          i;
 703         Node         child;
 704         ElementState state;
 705         boolean      preserveSpace;
 706         String       name;
 707         String       value;
 708         String       tagName;
 709 
 710         tagName = elem.getTagName();
 711         state = getElementState();
 712         if ( isDocumentState() ) {
 713             // If this is the root element handle it differently.
 714             // If the first root element in the document, serialize
 715             // the document's DOCTYPE. Space preserving defaults
 716             // to that of the output format.
 717             if ( ! _started )
 718                 startDocument( tagName );
 719         } else {
 720             // For any other element, if first in parent, then
 721             // close parent's opening tag and use the parnet's
 722             // space preserving.
 723             if ( state.empty )
 724                 _printer.printText( '>' );
 725             // Indent this element on a new line if the first
 726             // content of the parent element or immediately
 727             // following an element.
 728             if ( _indenting && ! state.preserveSpace &&
 729                  ( state.empty || state.afterElement ) )
 730                 _printer.breakLine();
 731         }
 732         preserveSpace = state.preserveSpace;
 733 
 734         // Do not change the current element state yet.
 735         // This only happens in endElement().
 736 
 737         // XHTML: element names are lower case, DOM will be different
 738         _printer.printText( '<' );
 739         if ( _xhtml )
 740             _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
 741         else
 742             _printer.printText( tagName );
 743         _printer.indent();
 744 
 745         // Lookup the element's attribute, but only print specified
 746         // attributes. (Unspecified attributes are derived from the DTD.
 747         // For each attribute print it's name and value as one part,
 748         // separated with a space so the element can be broken on
 749         // multiple lines.
 750         attrMap = elem.getAttributes();
 751         if ( attrMap != null ) {
 752             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
 753                 attr = (Attr) attrMap.item( i );
 754                 name = attr.getName().toLowerCase(Locale.ENGLISH);
 755                 value = attr.getValue();
 756                 if ( attr.getSpecified() ) {
 757                     _printer.printSpace();
 758                     if ( _xhtml ) {
 759                         // XHTML: print empty string for null values.
 760                         if ( value == null ) {
 761                             _printer.printText( name );
 762                             _printer.printText( "=\"\"" );
 763                         } else {
 764                             _printer.printText( name );
 765                             _printer.printText( "=\"" );
 766                             printEscaped( value );
 767                             _printer.printText( '"' );
 768                         }
 769                     } else {
 770                         // HTML: Empty values print as attribute name, no value.
 771                         // HTML: URI attributes will print unescaped
 772                         if ( value == null ) {
 773                             value = "";
 774                         }
 775                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
 776                             _printer.printText( name );
 777                         else if ( HTMLdtd.isURI( tagName, name ) ) {
 778                             _printer.printText( name );
 779                             _printer.printText( "=\"" );
 780                             _printer.printText( escapeURI( value ) );
 781                             _printer.printText( '"' );
 782                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
 783                             _printer.printText( name );
 784                         else {
 785                             _printer.printText( name );
 786                             _printer.printText( "=\"" );
 787                             printEscaped( value );
 788                             _printer.printText( '"' );
 789                         }
 790                     }
 791                 }
 792             }
 793         }
 794         if ( HTMLdtd.isPreserveSpace( tagName ) )
 795             preserveSpace = true;
 796 
 797         // If element has children, or if element is not an empty tag,
 798         // serialize an opening tag.
 799         if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
 800             // Enter an element state, and serialize the children
 801             // one by one. Finally, end the element.
 802             state = enterElementState( null, null, tagName, preserveSpace );
 803 
 804             // Prevents line breaks inside A/TD
 805             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
 806                 state.empty = false;
 807                 _printer.printText( '>' );
 808             }
 809 
 810             // Handle SCRIPT and STYLE specifically by changing the
 811             // state of the current element to CDATA (XHTML) or
 812             // unescaped (HTML).
 813             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
 814                  tagName.equalsIgnoreCase( "STYLE" ) ) {
 815                 if ( _xhtml ) {
 816                     // XHTML: Print contents as CDATA section
 817                     state.doCData = true;
 818                 } else {
 819                     // HTML: Print contents unescaped
 820                     state.unescaped = true;
 821                 }
 822             }
 823             child = elem.getFirstChild();
 824             while ( child != null ) {
 825                 serializeNode( child );
 826                 child = child.getNextSibling();
 827             }
 828             endElementIO( null, null, tagName );
 829         } else {
 830             _printer.unindent();
 831             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
 832             // HTML: Empty tags are defined as such in DTD no in document.
 833             if ( _xhtml )
 834                 _printer.printText( " />" );
 835             else
 836                 _printer.printText( '>' );
 837             // After element but parent element is no longer empty.
 838             state.afterElement = true;
 839             state.empty = false;
 840             if ( isDocumentState() )
 841                 _printer.flush();
 842         }
 843     }
 844 
 845 
 846 
 847     protected void characters( String text )
 848         throws IOException
 849     {
 850         ElementState state;
 851 
 852         // HTML: no CDATA section
 853         state = content();
 854         super.characters( text );
 855     }
 856 
 857 
 858     protected String getEntityRef( int ch )
 859     {
 860         return HTMLdtd.fromChar( ch );
 861     }
 862 
 863 
 864     protected String escapeURI( String uri )
 865     {
 866         int index;
 867 
 868         // XXX  Apparently Netscape doesn't like if we escape the URI
 869         //      using %nn, so we leave it as is, just remove any quotes.
 870         index = uri.indexOf( "\"" );
 871         if ( index >= 0 )
 872             return uri.substring( 0, index );
 873         else
 874             return uri;
 875     }
 876 
 877 
 878 }