1 /* 2 * Copyright (c) 2015, 2017 Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 // Sep 14, 2000: 23 // Fixed serializer to report IO exception directly, instead at 24 // the end of document processing. 25 // Reported by Patrick Higgins <phiggins@transzap.com> 26 // Aug 21, 2000: 27 // Fixed bug in startDocument not calling prepare. 28 // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se> 29 // Aug 21, 2000: 30 // Added ability to omit DOCTYPE declaration. 31 // Sep 1, 2000: 32 // If no output format is provided the serializer now defaults 33 // to ISO-8859-1 encoding. Reported by Mikael Staldal 34 // <d96-mst@d.kth.se> 35 36 37 package com.sun.org.apache.xml.internal.serialize; 38 39 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; 40 import java.io.IOException; 41 import java.io.OutputStream; 42 import java.io.Writer; 43 import java.util.Enumeration; 44 import java.util.Locale; 45 import java.util.Map; 46 import org.w3c.dom.Attr; 47 import org.w3c.dom.Element; 48 import org.w3c.dom.NamedNodeMap; 49 import org.w3c.dom.Node; 50 import org.xml.sax.AttributeList; 51 import org.xml.sax.Attributes; 52 import org.xml.sax.SAXException; 53 54 55 /** 56 * Implements an HTML/XHTML serializer supporting both DOM and SAX 57 * pretty serializing. HTML/XHTML mode is determined in the 58 * constructor. For usage instructions see {@link Serializer}. 59 * <p> 60 * If an output stream is used, the encoding is taken from the 61 * output format (defaults to <tt>UTF-8</tt>). If a writer is 62 * used, make sure the writer uses the same encoding (if applies) 63 * as specified in the output format. 64 * <p> 65 * The serializer supports both DOM and SAX. DOM serializing is done 66 * by calling {@link #serialize} and SAX serializing is done by firing 67 * SAX events and using the serializer as a document handler. 68 * <p> 69 * If an I/O exception occurs while serializing, the serializer 70 * will not throw an exception directly, but only throw it 71 * at the end of serializing (either DOM or SAX's {@link 72 * org.xml.sax.DocumentHandler#endDocument}. 73 * <p> 74 * For elements that are not specified as whitespace preserving, 75 * the serializer will potentially break long text lines at space 76 * boundaries, indent lines, and serialize elements on separate 77 * lines. Line terminators will be regarded as spaces, and 78 * spaces at beginning of line will be stripped. 79 * <p> 80 * XHTML is slightly different than HTML: 81 * <ul> 82 * <li>Element/attribute names are lower case and case matters 83 * <li>Attributes must specify value, even if empty string 84 * <li>Empty elements must have '/' in empty tag 85 * <li>Contents of SCRIPT and STYLE elements serialized as CDATA 86 * </ul> 87 * 88 * @deprecated This class was deprecated in Xerces 2.6.2. It is 89 * recommended that new applications use JAXP's Transformation API 90 * for XML (TrAX) for serializing HTML. See the Xerces documentation 91 * for more information. 92 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 93 * @see Serializer 94 */ 95 @Deprecated 96 public class HTMLSerializer 97 extends BaseMarkupSerializer 98 { 99 100 101 /** 102 * True if serializing in XHTML format. 103 */ 104 private boolean _xhtml; 105 106 107 public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml"; 108 109 // for users to override XHTMLNamespace if need be. 110 private String fUserXHTMLNamespace = null; 111 112 113 /** 114 * Constructs a new HTML/XHTML serializer depending on the value of 115 * <tt>xhtml</tt>. The serializer cannot be used without calling 116 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first. 117 * 118 * @param xhtml True if XHTML serializing 119 */ 120 protected HTMLSerializer( boolean xhtml, OutputFormat format ) 121 { 122 super( format ); 123 _xhtml = xhtml; 124 } 125 126 127 /** 128 * Constructs a new serializer. The serializer cannot be used without 129 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 130 * first. 131 */ 132 public HTMLSerializer() 133 { 134 this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 135 } 136 137 138 /** 139 * Constructs a new serializer. The serializer cannot be used without 140 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 141 * first. 142 */ 143 public HTMLSerializer( OutputFormat format ) 144 { 145 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 146 } 147 148 149 150 /** 151 * Constructs a new serializer that writes to the specified writer 152 * using the specified output format. If <tt>format</tt> is null, 153 * will use a default output format. 154 * 155 * @param writer The writer to use 156 * @param format The output format to use, null for the default 157 */ 158 public HTMLSerializer( Writer writer, OutputFormat format ) 159 { 160 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 161 setOutputCharStream( writer ); 162 } 163 164 165 /** 166 * Constructs a new serializer that writes to the specified output 167 * stream using the specified output format. If <tt>format</tt> 168 * is null, will use a default output format. 169 * 170 * @param output The output stream to use 171 * @param format The output format to use, null for the default 172 */ 173 public HTMLSerializer( OutputStream output, OutputFormat format ) 174 { 175 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 176 setOutputByteStream( output ); 177 } 178 179 180 public void setOutputFormat( OutputFormat format ) 181 { 182 super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 183 } 184 185 // Set value for alternate XHTML namespace. 186 public void setXHTMLNamespace(String newNamespace) { 187 fUserXHTMLNamespace = newNamespace; 188 } // setXHTMLNamespace(String) 189 190 //-----------------------------------------// 191 // SAX content handler serializing methods // 192 //-----------------------------------------// 193 194 195 public void startElement( String namespaceURI, String localName, 196 String rawName, Attributes attrs ) 197 throws SAXException 198 { 199 int i; 200 boolean preserveSpace; 201 ElementState state; 202 String name; 203 String value; 204 String htmlName; 205 boolean addNSAttr = false; 206 207 try { 208 if ( _printer == null ) 209 throw new IllegalStateException( 210 DOMMessageFormatter.formatMessage( 211 DOMMessageFormatter.SERIALIZER_DOMAIN, 212 "NoWriterSupplied", null)); 213 214 state = getElementState(); 215 if ( isDocumentState() ) { 216 // If this is the root element handle it differently. 217 // If the first root element in the document, serialize 218 // the document's DOCTYPE. Space preserving defaults 219 // to that of the output format. 220 if ( ! _started ) 221 startDocument( (localName == null || localName.length() == 0) 222 ? rawName : localName ); 223 } else { 224 // For any other element, if first in parent, then 225 // close parent's opening tag and use the parnet's 226 // space preserving. 227 if ( state.empty ) 228 _printer.printText( '>' ); 229 // Indent this element on a new line if the first 230 // content of the parent element or immediately 231 // following an element. 232 if ( _indenting && ! state.preserveSpace && 233 ( state.empty || state.afterElement ) ) 234 _printer.breakLine(); 235 } 236 preserveSpace = state.preserveSpace; 237 238 // Do not change the current element state yet. 239 // This only happens in endElement(). 240 241 // As per SAX2, the namespace URI is an empty string if the element has no 242 // namespace URI, or namespaces is turned off. The check against null protects 243 // against broken SAX implementations, so I've left it there. - mrglavas 244 boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0); 245 246 // SAX2: rawName (QName) could be empty string if 247 // namespace-prefixes property is false. 248 if ( rawName == null || rawName.length() == 0) { 249 rawName = localName; 250 if ( hasNamespaceURI ) { 251 String prefix; 252 prefix = getPrefix( namespaceURI ); 253 if ( prefix != null && prefix.length() != 0 ) 254 rawName = prefix + ":" + localName; 255 } 256 addNSAttr = true; 257 } 258 if ( !hasNamespaceURI ) 259 htmlName = rawName; 260 else { 261 if ( namespaceURI.equals( XHTMLNamespace ) || 262 (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) ) 263 htmlName = localName; 264 else 265 htmlName = null; 266 } 267 268 // XHTML: element names are lower case, DOM will be different 269 _printer.printText( '<' ); 270 if ( _xhtml ) 271 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) ); 272 else 273 _printer.printText( rawName ); 274 _printer.indent(); 275 276 // For each attribute serialize it's name and value as one part, 277 // separated with a space so the element can be broken on 278 // multiple lines. 279 if ( attrs != null ) { 280 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 281 _printer.printSpace(); 282 name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH); 283 value = attrs.getValue( i ); 284 if ( _xhtml || hasNamespaceURI ) { 285 // XHTML: print empty string for null values. 286 if ( value == null ) { 287 _printer.printText( name ); 288 _printer.printText( "=\"\"" ); 289 } else { 290 _printer.printText( name ); 291 _printer.printText( "=\"" ); 292 printEscaped( value ); 293 _printer.printText( '"' ); 294 } 295 } else { 296 // HTML: Empty values print as attribute name, no value. 297 // HTML: URI attributes will print unescaped 298 if ( value == null ) { 299 value = ""; 300 } 301 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 302 _printer.printText( name ); 303 else if ( HTMLdtd.isURI( rawName, name ) ) { 304 _printer.printText( name ); 305 _printer.printText( "=\"" ); 306 _printer.printText( escapeURI( value ) ); 307 _printer.printText( '"' ); 308 } else if ( HTMLdtd.isBoolean( rawName, name ) ) 309 _printer.printText( name ); 310 else { 311 _printer.printText( name ); 312 _printer.printText( "=\"" ); 313 printEscaped( value ); 314 _printer.printText( '"' ); 315 } 316 } 317 } 318 } 319 if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) ) 320 preserveSpace = true; 321 322 if ( addNSAttr ) { 323 for (Map.Entry<String, String> entry : _prefixes.entrySet()) { 324 _printer.printSpace(); 325 value = entry.getKey(); //The prefixes map uses the URI value as key. 326 name = entry.getValue(); //and prefix name as value 327 if ( name.length() == 0 ) { 328 _printer.printText( "xmlns=\"" ); 329 printEscaped( value ); 330 _printer.printText( '"' ); 331 } else { 332 _printer.printText( "xmlns:" ); 333 _printer.printText( name ); 334 _printer.printText( "=\"" ); 335 printEscaped( value ); 336 _printer.printText( '"' ); 337 } 338 } 339 } 340 341 // Now it's time to enter a new element state 342 // with the tag name and space preserving. 343 // We still do not change the curent element state. 344 state = enterElementState( namespaceURI, localName, rawName, preserveSpace ); 345 346 // Prevents line breaks inside A/TD 347 348 if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) || 349 htmlName.equalsIgnoreCase( "TD" ) ) ) { 350 state.empty = false; 351 _printer.printText( '>' ); 352 } 353 354 // Handle SCRIPT and STYLE specifically by changing the 355 // state of the current element to CDATA (XHTML) or 356 // unescaped (HTML). 357 if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) || 358 rawName.equalsIgnoreCase( "STYLE" ) ) ) { 359 if ( _xhtml ) { 360 // XHTML: Print contents as CDATA section 361 state.doCData = true; 362 } else { 363 // HTML: Print contents unescaped 364 state.unescaped = true; 365 } 366 } 367 } catch ( IOException except ) { 368 throw new SAXException( except ); 369 } 370 } 371 372 373 public void endElement( String namespaceURI, String localName, 374 String rawName ) 375 throws SAXException 376 { 377 try { 378 endElementIO( namespaceURI, localName, rawName ); 379 } catch ( IOException except ) { 380 throw new SAXException( except ); 381 } 382 } 383 384 385 public void endElementIO( String namespaceURI, String localName, 386 String rawName ) 387 throws IOException 388 { 389 ElementState state; 390 String htmlName; 391 392 // Works much like content() with additions for closing 393 // an element. Note the different checks for the closed 394 // element's state and the parent element's state. 395 _printer.unindent(); 396 state = getElementState(); 397 398 if ( state.namespaceURI == null || state.namespaceURI.length() == 0 ) 399 htmlName = state.rawName; 400 else { 401 if ( state.namespaceURI.equals( XHTMLNamespace ) || 402 (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) ) 403 htmlName = state.localName; 404 else 405 htmlName = null; 406 } 407 408 if ( _xhtml) { 409 if ( state.empty ) { 410 _printer.printText( " />" ); 411 } else { 412 // Must leave CData section first 413 if ( state.inCData ) 414 _printer.printText( "]]>" ); 415 // XHTML: element names are lower case, DOM will be different 416 _printer.printText( "</" ); 417 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) ); 418 _printer.printText( '>' ); 419 } 420 } else { 421 if ( state.empty ) 422 _printer.printText( '>' ); 423 // This element is not empty and that last content was 424 // another element, so print a line break before that 425 // last element and this element's closing tag. 426 // [keith] Provided this is not an anchor. 427 // HTML: some elements do not print closing tag (e.g. LI) 428 if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) { 429 if ( _indenting && ! state.preserveSpace && state.afterElement ) 430 _printer.breakLine(); 431 // Must leave CData section first (Illegal in HTML, but still) 432 if ( state.inCData ) 433 _printer.printText( "]]>" ); 434 _printer.printText( "</" ); 435 _printer.printText( state.rawName ); 436 _printer.printText( '>' ); 437 } 438 } 439 // Leave the element state and update that of the parent 440 // (if we're not root) to not empty and after element. 441 state = leaveElementState(); 442 // Temporary hack to prevent line breaks inside A/TD 443 if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) && 444 ! htmlName.equalsIgnoreCase( "TD" ) ) ) 445 446 state.afterElement = true; 447 state.empty = false; 448 if ( isDocumentState() ) 449 _printer.flush(); 450 } 451 452 453 //------------------------------------------// 454 // SAX document handler serializing methods // 455 //------------------------------------------// 456 457 458 public void characters( char[] chars, int start, int length ) 459 throws SAXException 460 { 461 ElementState state; 462 463 try { 464 // HTML: no CDATA section 465 state = content(); 466 state.doCData = false; 467 super.characters( chars, start, length ); 468 } catch ( IOException except ) { 469 throw new SAXException( except ); 470 } 471 } 472 473 474 public void startElement( String tagName, AttributeList attrs ) 475 throws SAXException 476 { 477 int i; 478 boolean preserveSpace; 479 ElementState state; 480 String name; 481 String value; 482 483 try { 484 if ( _printer == null ) 485 throw new IllegalStateException( 486 DOMMessageFormatter.formatMessage( 487 DOMMessageFormatter.SERIALIZER_DOMAIN, 488 "NoWriterSupplied", null)); 489 490 491 state = getElementState(); 492 if ( isDocumentState() ) { 493 // If this is the root element handle it differently. 494 // If the first root element in the document, serialize 495 // the document's DOCTYPE. Space preserving defaults 496 // to that of the output format. 497 if ( ! _started ) 498 startDocument( tagName ); 499 } else { 500 // For any other element, if first in parent, then 501 // close parent's opening tag and use the parnet's 502 // space preserving. 503 if ( state.empty ) 504 _printer.printText( '>' ); 505 // Indent this element on a new line if the first 506 // content of the parent element or immediately 507 // following an element. 508 if ( _indenting && ! state.preserveSpace && 509 ( state.empty || state.afterElement ) ) 510 _printer.breakLine(); 511 } 512 preserveSpace = state.preserveSpace; 513 514 // Do not change the current element state yet. 515 // This only happens in endElement(). 516 517 // XHTML: element names are lower case, DOM will be different 518 _printer.printText( '<' ); 519 if ( _xhtml ) 520 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) ); 521 else 522 _printer.printText( tagName ); 523 _printer.indent(); 524 525 // For each attribute serialize it's name and value as one part, 526 // separated with a space so the element can be broken on 527 // multiple lines. 528 if ( attrs != null ) { 529 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 530 _printer.printSpace(); 531 name = attrs.getName( i ).toLowerCase(Locale.ENGLISH); 532 value = attrs.getValue( i ); 533 if ( _xhtml ) { 534 // XHTML: print empty string for null values. 535 if ( value == null ) { 536 _printer.printText( name ); 537 _printer.printText( "=\"\"" ); 538 } else { 539 _printer.printText( name ); 540 _printer.printText( "=\"" ); 541 printEscaped( value ); 542 _printer.printText( '"' ); 543 } 544 } else { 545 // HTML: Empty values print as attribute name, no value. 546 // HTML: URI attributes will print unescaped 547 if ( value == null ) { 548 value = ""; 549 } 550 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 551 _printer.printText( name ); 552 else if ( HTMLdtd.isURI( tagName, name ) ) { 553 _printer.printText( name ); 554 _printer.printText( "=\"" ); 555 _printer.printText( escapeURI( value ) ); 556 _printer.printText( '"' ); 557 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 558 _printer.printText( name ); 559 else { 560 _printer.printText( name ); 561 _printer.printText( "=\"" ); 562 printEscaped( value ); 563 _printer.printText( '"' ); 564 } 565 } 566 } 567 } 568 if ( HTMLdtd.isPreserveSpace( tagName ) ) 569 preserveSpace = true; 570 571 // Now it's time to enter a new element state 572 // with the tag name and space preserving. 573 // We still do not change the curent element state. 574 state = enterElementState( null, null, tagName, preserveSpace ); 575 576 // Prevents line breaks inside A/TD 577 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 578 state.empty = false; 579 _printer.printText( '>' ); 580 } 581 582 // Handle SCRIPT and STYLE specifically by changing the 583 // state of the current element to CDATA (XHTML) or 584 // unescaped (HTML). 585 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 586 tagName.equalsIgnoreCase( "STYLE" ) ) { 587 if ( _xhtml ) { 588 // XHTML: Print contents as CDATA section 589 state.doCData = true; 590 } else { 591 // HTML: Print contents unescaped 592 state.unescaped = true; 593 } 594 } 595 } catch ( IOException except ) { 596 throw new SAXException( except ); 597 } 598 } 599 600 601 public void endElement( String tagName ) 602 throws SAXException 603 { 604 endElement( null, null, tagName ); 605 } 606 607 608 //------------------------------------------// 609 // Generic node serializing methods methods // 610 //------------------------------------------// 611 612 613 /** 614 * Called to serialize the document's DOCTYPE by the root element. 615 * The document type declaration must name the root element, 616 * but the root element is only known when that element is serialized, 617 * and not at the start of the document. 618 * <p> 619 * This method will check if it has not been called before ({@link #_started}), 620 * will serialize the document type declaration, and will serialize all 621 * pre-root comments and PIs that were accumulated in the document 622 * (see {@link #serializePreRoot}). Pre-root will be serialized even if 623 * this is not the first root element of the document. 624 */ 625 protected void startDocument( String rootTagName ) 626 throws IOException 627 { 628 StringBuffer buffer; 629 630 // Not supported in HTML/XHTML, but we still have to switch 631 // out of DTD mode. 632 _printer.leaveDTD(); 633 if ( ! _started ) { 634 // If the public and system identifiers were not specified 635 // in the output format, use the appropriate ones for HTML 636 // or XHTML. 637 if ( _docTypePublicId == null && _docTypeSystemId == null ) { 638 if ( _xhtml ) { 639 _docTypePublicId = HTMLdtd.XHTMLPublicId; 640 _docTypeSystemId = HTMLdtd.XHTMLSystemId; 641 } else { 642 _docTypePublicId = HTMLdtd.HTMLPublicId; 643 _docTypeSystemId = HTMLdtd.HTMLSystemId; 644 } 645 } 646 647 if ( ! _format.getOmitDocumentType() ) { 648 // XHTML: If public identifier and system identifier 649 // specified, print them, else print just system identifier 650 // HTML: If public identifier specified, print it with 651 // system identifier, if specified. 652 // XHTML requires that all element names are lower case, so the 653 // root on the DOCTYPE must be 'html'. - mrglavas 654 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) { 655 if (_xhtml) { 656 _printer.printText( "<!DOCTYPE html PUBLIC " ); 657 } 658 else { 659 _printer.printText( "<!DOCTYPE HTML PUBLIC " ); 660 } 661 printDoctypeURL( _docTypePublicId ); 662 if ( _docTypeSystemId != null ) { 663 if ( _indenting ) { 664 _printer.breakLine(); 665 _printer.printText( " " ); 666 } else 667 _printer.printText( ' ' ); 668 printDoctypeURL( _docTypeSystemId ); 669 } 670 _printer.printText( '>' ); 671 _printer.breakLine(); 672 } else if ( _docTypeSystemId != null ) { 673 if (_xhtml) { 674 _printer.printText( "<!DOCTYPE html SYSTEM " ); 675 } 676 else { 677 _printer.printText( "<!DOCTYPE HTML SYSTEM " ); 678 } 679 printDoctypeURL( _docTypeSystemId ); 680 _printer.printText( '>' ); 681 _printer.breakLine(); 682 } 683 } 684 } 685 686 _started = true; 687 // Always serialize these, even if not te first root element. 688 serializePreRoot(); 689 } 690 691 692 /** 693 * Called to serialize a DOM element. Equivalent to calling {@link 694 * #startElement}, {@link #endElement} and serializing everything 695 * inbetween, but better optimized. 696 */ 697 protected void serializeElement( Element elem ) 698 throws IOException 699 { 700 Attr attr; 701 NamedNodeMap attrMap; 702 int i; 703 Node child; 704 ElementState state; 705 boolean preserveSpace; 706 String name; 707 String value; 708 String tagName; 709 710 tagName = elem.getTagName(); 711 state = getElementState(); 712 if ( isDocumentState() ) { 713 // If this is the root element handle it differently. 714 // If the first root element in the document, serialize 715 // the document's DOCTYPE. Space preserving defaults 716 // to that of the output format. 717 if ( ! _started ) 718 startDocument( tagName ); 719 } else { 720 // For any other element, if first in parent, then 721 // close parent's opening tag and use the parnet's 722 // space preserving. 723 if ( state.empty ) 724 _printer.printText( '>' ); 725 // Indent this element on a new line if the first 726 // content of the parent element or immediately 727 // following an element. 728 if ( _indenting && ! state.preserveSpace && 729 ( state.empty || state.afterElement ) ) 730 _printer.breakLine(); 731 } 732 preserveSpace = state.preserveSpace; 733 734 // Do not change the current element state yet. 735 // This only happens in endElement(). 736 737 // XHTML: element names are lower case, DOM will be different 738 _printer.printText( '<' ); 739 if ( _xhtml ) 740 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) ); 741 else 742 _printer.printText( tagName ); 743 _printer.indent(); 744 745 // Lookup the element's attribute, but only print specified 746 // attributes. (Unspecified attributes are derived from the DTD. 747 // For each attribute print it's name and value as one part, 748 // separated with a space so the element can be broken on 749 // multiple lines. 750 attrMap = elem.getAttributes(); 751 if ( attrMap != null ) { 752 for ( i = 0 ; i < attrMap.getLength() ; ++i ) { 753 attr = (Attr) attrMap.item( i ); 754 name = attr.getName().toLowerCase(Locale.ENGLISH); 755 value = attr.getValue(); 756 if ( attr.getSpecified() ) { 757 _printer.printSpace(); 758 if ( _xhtml ) { 759 // XHTML: print empty string for null values. 760 if ( value == null ) { 761 _printer.printText( name ); 762 _printer.printText( "=\"\"" ); 763 } else { 764 _printer.printText( name ); 765 _printer.printText( "=\"" ); 766 printEscaped( value ); 767 _printer.printText( '"' ); 768 } 769 } else { 770 // HTML: Empty values print as attribute name, no value. 771 // HTML: URI attributes will print unescaped 772 if ( value == null ) { 773 value = ""; 774 } 775 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 776 _printer.printText( name ); 777 else if ( HTMLdtd.isURI( tagName, name ) ) { 778 _printer.printText( name ); 779 _printer.printText( "=\"" ); 780 _printer.printText( escapeURI( value ) ); 781 _printer.printText( '"' ); 782 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 783 _printer.printText( name ); 784 else { 785 _printer.printText( name ); 786 _printer.printText( "=\"" ); 787 printEscaped( value ); 788 _printer.printText( '"' ); 789 } 790 } 791 } 792 } 793 } 794 if ( HTMLdtd.isPreserveSpace( tagName ) ) 795 preserveSpace = true; 796 797 // If element has children, or if element is not an empty tag, 798 // serialize an opening tag. 799 if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) { 800 // Enter an element state, and serialize the children 801 // one by one. Finally, end the element. 802 state = enterElementState( null, null, tagName, preserveSpace ); 803 804 // Prevents line breaks inside A/TD 805 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 806 state.empty = false; 807 _printer.printText( '>' ); 808 } 809 810 // Handle SCRIPT and STYLE specifically by changing the 811 // state of the current element to CDATA (XHTML) or 812 // unescaped (HTML). 813 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 814 tagName.equalsIgnoreCase( "STYLE" ) ) { 815 if ( _xhtml ) { 816 // XHTML: Print contents as CDATA section 817 state.doCData = true; 818 } else { 819 // HTML: Print contents unescaped 820 state.unescaped = true; 821 } 822 } 823 child = elem.getFirstChild(); 824 while ( child != null ) { 825 serializeNode( child ); 826 child = child.getNextSibling(); 827 } 828 endElementIO( null, null, tagName ); 829 } else { 830 _printer.unindent(); 831 // XHTML: Close empty tag with ' />' so it's XML and HTML compatible. 832 // HTML: Empty tags are defined as such in DTD no in document. 833 if ( _xhtml ) 834 _printer.printText( " />" ); 835 else 836 _printer.printText( '>' ); 837 // After element but parent element is no longer empty. 838 state.afterElement = true; 839 state.empty = false; 840 if ( isDocumentState() ) 841 _printer.flush(); 842 } 843 } 844 845 846 847 protected void characters( String text ) 848 throws IOException 849 { 850 ElementState state; 851 852 // HTML: no CDATA section 853 state = content(); 854 super.characters( text ); 855 } 856 857 858 protected String getEntityRef( int ch ) 859 { 860 return HTMLdtd.fromChar( ch ); 861 } 862 863 864 protected String escapeURI( String uri ) 865 { 866 int index; 867 868 // XXX Apparently Netscape doesn't like if we escape the URI 869 // using %nn, so we leave it as is, just remove any quotes. 870 index = uri.indexOf( "\"" ); 871 if ( index >= 0 ) 872 return uri.substring( 0, index ); 873 else 874 return uri; 875 } 876 877 878 }