1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 23 // Aug 21, 2000: 24 // Added ability to omit DOCTYPE declaration. 25 // Reported by Lars Martin <lars@smb-tec.com> 26 // Aug 25, 2000: 27 // Added ability to omit comments. 28 // Contributed by Anupam Bagchi <abagchi@jtcsv.com> 29 30 31 package com.sun.org.apache.xml.internal.serialize; 32 33 34 import java.io.UnsupportedEncodingException; 35 36 import org.w3c.dom.Document; 37 import org.w3c.dom.DocumentType; 38 import org.w3c.dom.Node; 39 40 41 /** 42 * Specifies an output format to control the serializer. Based on the 43 * XSLT specification for output format, plus additional parameters. 44 * Used to select the suitable serializer and determine how the 45 * document should be formatted on output. 46 * <p> 47 * The two interesting constructors are: 48 * <ul> 49 * <li>{@link #OutputFormat(String,String,boolean)} creates a format 50 * for the specified method (XML, HTML, Text, etc), encoding and indentation 51 * <li>{@link #OutputFormat(Document,String,boolean)} creates a format 52 * compatible with the document type (XML, HTML, Text, etc), encoding and 53 * indentation 54 * </ul> 55 * 56 * 57 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 58 * <a href="mailto:visco@intalio.com">Keith Visco</a> 59 * @see Serializer 60 * @see Method 61 * @see LineSeparator 62 * 63 * @deprecated As of JDK 1.9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation 64 * is replaced by that of Xalan. Main class 65 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced 66 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}. 67 */ 68 public class OutputFormat 69 { 70 71 72 public static class DTD 73 { 74 75 /** 76 * Public identifier for HTML 4.01 (Strict) document type. 77 */ 78 public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN"; 79 80 /** 81 * System identifier for HTML 4.01 (Strict) document type. 82 */ 83 public static final String HTMLSystemId = 84 "http://www.w3.org/TR/html4/strict.dtd"; 85 86 /** 87 * Public identifier for XHTML 1.0 (Strict) document type. 88 */ 89 public static final String XHTMLPublicId = 90 "-//W3C//DTD XHTML 1.0 Strict//EN"; 91 92 /** 93 * System identifier for XHTML 1.0 (Strict) document type. 94 */ 95 public static final String XHTMLSystemId = 96 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; 97 98 } 99 100 101 public static class Defaults 102 { 103 104 /** 105 * If indentation is turned on, the default identation 106 * level is 4. 107 * 108 * @see #setIndenting(boolean) 109 */ 110 public static final int Indent = 4; 111 112 /** 113 * The default encoding for Web documents it UTF-8. 114 * 115 * @see #getEncoding() 116 */ 117 public static final String Encoding = "UTF-8"; 118 119 /** 120 * The default line width at which to break long lines 121 * when identing. This is set to 72. 122 */ 123 public static final int LineWidth = 72; 124 125 } 126 127 128 /** 129 * Holds the output method specified for this document, 130 * or null if no method was specified. 131 */ 132 private String _method; 133 134 135 /** 136 * Specifies the version of the output method. 137 */ 138 private String _version; 139 140 141 /** 142 * The indentation level, or zero if no indentation 143 * was requested. 144 */ 145 private int _indent = 0; 146 147 148 /** 149 * The encoding to use, if an input stream is used. 150 * The default is always UTF-8. 151 */ 152 private String _encoding = Defaults.Encoding; 153 154 /** 155 * The EncodingInfo instance for _encoding. 156 */ 157 private EncodingInfo _encodingInfo = null; 158 159 // whether java names for encodings are permitted 160 private boolean _allowJavaNames = false; 161 162 /** 163 * The specified media type or null. 164 */ 165 private String _mediaType; 166 167 168 /** 169 * The specified document type system identifier, or null. 170 */ 171 private String _doctypeSystem; 172 173 174 /** 175 * The specified document type public identifier, or null. 176 */ 177 private String _doctypePublic; 178 179 180 /** 181 * Ture if the XML declaration should be ommited; 182 */ 183 private boolean _omitXmlDeclaration = false; 184 185 186 /** 187 * Ture if the DOCTYPE declaration should be ommited; 188 */ 189 private boolean _omitDoctype = false; 190 191 192 /** 193 * Ture if comments should be ommited; 194 */ 195 private boolean _omitComments = false; 196 197 198 /** 199 * Ture if the comments should be ommited; 200 */ 201 private boolean _stripComments = false; 202 203 204 /** 205 * True if the document type should be marked as standalone. 206 */ 207 private boolean _standalone = false; 208 209 210 /** 211 * List of element tag names whose text node children must 212 * be output as CDATA. 213 */ 214 private String[] _cdataElements; 215 216 217 /** 218 * List of element tag names whose text node children must 219 * be output unescaped. 220 */ 221 private String[] _nonEscapingElements; 222 223 224 /** 225 * The selected line separator. 226 */ 227 private String _lineSeparator = LineSeparator.Web; 228 229 230 /** 231 * The line width at which to wrap long lines when indenting. 232 */ 233 private int _lineWidth = Defaults.LineWidth; 234 235 236 /** 237 * True if spaces should be preserved in elements that do not 238 * specify otherwise, or specify the default behavior. 239 */ 240 private boolean _preserve = false; 241 /** If true, an empty string valued attribute is output as "". If false and 242 * and we are using the HTMLSerializer, then only the attribute name is 243 * serialized. Defaults to false for backwards compatibility. 244 */ 245 private boolean _preserveEmptyAttributes = false; 246 247 /** 248 * Constructs a new output format with the default values. 249 */ 250 public OutputFormat() 251 { 252 } 253 254 255 /** 256 * Constructs a new output format with the default values for 257 * the specified method and encoding. If <tt>indent</tt> 258 * is true, the document will be pretty printed with the default 259 * indentation level and default line wrapping. 260 * 261 * @param method The specified output method 262 * @param encoding The specified encoding 263 * @param indenting True for pretty printing 264 * @see #setEncoding 265 * @see #setIndenting 266 * @see #setMethod 267 */ 268 public OutputFormat( String method, String encoding, boolean indenting ) 269 { 270 setMethod( method ); 271 setEncoding( encoding ); 272 setIndenting( indenting ); 273 } 274 275 /** 276 * Returns the method specified for this output format. 277 * Typically the method will be <tt>xml</tt>, <tt>html</tt> 278 * or <tt>text</tt>, but it might be other values. 279 * If no method was specified, null will be returned 280 * and the most suitable method will be determined for 281 * the document by calling {@link #whichMethod}. 282 * 283 * @return The specified output method, or null 284 */ 285 public String getMethod() 286 { 287 return _method; 288 } 289 290 291 /** 292 * Sets the method for this output format. 293 * 294 * @see #getMethod 295 * @param method The output method, or null 296 */ 297 public void setMethod( String method ) 298 { 299 _method = method; 300 } 301 302 303 /** 304 * Returns the version for this output method. 305 * If no version was specified, will return null 306 * and the default version number will be used. 307 * If the serializerr does not support that particular 308 * version, it should default to a supported version. 309 * 310 * @return The specified method version, or null 311 */ 312 public String getVersion() 313 { 314 return _version; 315 } 316 317 318 /** 319 * Sets the version for this output method. 320 * For XML the value would be "1.0", for HTML 321 * it would be "4.0". 322 * 323 * @see #getVersion 324 * @param version The output method version, or null 325 */ 326 public void setVersion( String version ) 327 { 328 _version = version; 329 } 330 331 332 /** 333 * Returns the indentation specified. If no indentation 334 * was specified, zero is returned and the document 335 * should not be indented. 336 * 337 * @return The indentation or zero 338 * @see #setIndenting 339 */ 340 public int getIndent() 341 { 342 return _indent; 343 } 344 345 346 /** 347 * Returns true if indentation was specified. 348 */ 349 public boolean getIndenting() 350 { 351 return ( _indent > 0 ); 352 } 353 354 355 /** 356 * Sets the indentation. The document will not be 357 * indented if the indentation is set to zero. 358 * Calling {@link #setIndenting} will reset this 359 * value to zero (off) or the default (on). 360 * 361 * @param indent The indentation, or zero 362 */ 363 public void setIndent( int indent ) 364 { 365 if ( indent < 0 ) 366 _indent = 0; 367 else 368 _indent = indent; 369 } 370 371 372 /** 373 * Sets the indentation on and off. When set on, the default 374 * indentation level and default line wrapping is used 375 * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}). 376 * To specify a different indentation level or line wrapping, 377 * use {@link #setIndent} and {@link #setLineWidth}. 378 * 379 * @param on True if indentation should be on 380 */ 381 public void setIndenting( boolean on ) 382 { 383 if ( on ) { 384 _indent = Defaults.Indent; 385 _lineWidth = Defaults.LineWidth; 386 } else { 387 _indent = 0; 388 _lineWidth = 0; 389 } 390 } 391 392 393 /** 394 * Returns the specified encoding. If no encoding was 395 * specified, the default is always "UTF-8". 396 * 397 * @return The encoding 398 */ 399 public String getEncoding() 400 { 401 return _encoding; 402 } 403 404 405 /** 406 * Sets the encoding for this output method. If no 407 * encoding was specified, the default is always "UTF-8". 408 * Make sure the encoding is compatible with the one 409 * used by the {@link java.io.Writer}. 410 * 411 * @see #getEncoding 412 * @param encoding The encoding, or null 413 */ 414 public void setEncoding( String encoding ) 415 { 416 _encoding = encoding; 417 _encodingInfo = null; 418 } 419 420 /** 421 * Sets the encoding for this output method with an <code>EncodingInfo</code> 422 * instance. 423 */ 424 public void setEncoding(EncodingInfo encInfo) { 425 _encoding = encInfo.getIANAName(); 426 _encodingInfo = encInfo; 427 } 428 429 /** 430 * Returns an <code>EncodingInfo<code> instance for the encoding. 431 * 432 * @see #setEncoding 433 */ 434 public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException { 435 if (_encodingInfo == null) 436 _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames); 437 return _encodingInfo; 438 } 439 440 /** 441 * Sets whether java encoding names are permitted 442 */ 443 public void setAllowJavaNames (boolean allow) { 444 _allowJavaNames = allow; 445 } 446 447 /** 448 * Returns whether java encoding names are permitted 449 */ 450 public boolean setAllowJavaNames () { 451 return _allowJavaNames; 452 } 453 454 /** 455 * Returns the specified media type, or null. 456 * To determine the media type based on the 457 * document type, use {@link #whichMediaType}. 458 * 459 * @return The specified media type, or null 460 */ 461 public String getMediaType() 462 { 463 return _mediaType; 464 } 465 466 467 /** 468 * Sets the media type. 469 * 470 * @see #getMediaType 471 * @param mediaType The specified media type 472 */ 473 public void setMediaType( String mediaType ) 474 { 475 _mediaType = mediaType; 476 } 477 478 479 /** 480 * Sets the document type public and system identifiers. 481 * Required only if the DOM Document or SAX events do not 482 * specify the document type, and one must be present in 483 * the serialized document. Any document type specified 484 * by the DOM Document or SAX events will override these 485 * values. 486 * 487 * @param publicId The public identifier, or null 488 * @param systemId The system identifier, or null 489 */ 490 public void setDoctype( String publicId, String systemId ) 491 { 492 _doctypePublic = publicId; 493 _doctypeSystem = systemId; 494 } 495 496 497 /** 498 * Returns the specified document type public identifier, 499 * or null. 500 */ 501 public String getDoctypePublic() 502 { 503 return _doctypePublic; 504 } 505 506 507 /** 508 * Returns the specified document type system identifier, 509 * or null. 510 */ 511 public String getDoctypeSystem() 512 { 513 return _doctypeSystem; 514 } 515 516 517 /** 518 * Returns true if comments should be ommited. 519 * The default is false. 520 */ 521 public boolean getOmitComments() 522 { 523 return _omitComments; 524 } 525 526 527 /** 528 * Sets comment omitting on and off. 529 * 530 * @param omit True if comments should be ommited 531 */ 532 public void setOmitComments( boolean omit ) 533 { 534 _omitComments = omit; 535 } 536 537 538 /** 539 * Returns true if the DOCTYPE declaration should 540 * be ommited. The default is false. 541 */ 542 public boolean getOmitDocumentType() 543 { 544 return _omitDoctype; 545 } 546 547 548 /** 549 * Sets DOCTYPE declaration omitting on and off. 550 * 551 * @param omit True if DOCTYPE declaration should be ommited 552 */ 553 public void setOmitDocumentType( boolean omit ) 554 { 555 _omitDoctype = omit; 556 } 557 558 559 /** 560 * Returns true if the XML document declaration should 561 * be ommited. The default is false. 562 */ 563 public boolean getOmitXMLDeclaration() 564 { 565 return _omitXmlDeclaration; 566 } 567 568 569 /** 570 * Sets XML declaration omitting on and off. 571 * 572 * @param omit True if XML declaration should be ommited 573 */ 574 public void setOmitXMLDeclaration( boolean omit ) 575 { 576 _omitXmlDeclaration = omit; 577 } 578 579 580 /** 581 * Returns true if the document type is standalone. 582 * The default is false. 583 */ 584 public boolean getStandalone() 585 { 586 return _standalone; 587 } 588 589 590 /** 591 * Sets document DTD standalone. The public and system 592 * identifiers must be null for the document to be 593 * serialized as standalone. 594 * 595 * @param standalone True if document DTD is standalone 596 */ 597 public void setStandalone( boolean standalone ) 598 { 599 _standalone = standalone; 600 } 601 602 603 /** 604 * Returns a list of all the elements whose text node children 605 * should be output as CDATA, or null if no such elements were 606 * specified. 607 */ 608 public String[] getCDataElements() 609 { 610 return _cdataElements; 611 } 612 613 614 /** 615 * Returns true if the text node children of the given elements 616 * should be output as CDATA. 617 * 618 * @param tagName The element's tag name 619 * @return True if should serialize as CDATA 620 */ 621 public boolean isCDataElement( String tagName ) 622 { 623 int i; 624 625 if ( _cdataElements == null ) 626 return false; 627 for ( i = 0 ; i < _cdataElements.length ; ++i ) 628 if ( _cdataElements[ i ].equals( tagName ) ) 629 return true; 630 return false; 631 } 632 633 634 /** 635 * Sets the list of elements for which text node children 636 * should be output as CDATA. 637 * 638 * @param cdataElements List of CDATA element tag names 639 */ 640 public void setCDataElements( String[] cdataElements ) 641 { 642 _cdataElements = cdataElements; 643 } 644 645 646 /** 647 * Returns a list of all the elements whose text node children 648 * should be output unescaped (no character references), or null 649 * if no such elements were specified. 650 */ 651 public String[] getNonEscapingElements() 652 { 653 return _nonEscapingElements; 654 } 655 656 657 /** 658 * Returns true if the text node children of the given elements 659 * should be output unescaped. 660 * 661 * @param tagName The element's tag name 662 * @return True if should serialize unescaped 663 */ 664 public boolean isNonEscapingElement( String tagName ) 665 { 666 int i; 667 668 if ( _nonEscapingElements == null ) { 669 return false; 670 } 671 for ( i = 0 ; i < _nonEscapingElements.length ; ++i ) 672 if ( _nonEscapingElements[ i ].equals( tagName ) ) 673 return true; 674 return false; 675 } 676 677 678 /** 679 * Sets the list of elements for which text node children 680 * should be output unescaped (no character references). 681 * 682 * @param nonEscapingElements List of unescaped element tag names 683 */ 684 public void setNonEscapingElements( String[] nonEscapingElements ) 685 { 686 _nonEscapingElements = nonEscapingElements; 687 } 688 689 690 691 /** 692 * Returns a specific line separator to use. The default is the 693 * Web line separator (<tt>\n</tt>). A string is returned to 694 * support double codes (CR + LF). 695 * 696 * @return The specified line separator 697 */ 698 public String getLineSeparator() 699 { 700 return _lineSeparator; 701 } 702 703 704 /** 705 * Sets the line separator. The default is the Web line separator 706 * (<tt>\n</tt>). The machine's line separator can be obtained 707 * from the system property <tt>line.separator</tt>, but is only 708 * useful if the document is edited on machines of the same type. 709 * For general documents, use the Web line separator. 710 * 711 * @param lineSeparator The specified line separator 712 */ 713 public void setLineSeparator( String lineSeparator ) 714 { 715 if ( lineSeparator == null ) 716 _lineSeparator = LineSeparator.Web; 717 else 718 _lineSeparator = lineSeparator; 719 } 720 721 722 /** 723 * Returns true if the default behavior for this format is to 724 * preserve spaces. All elements that do not specify otherwise 725 * or specify the default behavior will be formatted based on 726 * this rule. All elements that specify space preserving will 727 * always preserve space. 728 */ 729 public boolean getPreserveSpace() 730 { 731 return _preserve; 732 } 733 734 735 /** 736 * Sets space preserving as the default behavior. The default is 737 * space stripping and all elements that do not specify otherwise 738 * or use the default value will not preserve spaces. 739 * 740 * @param preserve True if spaces should be preserved 741 */ 742 public void setPreserveSpace( boolean preserve ) 743 { 744 _preserve = preserve; 745 } 746 747 748 /** 749 * Return the selected line width for breaking up long lines. 750 * When indenting, and only when indenting, long lines will be 751 * broken at space boundaries based on this line width. 752 * No line wrapping occurs if this value is zero. 753 */ 754 public int getLineWidth() 755 { 756 return _lineWidth; 757 } 758 759 760 /** 761 * Sets the line width. If zero then no line wrapping will 762 * occur. Calling {@link #setIndenting} will reset this 763 * value to zero (off) or the default (on). 764 * 765 * @param lineWidth The line width to use, zero for default 766 * @see #getLineWidth 767 * @see #setIndenting 768 */ 769 public void setLineWidth( int lineWidth ) 770 { 771 if ( lineWidth <= 0 ) 772 _lineWidth = 0; 773 else 774 _lineWidth = lineWidth; 775 } 776 /** 777 * Returns the preserveEmptyAttribute flag. If flag is false, then' 778 * attributes with empty string values are output as the attribute 779 * name only (in HTML mode). 780 * @return preserve the preserve flag 781 */ public boolean getPreserveEmptyAttributes () { return _preserveEmptyAttributes; } /** 782 * Sets the preserveEmptyAttribute flag. If flag is false, then' 783 * attributes with empty string values are output as the attribute 784 * name only (in HTML mode). 785 * @param preserve the preserve flag 786 */ public void setPreserveEmptyAttributes (boolean preserve) { _preserveEmptyAttributes = preserve; } 787 788 /** 789 * Returns the last printable character based on the selected 790 * encoding. Control characters and non-printable characters 791 * are always printed as character references. 792 */ 793 public char getLastPrintable() 794 { 795 if ( getEncoding() != null && 796 ( getEncoding().equalsIgnoreCase( "ASCII" ) ) ) 797 return 0xFF; 798 else 799 return 0xFFFF; 800 } 801 802 803 /** 804 * Returns the suitable media format for a document 805 * output with the specified method. 806 */ 807 public static String whichMediaType( String method ) 808 { 809 if ( method.equalsIgnoreCase( Method.XML ) ) 810 return "text/xml"; 811 if ( method.equalsIgnoreCase( Method.HTML ) ) 812 return "text/html"; 813 if ( method.equalsIgnoreCase( Method.XHTML ) ) 814 return "text/html"; 815 if ( method.equalsIgnoreCase( Method.TEXT ) ) 816 return "text/plain"; 817 if ( method.equalsIgnoreCase( Method.FOP ) ) 818 return "application/pdf"; 819 return null; 820 } 821 822 823 }