1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 
  23 // Aug 21, 2000:
  24 //  Added ability to omit DOCTYPE declaration.
  25 //  Reported by Lars Martin <lars@smb-tec.com>
  26 // Aug 25, 2000:
  27 //  Added ability to omit comments.
  28 //  Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  29 
  30 
  31 package com.sun.org.apache.xml.internal.serialize;
  32 
  33 
  34 import java.io.UnsupportedEncodingException;
  35 
  36 import org.w3c.dom.Document;
  37 import org.w3c.dom.DocumentType;
  38 import org.w3c.dom.Node;
  39 
  40 
  41 /**
  42  * Specifies an output format to control the serializer. Based on the
  43  * XSLT specification for output format, plus additional parameters.
  44  * Used to select the suitable serializer and determine how the
  45  * document should be formatted on output.
  46  * <p>
  47  * The two interesting constructors are:
  48  * <ul>
  49  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
  50  *  for the specified method (XML, HTML, Text, etc), encoding and indentation
  51  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
  52  *  compatible with the document type (XML, HTML, Text, etc), encoding and
  53  *  indentation
  54  * </ul>
  55  *
  56  *
  57  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  58  *         <a href="mailto:visco@intalio.com">Keith Visco</a>
  59  * @see Serializer
  60  * @see Method
  61  * @see LineSeparator
  62  *
  63  * @deprecated As of JDK 1.9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  64  * is replaced by that of Xalan. Main class
  65  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  66  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  67  */
  68 public class OutputFormat
  69 {
  70 
  71 
  72     public static class DTD
  73     {
  74 
  75         /**
  76          * Public identifier for HTML 4.01 (Strict) document type.
  77          */
  78         public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
  79 
  80         /**
  81          * System identifier for HTML 4.01 (Strict) document type.
  82          */
  83         public static final String HTMLSystemId =
  84             "http://www.w3.org/TR/html4/strict.dtd";
  85 
  86         /**
  87          * Public identifier for XHTML 1.0 (Strict) document type.
  88          */
  89         public static final String XHTMLPublicId =
  90             "-//W3C//DTD XHTML 1.0 Strict//EN";
  91 
  92         /**
  93          * System identifier for XHTML 1.0 (Strict) document type.
  94          */
  95         public static final String XHTMLSystemId =
  96             "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
  97 
  98     }
  99 
 100 
 101     public static class Defaults
 102     {
 103 
 104         /**
 105          * If indentation is turned on, the default identation
 106          * level is 4.
 107          *
 108          * @see #setIndenting(boolean)
 109          */
 110         public static final int Indent = 4;
 111 
 112         /**
 113          * The default encoding for Web documents it UTF-8.
 114          *
 115          * @see #getEncoding()
 116          */
 117         public static final String Encoding = "UTF-8";
 118 
 119         /**
 120          * The default line width at which to break long lines
 121          * when identing. This is set to 72.
 122          */
 123         public static final int LineWidth = 72;
 124 
 125     }
 126 
 127 
 128     /**
 129      * Holds the output method specified for this document,
 130      * or null if no method was specified.
 131      */
 132     private String _method;
 133 
 134 
 135     /**
 136      * Specifies the version of the output method.
 137      */
 138     private String _version;
 139 
 140 
 141     /**
 142      * The indentation level, or zero if no indentation
 143      * was requested.
 144      */
 145     private int _indent = 0;
 146 
 147 
 148     /**
 149      * The encoding to use, if an input stream is used.
 150      * The default is always UTF-8.
 151      */
 152     private String _encoding = Defaults.Encoding;
 153 
 154     /**
 155      * The EncodingInfo instance for _encoding.
 156      */
 157     private EncodingInfo _encodingInfo = null;
 158 
 159     // whether java names for encodings are permitted
 160     private boolean _allowJavaNames = false;
 161 
 162     /**
 163      * The specified media type or null.
 164      */
 165     private String _mediaType;
 166 
 167 
 168     /**
 169      * The specified document type system identifier, or null.
 170      */
 171     private String _doctypeSystem;
 172 
 173 
 174     /**
 175      * The specified document type public identifier, or null.
 176      */
 177     private String _doctypePublic;
 178 
 179 
 180     /**
 181      * Ture if the XML declaration should be ommited;
 182      */
 183     private boolean _omitXmlDeclaration = false;
 184 
 185 
 186     /**
 187      * Ture if the DOCTYPE declaration should be ommited;
 188      */
 189     private boolean _omitDoctype = false;
 190 
 191 
 192     /**
 193      * Ture if comments should be ommited;
 194      */
 195     private boolean _omitComments = false;
 196 
 197 
 198     /**
 199      * Ture if the comments should be ommited;
 200      */
 201     private boolean _stripComments = false;
 202 
 203 
 204     /**
 205      * True if the document type should be marked as standalone.
 206      */
 207     private boolean _standalone = false;
 208 
 209 
 210     /**
 211      * List of element tag names whose text node children must
 212      * be output as CDATA.
 213      */
 214     private String[] _cdataElements;
 215 
 216 
 217     /**
 218      * List of element tag names whose text node children must
 219      * be output unescaped.
 220      */
 221     private String[] _nonEscapingElements;
 222 
 223 
 224     /**
 225      * The selected line separator.
 226      */
 227     private String _lineSeparator = LineSeparator.Web;
 228 
 229 
 230     /**
 231      * The line width at which to wrap long lines when indenting.
 232      */
 233     private int _lineWidth = Defaults.LineWidth;
 234 
 235 
 236     /**
 237      * True if spaces should be preserved in elements that do not
 238      * specify otherwise, or specify the default behavior.
 239      */
 240     private boolean _preserve = false;
 241         /** If true, an empty string valued attribute is output as "". If false and
 242          * and we are using the HTMLSerializer, then only the attribute name is
 243          * serialized. Defaults to false for backwards compatibility.
 244          */
 245         private boolean _preserveEmptyAttributes = false;
 246 
 247     /**
 248      * Constructs a new output format with the default values.
 249      */
 250     public OutputFormat()
 251     {
 252     }
 253 
 254 
 255     /**
 256      * Constructs a new output format with the default values for
 257      * the specified method and encoding. If <tt>indent</tt>
 258      * is true, the document will be pretty printed with the default
 259      * indentation level and default line wrapping.
 260      *
 261      * @param method The specified output method
 262      * @param encoding The specified encoding
 263      * @param indenting True for pretty printing
 264      * @see #setEncoding
 265      * @see #setIndenting
 266      * @see #setMethod
 267      */
 268     public OutputFormat( String method, String encoding, boolean indenting )
 269     {
 270         setMethod( method );
 271         setEncoding( encoding );
 272         setIndenting( indenting );
 273     }
 274 
 275     /**
 276      * Returns the method specified for this output format.
 277      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
 278      * or <tt>text</tt>, but it might be other values.
 279      * If no method was specified, null will be returned
 280      * and the most suitable method will be determined for
 281      * the document by calling {@link #whichMethod}.
 282      *
 283      * @return The specified output method, or null
 284      */
 285     public String getMethod()
 286     {
 287         return _method;
 288     }
 289 
 290 
 291     /**
 292      * Sets the method for this output format.
 293      *
 294      * @see #getMethod
 295      * @param method The output method, or null
 296      */
 297     public void setMethod( String method )
 298     {
 299         _method = method;
 300     }
 301 
 302 
 303     /**
 304      * Returns the version for this output method.
 305      * If no version was specified, will return null
 306      * and the default version number will be used.
 307      * If the serializerr does not support that particular
 308      * version, it should default to a supported version.
 309      *
 310      * @return The specified method version, or null
 311      */
 312     public String getVersion()
 313     {
 314         return _version;
 315     }
 316 
 317 
 318     /**
 319      * Sets the version for this output method.
 320      * For XML the value would be "1.0", for HTML
 321      * it would be "4.0".
 322      *
 323      * @see #getVersion
 324      * @param version The output method version, or null
 325      */
 326     public void setVersion( String version )
 327     {
 328         _version = version;
 329     }
 330 
 331 
 332     /**
 333      * Returns the indentation specified. If no indentation
 334      * was specified, zero is returned and the document
 335      * should not be indented.
 336      *
 337      * @return The indentation or zero
 338      * @see #setIndenting
 339      */
 340     public int getIndent()
 341     {
 342         return _indent;
 343     }
 344 
 345 
 346     /**
 347      * Returns true if indentation was specified.
 348      */
 349     public boolean getIndenting()
 350     {
 351         return ( _indent > 0 );
 352     }
 353 
 354 
 355     /**
 356      * Sets the indentation. The document will not be
 357      * indented if the indentation is set to zero.
 358      * Calling {@link #setIndenting} will reset this
 359      * value to zero (off) or the default (on).
 360      *
 361      * @param indent The indentation, or zero
 362      */
 363     public void setIndent( int indent )
 364     {
 365         if ( indent < 0 )
 366             _indent = 0;
 367         else
 368             _indent = indent;
 369     }
 370 
 371 
 372     /**
 373      * Sets the indentation on and off. When set on, the default
 374      * indentation level and default line wrapping is used
 375      * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}).
 376      * To specify a different indentation level or line wrapping,
 377      * use {@link #setIndent} and {@link #setLineWidth}.
 378      *
 379      * @param on True if indentation should be on
 380      */
 381     public void setIndenting( boolean on )
 382     {
 383         if ( on ) {
 384             _indent = Defaults.Indent;
 385             _lineWidth = Defaults.LineWidth;
 386         } else {
 387             _indent = 0;
 388             _lineWidth = 0;
 389         }
 390     }
 391 
 392 
 393     /**
 394      * Returns the specified encoding. If no encoding was
 395      * specified, the default is always "UTF-8".
 396      *
 397      * @return The encoding
 398      */
 399     public String getEncoding()
 400     {
 401         return _encoding;
 402     }
 403 
 404 
 405     /**
 406      * Sets the encoding for this output method. If no
 407      * encoding was specified, the default is always "UTF-8".
 408      * Make sure the encoding is compatible with the one
 409      * used by the {@link java.io.Writer}.
 410      *
 411      * @see #getEncoding
 412      * @param encoding The encoding, or null
 413      */
 414     public void setEncoding( String encoding )
 415     {
 416         _encoding = encoding;
 417         _encodingInfo = null;
 418     }
 419 
 420     /**
 421      * Sets the encoding for this output method with an <code>EncodingInfo</code>
 422      * instance.
 423      */
 424     public void setEncoding(EncodingInfo encInfo) {
 425         _encoding = encInfo.getIANAName();
 426         _encodingInfo = encInfo;
 427     }
 428 
 429     /**
 430      * Returns an <code>EncodingInfo<code> instance for the encoding.
 431      *
 432      * @see #setEncoding
 433      */
 434     public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException {
 435         if (_encodingInfo == null)
 436             _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames);
 437         return _encodingInfo;
 438     }
 439 
 440     /**
 441      * Sets whether java encoding names are permitted
 442      */
 443     public void setAllowJavaNames (boolean allow) {
 444         _allowJavaNames = allow;
 445     }
 446 
 447     /**
 448      * Returns whether java encoding names are permitted
 449      */
 450     public boolean setAllowJavaNames () {
 451         return _allowJavaNames;
 452     }
 453 
 454     /**
 455      * Returns the specified media type, or null.
 456      * To determine the media type based on the
 457      * document type, use {@link #whichMediaType}.
 458      *
 459      * @return The specified media type, or null
 460      */
 461     public String getMediaType()
 462     {
 463         return _mediaType;
 464     }
 465 
 466 
 467     /**
 468      * Sets the media type.
 469      *
 470      * @see #getMediaType
 471      * @param mediaType The specified media type
 472      */
 473     public void setMediaType( String mediaType )
 474     {
 475         _mediaType = mediaType;
 476     }
 477 
 478 
 479     /**
 480      * Sets the document type public and system identifiers.
 481      * Required only if the DOM Document or SAX events do not
 482      * specify the document type, and one must be present in
 483      * the serialized document. Any document type specified
 484      * by the DOM Document or SAX events will override these
 485      * values.
 486      *
 487      * @param publicId The public identifier, or null
 488      * @param systemId The system identifier, or null
 489      */
 490     public void setDoctype( String publicId, String systemId )
 491     {
 492         _doctypePublic = publicId;
 493         _doctypeSystem = systemId;
 494     }
 495 
 496 
 497     /**
 498      * Returns the specified document type public identifier,
 499      * or null.
 500      */
 501     public String getDoctypePublic()
 502     {
 503         return _doctypePublic;
 504     }
 505 
 506 
 507     /**
 508      * Returns the specified document type system identifier,
 509      * or null.
 510      */
 511     public String getDoctypeSystem()
 512     {
 513         return _doctypeSystem;
 514     }
 515 
 516 
 517     /**
 518      * Returns true if comments should be ommited.
 519      * The default is false.
 520      */
 521     public boolean getOmitComments()
 522     {
 523         return _omitComments;
 524     }
 525 
 526 
 527     /**
 528      * Sets comment omitting on and off.
 529      *
 530      * @param omit True if comments should be ommited
 531      */
 532     public void setOmitComments( boolean omit )
 533     {
 534         _omitComments = omit;
 535     }
 536 
 537 
 538     /**
 539      * Returns true if the DOCTYPE declaration should
 540      * be ommited. The default is false.
 541      */
 542     public boolean getOmitDocumentType()
 543     {
 544         return _omitDoctype;
 545     }
 546 
 547 
 548     /**
 549      * Sets DOCTYPE declaration omitting on and off.
 550      *
 551      * @param omit True if DOCTYPE declaration should be ommited
 552      */
 553     public void setOmitDocumentType( boolean omit )
 554     {
 555         _omitDoctype = omit;
 556     }
 557 
 558 
 559     /**
 560      * Returns true if the XML document declaration should
 561      * be ommited. The default is false.
 562      */
 563     public boolean getOmitXMLDeclaration()
 564     {
 565         return _omitXmlDeclaration;
 566     }
 567 
 568 
 569     /**
 570      * Sets XML declaration omitting on and off.
 571      *
 572      * @param omit True if XML declaration should be ommited
 573      */
 574     public void setOmitXMLDeclaration( boolean omit )
 575     {
 576         _omitXmlDeclaration = omit;
 577     }
 578 
 579 
 580     /**
 581      * Returns true if the document type is standalone.
 582      * The default is false.
 583      */
 584     public boolean getStandalone()
 585     {
 586         return _standalone;
 587     }
 588 
 589 
 590     /**
 591      * Sets document DTD standalone. The public and system
 592      * identifiers must be null for the document to be
 593      * serialized as standalone.
 594      *
 595      * @param standalone True if document DTD is standalone
 596      */
 597     public void setStandalone( boolean standalone )
 598     {
 599         _standalone = standalone;
 600     }
 601 
 602 
 603     /**
 604      * Returns a list of all the elements whose text node children
 605      * should be output as CDATA, or null if no such elements were
 606      * specified.
 607      */
 608     public String[] getCDataElements()
 609     {
 610         return _cdataElements;
 611     }
 612 
 613 
 614     /**
 615      * Returns true if the text node children of the given elements
 616      * should be output as CDATA.
 617      *
 618      * @param tagName The element's tag name
 619      * @return True if should serialize as CDATA
 620      */
 621     public boolean isCDataElement( String tagName )
 622     {
 623         int i;
 624 
 625         if ( _cdataElements == null )
 626             return false;
 627         for ( i = 0 ; i < _cdataElements.length ; ++i )
 628             if ( _cdataElements[ i ].equals( tagName ) )
 629                 return true;
 630         return false;
 631     }
 632 
 633 
 634     /**
 635      * Sets the list of elements for which text node children
 636      * should be output as CDATA.
 637      *
 638      * @param cdataElements List of CDATA element tag names
 639      */
 640     public void setCDataElements( String[] cdataElements )
 641     {
 642         _cdataElements = cdataElements;
 643     }
 644 
 645 
 646     /**
 647      * Returns a list of all the elements whose text node children
 648      * should be output unescaped (no character references), or null
 649      * if no such elements were specified.
 650      */
 651     public String[] getNonEscapingElements()
 652     {
 653         return _nonEscapingElements;
 654     }
 655 
 656 
 657     /**
 658      * Returns true if the text node children of the given elements
 659      * should be output unescaped.
 660      *
 661      * @param tagName The element's tag name
 662      * @return True if should serialize unescaped
 663      */
 664     public boolean isNonEscapingElement( String tagName )
 665     {
 666         int i;
 667 
 668         if ( _nonEscapingElements == null ) {
 669             return false;
 670         }
 671         for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
 672             if ( _nonEscapingElements[ i ].equals( tagName ) )
 673                 return true;
 674         return false;
 675     }
 676 
 677 
 678     /**
 679      * Sets the list of elements for which text node children
 680      * should be output unescaped (no character references).
 681      *
 682      * @param nonEscapingElements List of unescaped element tag names
 683      */
 684     public void setNonEscapingElements( String[] nonEscapingElements )
 685     {
 686         _nonEscapingElements = nonEscapingElements;
 687     }
 688 
 689 
 690 
 691     /**
 692      * Returns a specific line separator to use. The default is the
 693      * Web line separator (<tt>\n</tt>). A string is returned to
 694      * support double codes (CR + LF).
 695      *
 696      * @return The specified line separator
 697      */
 698     public String getLineSeparator()
 699     {
 700         return _lineSeparator;
 701     }
 702 
 703 
 704     /**
 705      * Sets the line separator. The default is the Web line separator
 706      * (<tt>\n</tt>). The machine's line separator can be obtained
 707      * from the system property <tt>line.separator</tt>, but is only
 708      * useful if the document is edited on machines of the same type.
 709      * For general documents, use the Web line separator.
 710      *
 711      * @param lineSeparator The specified line separator
 712      */
 713     public void setLineSeparator( String lineSeparator )
 714     {
 715         if ( lineSeparator == null )
 716             _lineSeparator =  LineSeparator.Web;
 717         else
 718             _lineSeparator = lineSeparator;
 719     }
 720 
 721 
 722     /**
 723      * Returns true if the default behavior for this format is to
 724      * preserve spaces. All elements that do not specify otherwise
 725      * or specify the default behavior will be formatted based on
 726      * this rule. All elements that specify space preserving will
 727      * always preserve space.
 728      */
 729     public boolean getPreserveSpace()
 730     {
 731         return _preserve;
 732     }
 733 
 734 
 735     /**
 736      * Sets space preserving as the default behavior. The default is
 737      * space stripping and all elements that do not specify otherwise
 738      * or use the default value will not preserve spaces.
 739      *
 740      * @param preserve True if spaces should be preserved
 741      */
 742     public void setPreserveSpace( boolean preserve )
 743     {
 744         _preserve = preserve;
 745     }
 746 
 747 
 748     /**
 749      * Return the selected line width for breaking up long lines.
 750      * When indenting, and only when indenting, long lines will be
 751      * broken at space boundaries based on this line width.
 752      * No line wrapping occurs if this value is zero.
 753      */
 754     public int getLineWidth()
 755     {
 756         return _lineWidth;
 757     }
 758 
 759 
 760     /**
 761      * Sets the line width. If zero then no line wrapping will
 762      * occur. Calling {@link #setIndenting} will reset this
 763      * value to zero (off) or the default (on).
 764      *
 765      * @param lineWidth The line width to use, zero for default
 766      * @see #getLineWidth
 767      * @see #setIndenting
 768      */
 769     public void setLineWidth( int lineWidth )
 770     {
 771         if ( lineWidth <= 0 )
 772             _lineWidth = 0;
 773         else
 774             _lineWidth = lineWidth;
 775     }
 776         /**
 777          * Returns the preserveEmptyAttribute flag. If flag is false, then'
 778          * attributes with empty string values are output as the attribute
 779          * name only (in HTML mode).
 780          * @return preserve the preserve flag
 781          */     public boolean getPreserveEmptyAttributes () {          return _preserveEmptyAttributes;        }       /**
 782          * Sets the preserveEmptyAttribute flag. If flag is false, then'
 783          * attributes with empty string values are output as the attribute
 784          * name only (in HTML mode).
 785          * @param preserve the preserve flag
 786          */     public void setPreserveEmptyAttributes (boolean preserve) {             _preserveEmptyAttributes = preserve;    }
 787 
 788     /**
 789      * Returns the last printable character based on the selected
 790      * encoding. Control characters and non-printable characters
 791      * are always printed as character references.
 792      */
 793     public char getLastPrintable()
 794     {
 795         if ( getEncoding() != null &&
 796              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
 797             return 0xFF;
 798         else
 799             return 0xFFFF;
 800     }
 801 
 802 
 803     /**
 804      * Returns the suitable media format for a document
 805      * output with the specified method.
 806      */
 807     public static String whichMediaType( String method )
 808     {
 809         if ( method.equalsIgnoreCase( Method.XML ) )
 810             return "text/xml";
 811         if ( method.equalsIgnoreCase( Method.HTML ) )
 812             return "text/html";
 813         if ( method.equalsIgnoreCase( Method.XHTML ) )
 814             return "text/html";
 815         if ( method.equalsIgnoreCase( Method.TEXT ) )
 816             return "text/plain";
 817         if ( method.equalsIgnoreCase( Method.FOP ) )
 818             return "application/pdf";
 819         return null;
 820     }
 821 
 822 
 823 }