src/java.xml/share/classes/com/sun/org/apache/xml/internal/serialize/OutputFormat.java

Print this page




  19  * limitations under the License.
  20  */
  21 
  22 
  23 // Aug 21, 2000:
  24 //  Added ability to omit DOCTYPE declaration.
  25 //  Reported by Lars Martin <lars@smb-tec.com>
  26 // Aug 25, 2000:
  27 //  Added ability to omit comments.
  28 //  Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  29 
  30 
  31 package com.sun.org.apache.xml.internal.serialize;
  32 
  33 
  34 import java.io.UnsupportedEncodingException;
  35 
  36 import org.w3c.dom.Document;
  37 import org.w3c.dom.DocumentType;
  38 import org.w3c.dom.Node;
  39 import org.w3c.dom.html.HTMLDocument;
  40 
  41 
  42 /**
  43  * Specifies an output format to control the serializer. Based on the
  44  * XSLT specification for output format, plus additional parameters.
  45  * Used to select the suitable serializer and determine how the
  46  * document should be formatted on output.
  47  * <p>
  48  * The two interesting constructors are:
  49  * <ul>
  50  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
  51  *  for the specified method (XML, HTML, Text, etc), encoding and indentation
  52  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
  53  *  compatible with the document type (XML, HTML, Text, etc), encoding and
  54  *  indentation
  55  * </ul>
  56  *
  57  *
  58  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  59  *         <a href="mailto:visco@intalio.com">Keith Visco</a>


 256     /**
 257      * Constructs a new output format with the default values for
 258      * the specified method and encoding. If <tt>indent</tt>
 259      * is true, the document will be pretty printed with the default
 260      * indentation level and default line wrapping.
 261      *
 262      * @param method The specified output method
 263      * @param encoding The specified encoding
 264      * @param indenting True for pretty printing
 265      * @see #setEncoding
 266      * @see #setIndenting
 267      * @see #setMethod
 268      */
 269     public OutputFormat( String method, String encoding, boolean indenting )
 270     {
 271         setMethod( method );
 272         setEncoding( encoding );
 273         setIndenting( indenting );
 274     }
 275 
 276 
 277     /**
 278      * Constructs a new output format with the proper method,
 279      * document type identifiers and media type for the specified
 280      * document.
 281      *
 282      * @param doc The document to output
 283      * @see #whichMethod
 284      */
 285     public OutputFormat( Document doc )
 286     {
 287         setMethod( whichMethod( doc ) );
 288         setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
 289         setMediaType( whichMediaType( getMethod() ) );
 290     }
 291 
 292 
 293     /**
 294      * Constructs a new output format with the proper method,
 295      * document type identifiers and media type for the specified
 296      * document, and with the specified encoding. If <tt>indent</tt>
 297      * is true, the document will be pretty printed with the default
 298      * indentation level and default line wrapping.
 299      *
 300      * @param doc The document to output
 301      * @param encoding The specified encoding
 302      * @param indenting True for pretty printing
 303      * @see #setEncoding
 304      * @see #setIndenting
 305      * @see #whichMethod
 306      */
 307     public OutputFormat( Document doc, String encoding, boolean indenting )
 308     {
 309         this( doc );
 310         setEncoding( encoding );
 311         setIndenting( indenting );
 312     }
 313 
 314 
 315     /**
 316      * Returns the method specified for this output format.
 317      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
 318      * or <tt>text</tt>, but it might be other values.
 319      * If no method was specified, null will be returned
 320      * and the most suitable method will be determined for
 321      * the document by calling {@link #whichMethod}.
 322      *
 323      * @return The specified output method, or null
 324      */
 325     public String getMethod()
 326     {
 327         return _method;
 328     }
 329 
 330 
 331     /**
 332      * Sets the method for this output format.
 333      *
 334      * @see #getMethod


 820          * @return preserve the preserve flag
 821          */     public boolean getPreserveEmptyAttributes () {          return _preserveEmptyAttributes;        }       /**
 822          * Sets the preserveEmptyAttribute flag. If flag is false, then'
 823          * attributes with empty string values are output as the attribute
 824          * name only (in HTML mode).
 825          * @param preserve the preserve flag
 826          */     public void setPreserveEmptyAttributes (boolean preserve) {             _preserveEmptyAttributes = preserve;    }
 827 
 828     /**
 829      * Returns the last printable character based on the selected
 830      * encoding. Control characters and non-printable characters
 831      * are always printed as character references.
 832      */
 833     public char getLastPrintable()
 834     {
 835         if ( getEncoding() != null &&
 836              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
 837             return 0xFF;
 838         else
 839             return 0xFFFF;
 840     }
 841 
 842 
 843     /**
 844      * Determine the output method for the specified document.
 845      * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
 846      * then the method is said to be <tt>html</tt>. If the root
 847      * element is 'html' and all text nodes preceding the root
 848      * element are all whitespace, then the method is said to be
 849      * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
 850      *
 851      * @param doc The document to check
 852      * @return The suitable method
 853      */
 854     public static String whichMethod( Document doc )
 855     {
 856         Node    node;
 857         String  value;
 858         int     i;
 859 
 860         // If document is derived from HTMLDocument then the default
 861         // method is html.
 862         if ( doc instanceof HTMLDocument )
 863             return Method.HTML;
 864 
 865         // Lookup the root element and the text nodes preceding it.
 866         // If root element is html and all text nodes contain whitespace
 867         // only, the method is html.
 868 
 869         // FIXME (SM) should we care about namespaces here?
 870 
 871         node = doc.getFirstChild();
 872         while (node != null) {
 873             // If the root element is html, the method is html.
 874             if ( node.getNodeType() == Node.ELEMENT_NODE ) {
 875                 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
 876                     return Method.HTML;
 877                 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
 878                     return Method.FOP;
 879                 } else {
 880                     return Method.XML;
 881                 }
 882             } else if ( node.getNodeType() == Node.TEXT_NODE ) {
 883                 // If a text node preceding the root element contains
 884                 // only whitespace, this might be html, otherwise it's
 885                 // definitely xml.
 886                 value = node.getNodeValue();
 887                 for ( i = 0 ; i < value.length() ; ++i )
 888                     if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
 889                          value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
 890                         return Method.XML;
 891             }
 892             node = node.getNextSibling();
 893         }
 894         // Anything else, the method is xml.
 895         return Method.XML;
 896     }
 897 
 898 
 899     /**
 900      * Returns the document type public identifier
 901      * specified for this document, or null.
 902      */
 903     public static String whichDoctypePublic( Document doc )
 904     {
 905         DocumentType doctype;
 906 
 907            /*  DOM Level 2 was introduced into the code base*/
 908            doctype = doc.getDoctype();
 909            if ( doctype != null ) {
 910            // Note on catch: DOM Level 1 does not specify this method
 911            // and the code will throw a NoSuchMethodError
 912            try {
 913            return doctype.getPublicId();
 914            } catch ( Error except ) {  }
 915            }
 916 
 917         if ( doc instanceof HTMLDocument )
 918             return DTD.XHTMLPublicId;
 919         return null;
 920     }
 921 
 922 
 923     /**
 924      * Returns the document type system identifier
 925      * specified for this document, or null.
 926      */
 927     public static String whichDoctypeSystem( Document doc )
 928     {
 929         DocumentType doctype;
 930 
 931         /* DOM Level 2 was introduced into the code base*/
 932            doctype = doc.getDoctype();
 933            if ( doctype != null ) {
 934            // Note on catch: DOM Level 1 does not specify this method
 935            // and the code will throw a NoSuchMethodError
 936            try {
 937            return doctype.getSystemId();
 938            } catch ( Error except ) { }
 939            }
 940 
 941         if ( doc instanceof HTMLDocument )
 942             return DTD.XHTMLSystemId;
 943         return null;
 944     }
 945 
 946 
 947     /**
 948      * Returns the suitable media format for a document
 949      * output with the specified method.
 950      */
 951     public static String whichMediaType( String method )
 952     {
 953         if ( method.equalsIgnoreCase( Method.XML ) )
 954             return "text/xml";
 955         if ( method.equalsIgnoreCase( Method.HTML ) )
 956             return "text/html";
 957         if ( method.equalsIgnoreCase( Method.XHTML ) )
 958             return "text/html";
 959         if ( method.equalsIgnoreCase( Method.TEXT ) )
 960             return "text/plain";
 961         if ( method.equalsIgnoreCase( Method.FOP ) )
 962             return "application/pdf";
 963         return null;


  19  * limitations under the License.
  20  */
  21 
  22 
  23 // Aug 21, 2000:
  24 //  Added ability to omit DOCTYPE declaration.
  25 //  Reported by Lars Martin <lars@smb-tec.com>
  26 // Aug 25, 2000:
  27 //  Added ability to omit comments.
  28 //  Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  29 
  30 
  31 package com.sun.org.apache.xml.internal.serialize;
  32 
  33 
  34 import java.io.UnsupportedEncodingException;
  35 
  36 import org.w3c.dom.Document;
  37 import org.w3c.dom.DocumentType;
  38 import org.w3c.dom.Node;

  39 
  40 
  41 /**
  42  * Specifies an output format to control the serializer. Based on the
  43  * XSLT specification for output format, plus additional parameters.
  44  * Used to select the suitable serializer and determine how the
  45  * document should be formatted on output.
  46  * <p>
  47  * The two interesting constructors are:
  48  * <ul>
  49  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
  50  *  for the specified method (XML, HTML, Text, etc), encoding and indentation
  51  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
  52  *  compatible with the document type (XML, HTML, Text, etc), encoding and
  53  *  indentation
  54  * </ul>
  55  *
  56  *
  57  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  58  *         <a href="mailto:visco@intalio.com">Keith Visco</a>


 255     /**
 256      * Constructs a new output format with the default values for
 257      * the specified method and encoding. If <tt>indent</tt>
 258      * is true, the document will be pretty printed with the default
 259      * indentation level and default line wrapping.
 260      *
 261      * @param method The specified output method
 262      * @param encoding The specified encoding
 263      * @param indenting True for pretty printing
 264      * @see #setEncoding
 265      * @see #setIndenting
 266      * @see #setMethod
 267      */
 268     public OutputFormat( String method, String encoding, boolean indenting )
 269     {
 270         setMethod( method );
 271         setEncoding( encoding );
 272         setIndenting( indenting );
 273     }
 274 







































 275     /**
 276      * Returns the method specified for this output format.
 277      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
 278      * or <tt>text</tt>, but it might be other values.
 279      * If no method was specified, null will be returned
 280      * and the most suitable method will be determined for
 281      * the document by calling {@link #whichMethod}.
 282      *
 283      * @return The specified output method, or null
 284      */
 285     public String getMethod()
 286     {
 287         return _method;
 288     }
 289 
 290 
 291     /**
 292      * Sets the method for this output format.
 293      *
 294      * @see #getMethod


 780          * @return preserve the preserve flag
 781          */     public boolean getPreserveEmptyAttributes () {          return _preserveEmptyAttributes;        }       /**
 782          * Sets the preserveEmptyAttribute flag. If flag is false, then'
 783          * attributes with empty string values are output as the attribute
 784          * name only (in HTML mode).
 785          * @param preserve the preserve flag
 786          */     public void setPreserveEmptyAttributes (boolean preserve) {             _preserveEmptyAttributes = preserve;    }
 787 
 788     /**
 789      * Returns the last printable character based on the selected
 790      * encoding. Control characters and non-printable characters
 791      * are always printed as character references.
 792      */
 793     public char getLastPrintable()
 794     {
 795         if ( getEncoding() != null &&
 796              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
 797             return 0xFF;
 798         else
 799             return 0xFFFF;








































































































 800     }
 801 
 802 
 803     /**
 804      * Returns the suitable media format for a document
 805      * output with the specified method.
 806      */
 807     public static String whichMediaType( String method )
 808     {
 809         if ( method.equalsIgnoreCase( Method.XML ) )
 810             return "text/xml";
 811         if ( method.equalsIgnoreCase( Method.HTML ) )
 812             return "text/html";
 813         if ( method.equalsIgnoreCase( Method.XHTML ) )
 814             return "text/html";
 815         if ( method.equalsIgnoreCase( Method.TEXT ) )
 816             return "text/plain";
 817         if ( method.equalsIgnoreCase( Method.FOP ) )
 818             return "application/pdf";
 819         return null;