19 * limitations under the License.
20 */
21
22
23 // Aug 21, 2000:
24 // Added ability to omit DOCTYPE declaration.
25 // Reported by Lars Martin <lars@smb-tec.com>
26 // Aug 25, 2000:
27 // Added ability to omit comments.
28 // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
29
30
31 package com.sun.org.apache.xml.internal.serialize;
32
33
34 import java.io.UnsupportedEncodingException;
35
36 import org.w3c.dom.Document;
37 import org.w3c.dom.DocumentType;
38 import org.w3c.dom.Node;
39 import org.w3c.dom.html.HTMLDocument;
40
41
42 /**
43 * Specifies an output format to control the serializer. Based on the
44 * XSLT specification for output format, plus additional parameters.
45 * Used to select the suitable serializer and determine how the
46 * document should be formatted on output.
47 * <p>
48 * The two interesting constructors are:
49 * <ul>
50 * <li>{@link #OutputFormat(String,String,boolean)} creates a format
51 * for the specified method (XML, HTML, Text, etc), encoding and indentation
52 * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
53 * compatible with the document type (XML, HTML, Text, etc), encoding and
54 * indentation
55 * </ul>
56 *
57 *
58 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
59 * <a href="mailto:visco@intalio.com">Keith Visco</a>
/**
 * Creates an output format for an explicitly named output method.
 * The three arguments are applied, in this order, through
 * {@link #setMethod}, {@link #setEncoding} and {@link #setIndenting};
 * every other setting keeps whatever value the class initialises it to.
 *
 * @param method The output method to use
 * @param encoding The encoding to use
 * @param indenting True to request pretty printing
 * @see #setMethod
 * @see #setEncoding
 * @see #setIndenting
 */
public OutputFormat(String method, String encoding, boolean indenting) {
    this.setMethod(method);
    this.setEncoding(encoding);
    this.setIndenting(indenting);
}
275
276
277 /**
278 * Constructs a new output format with the proper method,
279 * document type identifiers and media type for the specified
280 * document.
281 *
282 * @param doc The document to output
283 * @see #whichMethod
284 */
285 public OutputFormat( Document doc )
286 {
287 setMethod( whichMethod( doc ) );
288 setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
289 setMediaType( whichMediaType( getMethod() ) );
290 }
291
292
/**
 * Creates an output format derived from the given document and then
 * overrides its encoding and indenting settings.
 * <p>
 * Delegates to the single-argument document constructor first, then
 * applies <tt>encoding</tt> through {@link #setEncoding} and
 * <tt>indenting</tt> through {@link #setIndenting}.
 *
 * @param doc The document that will be output
 * @param encoding The encoding to use
 * @param indenting True to request pretty printing
 * @see #setEncoding
 * @see #setIndenting
 * @see #whichMethod
 */
public OutputFormat(Document doc, String encoding, boolean indenting) {
    this(doc);
    this.setEncoding(encoding);
    this.setIndenting(indenting);
}
313
314
315 /**
316 * Returns the method specified for this output format.
317 * Typically the method will be <tt>xml</tt>, <tt>html</tt>
318 * or <tt>text</tt>, but it might be other values.
319 * If no method was specified, null will be returned
320 * and the most suitable method will be determined for
321 * the document by calling {@link #whichMethod}.
322 *
323 * @return The specified output method, or null
324 */
325 public String getMethod()
326 {
327 return _method;
328 }
329
330
331 /**
332 * Sets the method for this output format.
333 *
334 * @see #getMethod
820 * @return preserve the preserve flag
*/
public boolean getPreserveEmptyAttributes() {
    return this._preserveEmptyAttributes;
}

/**
 * Sets the preserveEmptyAttributes flag. If the flag is false, then
 * attributes with empty string values are output as the attribute
 * name only (in HTML mode).
 *
 * @param preserve the preserve flag
 */
public void setPreserveEmptyAttributes(boolean preserve) {
    this._preserveEmptyAttributes = preserve;
}
827
828 /**
829 * Returns the last printable character based on the selected
830 * encoding. Control characters and non-printable characters
831 * are always printed as character references.
832 */
833 public char getLastPrintable()
834 {
835 if ( getEncoding() != null &&
836 ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
837 return 0xFF;
838 else
839 return 0xFFFF;
840 }
841
842
843 /**
844 * Determine the output method for the specified document.
845 * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
846 * then the method is said to be <tt>html</tt>. If the root
847 * element is 'html' and all text nodes preceding the root
848 * element are all whitespace, then the method is said to be
849 * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
850 *
851 * @param doc The document to check
852 * @return The suitable method
853 */
854 public static String whichMethod( Document doc )
855 {
856 Node node;
857 String value;
858 int i;
859
860 // If document is derived from HTMLDocument then the default
861 // method is html.
862 if ( doc instanceof HTMLDocument )
863 return Method.HTML;
864
865 // Lookup the root element and the text nodes preceding it.
866 // If root element is html and all text nodes contain whitespace
867 // only, the method is html.
868
869 // FIXME (SM) should we care about namespaces here?
870
871 node = doc.getFirstChild();
872 while (node != null) {
873 // If the root element is html, the method is html.
874 if ( node.getNodeType() == Node.ELEMENT_NODE ) {
875 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
876 return Method.HTML;
877 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
878 return Method.FOP;
879 } else {
880 return Method.XML;
881 }
882 } else if ( node.getNodeType() == Node.TEXT_NODE ) {
883 // If a text node preceding the root element contains
884 // only whitespace, this might be html, otherwise it's
885 // definitely xml.
886 value = node.getNodeValue();
887 for ( i = 0 ; i < value.length() ; ++i )
888 if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
889 value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
890 return Method.XML;
891 }
892 node = node.getNextSibling();
893 }
894 // Anything else, the method is xml.
895 return Method.XML;
896 }
897
898
899 /**
900 * Returns the document type public identifier
901 * specified for this document, or null.
902 */
903 public static String whichDoctypePublic( Document doc )
904 {
905 DocumentType doctype;
906
907 /* DOM Level 2 was introduced into the code base*/
908 doctype = doc.getDoctype();
909 if ( doctype != null ) {
910 // Note on catch: DOM Level 1 does not specify this method
911 // and the code will throw a NoSuchMethodError
912 try {
913 return doctype.getPublicId();
914 } catch ( Error except ) { }
915 }
916
917 if ( doc instanceof HTMLDocument )
918 return DTD.XHTMLPublicId;
919 return null;
920 }
921
922
923 /**
924 * Returns the document type system identifier
925 * specified for this document, or null.
926 */
927 public static String whichDoctypeSystem( Document doc )
928 {
929 DocumentType doctype;
930
931 /* DOM Level 2 was introduced into the code base*/
932 doctype = doc.getDoctype();
933 if ( doctype != null ) {
934 // Note on catch: DOM Level 1 does not specify this method
935 // and the code will throw a NoSuchMethodError
936 try {
937 return doctype.getSystemId();
938 } catch ( Error except ) { }
939 }
940
941 if ( doc instanceof HTMLDocument )
942 return DTD.XHTMLSystemId;
943 return null;
944 }
945
946
947 /**
948 * Returns the suitable media format for a document
949 * output with the specified method.
950 */
951 public static String whichMediaType( String method )
952 {
953 if ( method.equalsIgnoreCase( Method.XML ) )
954 return "text/xml";
955 if ( method.equalsIgnoreCase( Method.HTML ) )
956 return "text/html";
957 if ( method.equalsIgnoreCase( Method.XHTML ) )
958 return "text/html";
959 if ( method.equalsIgnoreCase( Method.TEXT ) )
960 return "text/plain";
961 if ( method.equalsIgnoreCase( Method.FOP ) )
962 return "application/pdf";
963 return null;
|
19 * limitations under the License.
20 */
21
22
23 // Aug 21, 2000:
24 // Added ability to omit DOCTYPE declaration.
25 // Reported by Lars Martin <lars@smb-tec.com>
26 // Aug 25, 2000:
27 // Added ability to omit comments.
28 // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
29
30
31 package com.sun.org.apache.xml.internal.serialize;
32
33
34 import java.io.UnsupportedEncodingException;
35
36 import org.w3c.dom.Document;
37 import org.w3c.dom.DocumentType;
38 import org.w3c.dom.Node;
39
40
41 /**
42 * Specifies an output format to control the serializer. Based on the
43 * XSLT specification for output format, plus additional parameters.
44 * Used to select the suitable serializer and determine how the
45 * document should be formatted on output.
46 * <p>
47 * The two interesting constructors are:
48 * <ul>
49 * <li>{@link #OutputFormat(String,String,boolean)} creates a format
50 * for the specified method (XML, HTML, Text, etc), encoding and indentation
51 * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
52 * compatible with the document type (XML, HTML, Text, etc), encoding and
53 * indentation
54 * </ul>
55 *
56 *
57 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
58 * <a href="mailto:visco@intalio.com">Keith Visco</a>
/**
 * Creates an output format for an explicitly named output method.
 * The three arguments are applied, in this order, through
 * {@link #setMethod}, {@link #setEncoding} and {@link #setIndenting};
 * every other setting keeps whatever value the class initialises it to.
 *
 * @param method The output method to use
 * @param encoding The encoding to use
 * @param indenting True to request pretty printing
 * @see #setMethod
 * @see #setEncoding
 * @see #setIndenting
 */
public OutputFormat(String method, String encoding, boolean indenting) {
    this.setMethod(method);
    this.setEncoding(encoding);
    this.setIndenting(indenting);
}
274
275 /**
276 * Returns the method specified for this output format.
277 * Typically the method will be <tt>xml</tt>, <tt>html</tt>
278 * or <tt>text</tt>, but it might be other values.
279 * If no method was specified, null will be returned
280 * and the most suitable method will be determined for
281 * the document by calling {@link #whichMethod}.
282 *
283 * @return The specified output method, or null
284 */
285 public String getMethod()
286 {
287 return _method;
288 }
289
290
291 /**
292 * Sets the method for this output format.
293 *
294 * @see #getMethod
780 * @return preserve the preserve flag
*/
public boolean getPreserveEmptyAttributes() {
    return this._preserveEmptyAttributes;
}

/**
 * Sets the preserveEmptyAttributes flag. If the flag is false, then
 * attributes with empty string values are output as the attribute
 * name only (in HTML mode).
 *
 * @param preserve the preserve flag
 */
public void setPreserveEmptyAttributes(boolean preserve) {
    this._preserveEmptyAttributes = preserve;
}
787
788 /**
789 * Returns the last printable character based on the selected
790 * encoding. Control characters and non-printable characters
791 * are always printed as character references.
792 */
793 public char getLastPrintable()
794 {
795 if ( getEncoding() != null &&
796 ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
797 return 0xFF;
798 else
799 return 0xFFFF;
800 }
801
802
803 /**
804 * Returns the suitable media format for a document
805 * output with the specified method.
806 */
807 public static String whichMediaType( String method )
808 {
809 if ( method.equalsIgnoreCase( Method.XML ) )
810 return "text/xml";
811 if ( method.equalsIgnoreCase( Method.HTML ) )
812 return "text/html";
813 if ( method.equalsIgnoreCase( Method.XHTML ) )
814 return "text/html";
815 if ( method.equalsIgnoreCase( Method.TEXT ) )
816 return "text/plain";
817 if ( method.equalsIgnoreCase( Method.FOP ) )
818 return "application/pdf";
819 return null;
|