1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 
  23 // Aug 21, 2000:
  24 //  Added ability to omit DOCTYPE declaration.
  25 //  Reported by Lars Martin <lars@smb-tec.com>
  26 // Aug 25, 2000:
  27 //  Added ability to omit comments.
  28 //  Contributed by Anupam Bagchi <abagchi@jtcsv.com>
  29 
  30 
  31 package com.sun.org.apache.xml.internal.serialize;
  32 
  33 
  34 import java.io.UnsupportedEncodingException;
  35 
  36 import org.w3c.dom.Document;
  37 import org.w3c.dom.DocumentType;
  38 import org.w3c.dom.Node;
  39 import org.w3c.dom.html.HTMLDocument;
  40 
  41 
  42 /**
  43  * Specifies an output format to control the serializer. Based on the
  44  * XSLT specification for output format, plus additional parameters.
  45  * Used to select the suitable serializer and determine how the
  46  * document should be formatted on output.
  47  * <p>
  48  * The two interesting constructors are:
  49  * <ul>
  50  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
  51  *  for the specified method (XML, HTML, Text, etc), encoding and indentation
  52  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
  53  *  compatible with the document type (XML, HTML, Text, etc), encoding and
  54  *  indentation
  55  * </ul>
  56  *
  57  *
  58  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  59  *         <a href="mailto:visco@intalio.com">Keith Visco</a>
  60  * @see Serializer
  61  * @see Method
  62  * @see LineSeparator
  63  *
  64  * @deprecated As of JDK 1.9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  65  * is replaced by that of Xalan. Main class
  66  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  67  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  68  */
  69 public class OutputFormat
  70 {
  71 
  72 
  73     public static class DTD
  74     {
  75 
  76         /**
  77          * Public identifier for HTML 4.01 (Strict) document type.
  78          */
  79         public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
  80 
  81         /**
  82          * System identifier for HTML 4.01 (Strict) document type.
  83          */
  84         public static final String HTMLSystemId =
  85             "http://www.w3.org/TR/html4/strict.dtd";
  86 
  87         /**
  88          * Public identifier for XHTML 1.0 (Strict) document type.
  89          */
  90         public static final String XHTMLPublicId =
  91             "-//W3C//DTD XHTML 1.0 Strict//EN";
  92 
  93         /**
  94          * System identifier for XHTML 1.0 (Strict) document type.
  95          */
  96         public static final String XHTMLSystemId =
  97             "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
  98 
  99     }
 100 
 101 
 102     public static class Defaults
 103     {
 104 
 105         /**
 106          * If indentation is turned on, the default identation
 107          * level is 4.
 108          *
 109          * @see #setIndenting(boolean)
 110          */
 111         public static final int Indent = 4;
 112 
 113         /**
 114          * The default encoding for Web documents it UTF-8.
 115          *
 116          * @see #getEncoding()
 117          */
 118         public static final String Encoding = "UTF-8";
 119 
 120         /**
 121          * The default line width at which to break long lines
 122          * when identing. This is set to 72.
 123          */
 124         public static final int LineWidth = 72;
 125 
 126     }
 127 
 128 
 129     /**
 130      * Holds the output method specified for this document,
 131      * or null if no method was specified.
 132      */
 133     private String _method;
 134 
 135 
 136     /**
 137      * Specifies the version of the output method.
 138      */
 139     private String _version;
 140 
 141 
 142     /**
 143      * The indentation level, or zero if no indentation
 144      * was requested.
 145      */
 146     private int _indent = 0;
 147 
 148 
 149     /**
 150      * The encoding to use, if an input stream is used.
 151      * The default is always UTF-8.
 152      */
 153     private String _encoding = Defaults.Encoding;
 154 
 155     /**
 156      * The EncodingInfo instance for _encoding.
 157      */
 158     private EncodingInfo _encodingInfo = null;
 159 
 160     // whether java names for encodings are permitted
 161     private boolean _allowJavaNames = false;
 162 
 163     /**
 164      * The specified media type or null.
 165      */
 166     private String _mediaType;
 167 
 168 
 169     /**
 170      * The specified document type system identifier, or null.
 171      */
 172     private String _doctypeSystem;
 173 
 174 
 175     /**
 176      * The specified document type public identifier, or null.
 177      */
 178     private String _doctypePublic;
 179 
 180 
 181     /**
 182      * Ture if the XML declaration should be ommited;
 183      */
 184     private boolean _omitXmlDeclaration = false;
 185 
 186 
 187     /**
 188      * Ture if the DOCTYPE declaration should be ommited;
 189      */
 190     private boolean _omitDoctype = false;
 191 
 192 
 193     /**
 194      * Ture if comments should be ommited;
 195      */
 196     private boolean _omitComments = false;
 197 
 198 
 199     /**
 200      * Ture if the comments should be ommited;
 201      */
 202     private boolean _stripComments = false;
 203 
 204 
 205     /**
 206      * True if the document type should be marked as standalone.
 207      */
 208     private boolean _standalone = false;
 209 
 210 
 211     /**
 212      * List of element tag names whose text node children must
 213      * be output as CDATA.
 214      */
 215     private String[] _cdataElements;
 216 
 217 
 218     /**
 219      * List of element tag names whose text node children must
 220      * be output unescaped.
 221      */
 222     private String[] _nonEscapingElements;
 223 
 224 
 225     /**
 226      * The selected line separator.
 227      */
 228     private String _lineSeparator = LineSeparator.Web;
 229 
 230 
 231     /**
 232      * The line width at which to wrap long lines when indenting.
 233      */
 234     private int _lineWidth = Defaults.LineWidth;
 235 
 236 
 237     /**
 238      * True if spaces should be preserved in elements that do not
 239      * specify otherwise, or specify the default behavior.
 240      */
 241     private boolean _preserve = false;
 242         /** If true, an empty string valued attribute is output as "". If false and
 243          * and we are using the HTMLSerializer, then only the attribute name is
 244          * serialized. Defaults to false for backwards compatibility.
 245          */
 246         private boolean _preserveEmptyAttributes = false;
 247 
 248     /**
 249      * Constructs a new output format with the default values.
 250      */
 251     public OutputFormat()
 252     {
 253     }
 254 
 255 
 256     /**
 257      * Constructs a new output format with the default values for
 258      * the specified method and encoding. If <tt>indent</tt>
 259      * is true, the document will be pretty printed with the default
 260      * indentation level and default line wrapping.
 261      *
 262      * @param method The specified output method
 263      * @param encoding The specified encoding
 264      * @param indenting True for pretty printing
 265      * @see #setEncoding
 266      * @see #setIndenting
 267      * @see #setMethod
 268      */
 269     public OutputFormat( String method, String encoding, boolean indenting )
 270     {
 271         setMethod( method );
 272         setEncoding( encoding );
 273         setIndenting( indenting );
 274     }
 275 
 276 
 277     /**
 278      * Constructs a new output format with the proper method,
 279      * document type identifiers and media type for the specified
 280      * document.
 281      *
 282      * @param doc The document to output
 283      * @see #whichMethod
 284      */
 285     public OutputFormat( Document doc )
 286     {
 287         setMethod( whichMethod( doc ) );
 288         setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
 289         setMediaType( whichMediaType( getMethod() ) );
 290     }
 291 
 292 
 293     /**
 294      * Constructs a new output format with the proper method,
 295      * document type identifiers and media type for the specified
 296      * document, and with the specified encoding. If <tt>indent</tt>
 297      * is true, the document will be pretty printed with the default
 298      * indentation level and default line wrapping.
 299      *
 300      * @param doc The document to output
 301      * @param encoding The specified encoding
 302      * @param indenting True for pretty printing
 303      * @see #setEncoding
 304      * @see #setIndenting
 305      * @see #whichMethod
 306      */
 307     public OutputFormat( Document doc, String encoding, boolean indenting )
 308     {
 309         this( doc );
 310         setEncoding( encoding );
 311         setIndenting( indenting );
 312     }
 313 
 314 
 315     /**
 316      * Returns the method specified for this output format.
 317      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
 318      * or <tt>text</tt>, but it might be other values.
 319      * If no method was specified, null will be returned
 320      * and the most suitable method will be determined for
 321      * the document by calling {@link #whichMethod}.
 322      *
 323      * @return The specified output method, or null
 324      */
 325     public String getMethod()
 326     {
 327         return _method;
 328     }
 329 
 330 
 331     /**
 332      * Sets the method for this output format.
 333      *
 334      * @see #getMethod
 335      * @param method The output method, or null
 336      */
 337     public void setMethod( String method )
 338     {
 339         _method = method;
 340     }
 341 
 342 
 343     /**
 344      * Returns the version for this output method.
 345      * If no version was specified, will return null
 346      * and the default version number will be used.
 347      * If the serializerr does not support that particular
 348      * version, it should default to a supported version.
 349      *
 350      * @return The specified method version, or null
 351      */
 352     public String getVersion()
 353     {
 354         return _version;
 355     }
 356 
 357 
 358     /**
 359      * Sets the version for this output method.
 360      * For XML the value would be "1.0", for HTML
 361      * it would be "4.0".
 362      *
 363      * @see #getVersion
 364      * @param version The output method version, or null
 365      */
 366     public void setVersion( String version )
 367     {
 368         _version = version;
 369     }
 370 
 371 
 372     /**
 373      * Returns the indentation specified. If no indentation
 374      * was specified, zero is returned and the document
 375      * should not be indented.
 376      *
 377      * @return The indentation or zero
 378      * @see #setIndenting
 379      */
 380     public int getIndent()
 381     {
 382         return _indent;
 383     }
 384 
 385 
 386     /**
 387      * Returns true if indentation was specified.
 388      */
 389     public boolean getIndenting()
 390     {
 391         return ( _indent > 0 );
 392     }
 393 
 394 
 395     /**
 396      * Sets the indentation. The document will not be
 397      * indented if the indentation is set to zero.
 398      * Calling {@link #setIndenting} will reset this
 399      * value to zero (off) or the default (on).
 400      *
 401      * @param indent The indentation, or zero
 402      */
 403     public void setIndent( int indent )
 404     {
 405         if ( indent < 0 )
 406             _indent = 0;
 407         else
 408             _indent = indent;
 409     }
 410 
 411 
 412     /**
 413      * Sets the indentation on and off. When set on, the default
 414      * indentation level and default line wrapping is used
 415      * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}).
 416      * To specify a different indentation level or line wrapping,
 417      * use {@link #setIndent} and {@link #setLineWidth}.
 418      *
 419      * @param on True if indentation should be on
 420      */
 421     public void setIndenting( boolean on )
 422     {
 423         if ( on ) {
 424             _indent = Defaults.Indent;
 425             _lineWidth = Defaults.LineWidth;
 426         } else {
 427             _indent = 0;
 428             _lineWidth = 0;
 429         }
 430     }
 431 
 432 
 433     /**
 434      * Returns the specified encoding. If no encoding was
 435      * specified, the default is always "UTF-8".
 436      *
 437      * @return The encoding
 438      */
 439     public String getEncoding()
 440     {
 441         return _encoding;
 442     }
 443 
 444 
 445     /**
 446      * Sets the encoding for this output method. If no
 447      * encoding was specified, the default is always "UTF-8".
 448      * Make sure the encoding is compatible with the one
 449      * used by the {@link java.io.Writer}.
 450      *
 451      * @see #getEncoding
 452      * @param encoding The encoding, or null
 453      */
 454     public void setEncoding( String encoding )
 455     {
 456         _encoding = encoding;
 457         _encodingInfo = null;
 458     }
 459 
 460     /**
 461      * Sets the encoding for this output method with an <code>EncodingInfo</code>
 462      * instance.
 463      */
 464     public void setEncoding(EncodingInfo encInfo) {
 465         _encoding = encInfo.getIANAName();
 466         _encodingInfo = encInfo;
 467     }
 468 
 469     /**
 470      * Returns an <code>EncodingInfo<code> instance for the encoding.
 471      *
 472      * @see #setEncoding
 473      */
 474     public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException {
 475         if (_encodingInfo == null)
 476             _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames);
 477         return _encodingInfo;
 478     }
 479 
 480     /**
 481      * Sets whether java encoding names are permitted
 482      */
 483     public void setAllowJavaNames (boolean allow) {
 484         _allowJavaNames = allow;
 485     }
 486 
 487     /**
 488      * Returns whether java encoding names are permitted
 489      */
 490     public boolean setAllowJavaNames () {
 491         return _allowJavaNames;
 492     }
 493 
 494     /**
 495      * Returns the specified media type, or null.
 496      * To determine the media type based on the
 497      * document type, use {@link #whichMediaType}.
 498      *
 499      * @return The specified media type, or null
 500      */
 501     public String getMediaType()
 502     {
 503         return _mediaType;
 504     }
 505 
 506 
 507     /**
 508      * Sets the media type.
 509      *
 510      * @see #getMediaType
 511      * @param mediaType The specified media type
 512      */
 513     public void setMediaType( String mediaType )
 514     {
 515         _mediaType = mediaType;
 516     }
 517 
 518 
 519     /**
 520      * Sets the document type public and system identifiers.
 521      * Required only if the DOM Document or SAX events do not
 522      * specify the document type, and one must be present in
 523      * the serialized document. Any document type specified
 524      * by the DOM Document or SAX events will override these
 525      * values.
 526      *
 527      * @param publicId The public identifier, or null
 528      * @param systemId The system identifier, or null
 529      */
 530     public void setDoctype( String publicId, String systemId )
 531     {
 532         _doctypePublic = publicId;
 533         _doctypeSystem = systemId;
 534     }
 535 
 536 
 537     /**
 538      * Returns the specified document type public identifier,
 539      * or null.
 540      */
 541     public String getDoctypePublic()
 542     {
 543         return _doctypePublic;
 544     }
 545 
 546 
 547     /**
 548      * Returns the specified document type system identifier,
 549      * or null.
 550      */
 551     public String getDoctypeSystem()
 552     {
 553         return _doctypeSystem;
 554     }
 555 
 556 
 557     /**
 558      * Returns true if comments should be ommited.
 559      * The default is false.
 560      */
 561     public boolean getOmitComments()
 562     {
 563         return _omitComments;
 564     }
 565 
 566 
 567     /**
 568      * Sets comment omitting on and off.
 569      *
 570      * @param omit True if comments should be ommited
 571      */
 572     public void setOmitComments( boolean omit )
 573     {
 574         _omitComments = omit;
 575     }
 576 
 577 
 578     /**
 579      * Returns true if the DOCTYPE declaration should
 580      * be ommited. The default is false.
 581      */
 582     public boolean getOmitDocumentType()
 583     {
 584         return _omitDoctype;
 585     }
 586 
 587 
 588     /**
 589      * Sets DOCTYPE declaration omitting on and off.
 590      *
 591      * @param omit True if DOCTYPE declaration should be ommited
 592      */
 593     public void setOmitDocumentType( boolean omit )
 594     {
 595         _omitDoctype = omit;
 596     }
 597 
 598 
 599     /**
 600      * Returns true if the XML document declaration should
 601      * be ommited. The default is false.
 602      */
 603     public boolean getOmitXMLDeclaration()
 604     {
 605         return _omitXmlDeclaration;
 606     }
 607 
 608 
 609     /**
 610      * Sets XML declaration omitting on and off.
 611      *
 612      * @param omit True if XML declaration should be ommited
 613      */
 614     public void setOmitXMLDeclaration( boolean omit )
 615     {
 616         _omitXmlDeclaration = omit;
 617     }
 618 
 619 
 620     /**
 621      * Returns true if the document type is standalone.
 622      * The default is false.
 623      */
 624     public boolean getStandalone()
 625     {
 626         return _standalone;
 627     }
 628 
 629 
 630     /**
 631      * Sets document DTD standalone. The public and system
 632      * identifiers must be null for the document to be
 633      * serialized as standalone.
 634      *
 635      * @param standalone True if document DTD is standalone
 636      */
 637     public void setStandalone( boolean standalone )
 638     {
 639         _standalone = standalone;
 640     }
 641 
 642 
 643     /**
 644      * Returns a list of all the elements whose text node children
 645      * should be output as CDATA, or null if no such elements were
 646      * specified.
 647      */
 648     public String[] getCDataElements()
 649     {
 650         return _cdataElements;
 651     }
 652 
 653 
 654     /**
 655      * Returns true if the text node children of the given elements
 656      * should be output as CDATA.
 657      *
 658      * @param tagName The element's tag name
 659      * @return True if should serialize as CDATA
 660      */
 661     public boolean isCDataElement( String tagName )
 662     {
 663         int i;
 664 
 665         if ( _cdataElements == null )
 666             return false;
 667         for ( i = 0 ; i < _cdataElements.length ; ++i )
 668             if ( _cdataElements[ i ].equals( tagName ) )
 669                 return true;
 670         return false;
 671     }
 672 
 673 
 674     /**
 675      * Sets the list of elements for which text node children
 676      * should be output as CDATA.
 677      *
 678      * @param cdataElements List of CDATA element tag names
 679      */
 680     public void setCDataElements( String[] cdataElements )
 681     {
 682         _cdataElements = cdataElements;
 683     }
 684 
 685 
 686     /**
 687      * Returns a list of all the elements whose text node children
 688      * should be output unescaped (no character references), or null
 689      * if no such elements were specified.
 690      */
 691     public String[] getNonEscapingElements()
 692     {
 693         return _nonEscapingElements;
 694     }
 695 
 696 
 697     /**
 698      * Returns true if the text node children of the given elements
 699      * should be output unescaped.
 700      *
 701      * @param tagName The element's tag name
 702      * @return True if should serialize unescaped
 703      */
 704     public boolean isNonEscapingElement( String tagName )
 705     {
 706         int i;
 707 
 708         if ( _nonEscapingElements == null ) {
 709             return false;
 710         }
 711         for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
 712             if ( _nonEscapingElements[ i ].equals( tagName ) )
 713                 return true;
 714         return false;
 715     }
 716 
 717 
 718     /**
 719      * Sets the list of elements for which text node children
 720      * should be output unescaped (no character references).
 721      *
 722      * @param nonEscapingElements List of unescaped element tag names
 723      */
 724     public void setNonEscapingElements( String[] nonEscapingElements )
 725     {
 726         _nonEscapingElements = nonEscapingElements;
 727     }
 728 
 729 
 730 
 731     /**
 732      * Returns a specific line separator to use. The default is the
 733      * Web line separator (<tt>\n</tt>). A string is returned to
 734      * support double codes (CR + LF).
 735      *
 736      * @return The specified line separator
 737      */
 738     public String getLineSeparator()
 739     {
 740         return _lineSeparator;
 741     }
 742 
 743 
 744     /**
 745      * Sets the line separator. The default is the Web line separator
 746      * (<tt>\n</tt>). The machine's line separator can be obtained
 747      * from the system property <tt>line.separator</tt>, but is only
 748      * useful if the document is edited on machines of the same type.
 749      * For general documents, use the Web line separator.
 750      *
 751      * @param lineSeparator The specified line separator
 752      */
 753     public void setLineSeparator( String lineSeparator )
 754     {
 755         if ( lineSeparator == null )
 756             _lineSeparator =  LineSeparator.Web;
 757         else
 758             _lineSeparator = lineSeparator;
 759     }
 760 
 761 
 762     /**
 763      * Returns true if the default behavior for this format is to
 764      * preserve spaces. All elements that do not specify otherwise
 765      * or specify the default behavior will be formatted based on
 766      * this rule. All elements that specify space preserving will
 767      * always preserve space.
 768      */
 769     public boolean getPreserveSpace()
 770     {
 771         return _preserve;
 772     }
 773 
 774 
 775     /**
 776      * Sets space preserving as the default behavior. The default is
 777      * space stripping and all elements that do not specify otherwise
 778      * or use the default value will not preserve spaces.
 779      *
 780      * @param preserve True if spaces should be preserved
 781      */
 782     public void setPreserveSpace( boolean preserve )
 783     {
 784         _preserve = preserve;
 785     }
 786 
 787 
 788     /**
 789      * Return the selected line width for breaking up long lines.
 790      * When indenting, and only when indenting, long lines will be
 791      * broken at space boundaries based on this line width.
 792      * No line wrapping occurs if this value is zero.
 793      */
 794     public int getLineWidth()
 795     {
 796         return _lineWidth;
 797     }
 798 
 799 
 800     /**
 801      * Sets the line width. If zero then no line wrapping will
 802      * occur. Calling {@link #setIndenting} will reset this
 803      * value to zero (off) or the default (on).
 804      *
 805      * @param lineWidth The line width to use, zero for default
 806      * @see #getLineWidth
 807      * @see #setIndenting
 808      */
 809     public void setLineWidth( int lineWidth )
 810     {
 811         if ( lineWidth <= 0 )
 812             _lineWidth = 0;
 813         else
 814             _lineWidth = lineWidth;
 815     }
 816         /**
 817          * Returns the preserveEmptyAttribute flag. If flag is false, then'
 818          * attributes with empty string values are output as the attribute
 819          * name only (in HTML mode).
 820          * @return preserve the preserve flag
 821          */     public boolean getPreserveEmptyAttributes () {          return _preserveEmptyAttributes;        }       /**
 822          * Sets the preserveEmptyAttribute flag. If flag is false, then'
 823          * attributes with empty string values are output as the attribute
 824          * name only (in HTML mode).
 825          * @param preserve the preserve flag
 826          */     public void setPreserveEmptyAttributes (boolean preserve) {             _preserveEmptyAttributes = preserve;    }
 827 
 828     /**
 829      * Returns the last printable character based on the selected
 830      * encoding. Control characters and non-printable characters
 831      * are always printed as character references.
 832      */
 833     public char getLastPrintable()
 834     {
 835         if ( getEncoding() != null &&
 836              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
 837             return 0xFF;
 838         else
 839             return 0xFFFF;
 840     }
 841 
 842 
 843     /**
 844      * Determine the output method for the specified document.
 845      * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
 846      * then the method is said to be <tt>html</tt>. If the root
 847      * element is 'html' and all text nodes preceding the root
 848      * element are all whitespace, then the method is said to be
 849      * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
 850      *
 851      * @param doc The document to check
 852      * @return The suitable method
 853      */
 854     public static String whichMethod( Document doc )
 855     {
 856         Node    node;
 857         String  value;
 858         int     i;
 859 
 860         // If document is derived from HTMLDocument then the default
 861         // method is html.
 862         if ( doc instanceof HTMLDocument )
 863             return Method.HTML;
 864 
 865         // Lookup the root element and the text nodes preceding it.
 866         // If root element is html and all text nodes contain whitespace
 867         // only, the method is html.
 868 
 869         // FIXME (SM) should we care about namespaces here?
 870 
 871         node = doc.getFirstChild();
 872         while (node != null) {
 873             // If the root element is html, the method is html.
 874             if ( node.getNodeType() == Node.ELEMENT_NODE ) {
 875                 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
 876                     return Method.HTML;
 877                 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
 878                     return Method.FOP;
 879                 } else {
 880                     return Method.XML;
 881                 }
 882             } else if ( node.getNodeType() == Node.TEXT_NODE ) {
 883                 // If a text node preceding the root element contains
 884                 // only whitespace, this might be html, otherwise it's
 885                 // definitely xml.
 886                 value = node.getNodeValue();
 887                 for ( i = 0 ; i < value.length() ; ++i )
 888                     if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
 889                          value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
 890                         return Method.XML;
 891             }
 892             node = node.getNextSibling();
 893         }
 894         // Anything else, the method is xml.
 895         return Method.XML;
 896     }
 897 
 898 
 899     /**
 900      * Returns the document type public identifier
 901      * specified for this document, or null.
 902      */
 903     public static String whichDoctypePublic( Document doc )
 904     {
 905         DocumentType doctype;
 906 
 907            /*  DOM Level 2 was introduced into the code base*/
 908            doctype = doc.getDoctype();
 909            if ( doctype != null ) {
 910            // Note on catch: DOM Level 1 does not specify this method
 911            // and the code will throw a NoSuchMethodError
 912            try {
 913            return doctype.getPublicId();
 914            } catch ( Error except ) {  }
 915            }
 916 
 917         if ( doc instanceof HTMLDocument )
 918             return DTD.XHTMLPublicId;
 919         return null;
 920     }
 921 
 922 
 923     /**
 924      * Returns the document type system identifier
 925      * specified for this document, or null.
 926      */
 927     public static String whichDoctypeSystem( Document doc )
 928     {
 929         DocumentType doctype;
 930 
 931         /* DOM Level 2 was introduced into the code base*/
 932            doctype = doc.getDoctype();
 933            if ( doctype != null ) {
 934            // Note on catch: DOM Level 1 does not specify this method
 935            // and the code will throw a NoSuchMethodError
 936            try {
 937            return doctype.getSystemId();
 938            } catch ( Error except ) { }
 939            }
 940 
 941         if ( doc instanceof HTMLDocument )
 942             return DTD.XHTMLSystemId;
 943         return null;
 944     }
 945 
 946 
 947     /**
 948      * Returns the suitable media format for a document
 949      * output with the specified method.
 950      */
 951     public static String whichMediaType( String method )
 952     {
 953         if ( method.equalsIgnoreCase( Method.XML ) )
 954             return "text/xml";
 955         if ( method.equalsIgnoreCase( Method.HTML ) )
 956             return "text/html";
 957         if ( method.equalsIgnoreCase( Method.XHTML ) )
 958             return "text/html";
 959         if ( method.equalsIgnoreCase( Method.TEXT ) )
 960             return "text/plain";
 961         if ( method.equalsIgnoreCase( Method.FOP ) )
 962             return "application/pdf";
 963         return null;
 964     }
 965 
 966 
 967 }