src/java.desktop/share/classes/javax/swing/text/html/parser/Parser.java

Print this page




  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javax.swing.text.html.parser;
  27 
  28 import javax.swing.text.SimpleAttributeSet;
  29 import javax.swing.text.html.HTML;
  30 import javax.swing.text.ChangedCharSetException;
  31 import java.io.*;
  32 import java.util.Hashtable;
  33 import java.util.Properties;
  34 import java.util.Vector;
  35 import java.util.Enumeration;
  36 import java.net.URL;
  37 
  38 import sun.misc.MessageUtils;
  39 
  40 /**
  41  * A simple DTD-driven HTML parser. The parser reads an
  42  * HTML file from an InputStream and calls various methods
  43  * (which should be overridden in a subclass) when tags and
  44  * data are encountered.
  45  * <p>
  46  * Unfortunately there are many badly implemented HTML parsers
  47  * out there, and as a result there are many badly formatted
  48  * HTML files. This parser attempts to parse most HTML files.
  49  * This means that the implementation sometimes deviates from
  50  * the SGML specification in favor of HTML.
  51  * <p>
  52  * The parser treats \r and \r\n as \n. Newlines after start tags
  53  * and before end tags are ignored just as specified in the SGML/HTML
  54  * specification.
  55  * <p>
  56  * The HTML spec does not specify very well how spaces are to be coalesced.
  57  * Specifically, the following scenarios are not discussed (note that a
  58  * space should be used here, but I am using &amp;nbsp to force the space to
  59  * be displayed):




  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javax.swing.text.html.parser;
  27 
  28 import javax.swing.text.SimpleAttributeSet;
  29 import javax.swing.text.html.HTML;
  30 import javax.swing.text.ChangedCharSetException;
  31 import java.io.*;
  32 import java.util.Hashtable;
  33 import java.util.Properties;
  34 import java.util.Vector;
  35 import java.util.Enumeration;
  36 import java.net.URL;
  37 


  38 /**
  39  * A simple DTD-driven HTML parser. The parser reads an
  40  * HTML file from an InputStream and calls various methods
  41  * (which should be overridden in a subclass) when tags and
  42  * data are encountered.
  43  * <p>
  44  * Unfortunately there are many badly implemented HTML parsers
  45  * out there, and as a result there are many badly formatted
  46  * HTML files. This parser attempts to parse most HTML files.
  47  * This means that the implementation sometimes deviates from
  48  * the SGML specification in favor of HTML.
  49  * <p>
  50  * The parser treats \r and \r\n as \n. Newlines after start tags
  51  * and before end tags are ignored just as specified in the SGML/HTML
  52  * specification.
  53  * <p>
  54  * The HTML spec does not specify very well how spaces are to be coalesced.
  55  * Specifically, the following scenarios are not discussed (note that a
  56  * space should be used here, but I am using &amp;nbsp to force the space to
  57  * be displayed):