/* * reserved comment block * DO NOT REMOVE OR ALTER! */ /* * Copyright 2001-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $ */ package com.sun.org.apache.xml.internal.serializer; import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.Properties; import javax.xml.transform.Result; import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; import com.sun.org.apache.xml.internal.serializer.utils.Utils; import org.xml.sax.Attributes; import org.xml.sax.SAXException; /** * This serializer takes a series of SAX or * SAX-like events and writes its output * to the given stream. * * This class is not a public API, it is public * because it is used from another package. * * @xsl.usage internal */ public final class ToHTMLStream extends ToStream { /** This flag is set while receiving events from the DTD */ protected boolean m_inDTD = false; /** True if the current element is a block element. (seems like * this needs to be a stack. -sb). */ private boolean m_inBlockElem = false; /** * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ private static final CharInfo m_htmlcharInfo = // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ static final Trie m_elementFlags = new Trie(); static { initTagReference(m_elementFlags); } static void initTagReference(Trie m_elementFlags) { // HTML 4.0 loose DTD m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY)); m_elementFlags.put( "FRAME", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put( "ISINDEX", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put( "APPLET", new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK)); // HTML 4.0 strict DTD m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE)); m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE)); m_elementFlags.put( "SUP", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put( "SUB", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put( "SPAN", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put( "BDO", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put( "BR", new ElemDesc( 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put( "ADDRESS", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put( "DIV", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL)); m_elementFlags.put( "MAP", new ElemDesc( 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK)); m_elementFlags.put( "AREA", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put( "LINK", new ElemDesc( 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put( "IMG", new ElemDesc( 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put( "OBJECT", new ElemDesc( 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE)); m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY)); m_elementFlags.put( "HR", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET | ElemDesc.EMPTY)); m_elementFlags.put( "P", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put( "H1", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "H2", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "H3", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "H4", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "H5", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "H6", new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); m_elementFlags.put( "PRE", new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK)); m_elementFlags.put( "Q", new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); m_elementFlags.put( "BLOCKQUOTE", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("INS", new ElemDesc(0)); m_elementFlags.put("DEL", new ElemDesc(0)); m_elementFlags.put( "DL", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put( "OL", new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); m_elementFlags.put( "UL", new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL)); m_elementFlags.put( "INPUT", new ElemDesc( 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY)); m_elementFlags.put( "SELECT", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put("OPTGROUP", new ElemDesc(0)); m_elementFlags.put("OPTION", new ElemDesc(0)); m_elementFlags.put( "TEXTAREA", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put( "FIELDSET", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM)); m_elementFlags.put("LEGEND", new ElemDesc(0)); m_elementFlags.put( "BUTTON", new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); m_elementFlags.put( "TABLE", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put( "COL", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put("TH", new ElemDesc(0)); m_elementFlags.put("TD", new ElemDesc(0)); m_elementFlags.put( "HEAD", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM)); m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK)); m_elementFlags.put( "BASE", new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put( "META", new ElemDesc( 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); m_elementFlags.put( "STYLE", new ElemDesc( 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK)); m_elementFlags.put( "SCRIPT", new ElemDesc( 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.RAW)); m_elementFlags.put( "NOSCRIPT", new ElemDesc( 0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET)); m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK)); // From "John Ky" elemDesc = (ElemDesc) m_elementFlags.get("FRAME"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("HEAD"); elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("HR"); elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY); // ---------------------------------------------- // HTML 4.0, section 16.5 elemDesc = (ElemDesc) m_elementFlags.get("IFRAME"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); // ---------------------------------------------- // Netscape 4 extension elemDesc = (ElemDesc) m_elementFlags.get("ILAYER"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("IMG"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("INPUT"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("INS"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); // ---------------------------------------------- // Netscape 4 extension elemDesc = (ElemDesc) m_elementFlags.get("LAYER"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("LINK"); elemDesc.setAttr("HREF", ElemDesc.ATTRURL); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("MENU"); elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("OBJECT"); elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL); elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL); elemDesc.setAttr("DATA", ElemDesc.ATTRURL); elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL); elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("OL"); elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("OPTION"); elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("Q"); elemDesc.setAttr("CITE", ElemDesc.ATTRURL); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT"); elemDesc.setAttr("SRC", ElemDesc.ATTRURL); elemDesc.setAttr("FOR", ElemDesc.ATTRURL); elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("SELECT"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("TABLE"); elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("TD"); elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA"); elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("TH"); elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // ---------------------------------------------- // The nowrap attribute of a tr element is both // a Netscape and Internet-Explorer extension elemDesc = (ElemDesc) m_elementFlags.get("TR"); elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // ---------------------------------------------- elemDesc = (ElemDesc) m_elementFlags.get("UL"); elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); } /** * Dummy element for elements not found. */ static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); /** True if URLs should be specially escaped with the %xx form. */ private boolean m_specialEscapeURLs = true; /** True if the META tag should be omitted. */ private boolean m_omitMetaTag = false; /** * Tells if the formatter should use special URL escaping. * * @param bool True if URLs should be specially escaped with the %xx form. */ public void setSpecialEscapeURLs(boolean bool) { m_specialEscapeURLs = bool; } /** * Tells if the formatter should omit the META tag. * * @param bool True if the META tag should be omitted. */ public void setOmitMetaTag(boolean bool) { m_omitMetaTag = bool; } /** * Specifies an output format for this serializer. It the * serializer has already been associated with an output format, * it will switch to the new format. This method should not be * called while the serializer is in the process of serializing * a document. * * This method can be called multiple times before starting * the serialization of a particular result-tree. In principle * all serialization parameters can be changed, with the exception * of method="html" (it must be method="html" otherwise we * shouldn't even have a ToHTMLStream object here!) * * @param format The output format or serialzation parameters * to use. */ public void setOutputFormat(Properties format) { m_specialEscapeURLs = OutputPropertyUtils.getBooleanProperty( OutputPropertiesFactory.S_USE_URL_ESCAPING, format); m_omitMetaTag = OutputPropertyUtils.getBooleanProperty( OutputPropertiesFactory.S_OMIT_META_TAG, format); super.setOutputFormat(format); } /** * Tells if the formatter should use special URL escaping. * * @return True if URLs should be specially escaped with the %xx form. */ private final boolean getSpecialEscapeURLs() { return m_specialEscapeURLs; } /** * Tells if the formatter should omit the META tag. * * @return True if the META tag should be omitted. */ private final boolean getOmitMetaTag() { return m_omitMetaTag; } /** * Get a description of the given element. * * @param name non-null name of element, case insensitive. * * @return non-null reference to ElemDesc, which may be m_dummy if no * element description matches the given name. */ public static final ElemDesc getElemDesc(String name) { /* this method used to return m_dummy when name was null * but now it doesn't check and and requires non-null name. */ Object obj = m_elementFlags.get(name); if (null != obj) return (ElemDesc)obj; return m_dummy; } /** * A Trie that is just a copy of the "static" one. * We need this one to be able to use the faster, but not thread-safe * method Trie.get2(name) */ private Trie m_htmlInfo = new Trie(m_elementFlags); /** * Calls to this method could be replaced with calls to * getElemDesc(name), but this one should be faster. */ private ElemDesc getElemDesc2(String name) { Object obj = m_htmlInfo.get2(name); if (null != obj) return (ElemDesc)obj; return m_dummy; } /** * Default constructor. */ public ToHTMLStream() { super(); m_charInfo = m_htmlcharInfo; // initialize namespaces m_prefixMap = new NamespaceMappings(); } /** The name of the current element. */ // private String m_currentElementName = null; /** * Receive notification of the beginning of a document. * * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @throws org.xml.sax.SAXException */ protected void startDocumentInternal() throws org.xml.sax.SAXException { super.startDocumentInternal(); m_needToCallStartDocument = false; m_needToOutputDocTypeDecl = true; m_startNewLine = false; setOmitXMLDeclaration(true); if (true == m_needToOutputDocTypeDecl) { String doctypeSystem = getDoctypeSystem(); String doctypePublic = getDoctypePublic(); if ((null != doctypeSystem) || (null != doctypePublic)) { final java.io.Writer writer = m_writer; try { writer.write("'); outputLineSep(); } catch(IOException e) { throw new SAXException(e); } } } m_needToOutputDocTypeDecl = false; } /** * Receive notification of the end of a document. * * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @throws org.xml.sax.SAXException */ public final void endDocument() throws org.xml.sax.SAXException { flushPending(); if (m_doIndent && !m_isprevtext) { try { outputLineSep(); } catch(IOException e) { throw new SAXException(e); } } flushWriter(); if (m_tracer != null) super.fireEndDoc(); } /** * Receive notification of the beginning of an element. * * * @param namespaceURI * @param localName * @param name The element type name. * @param atts The attributes attached to the element, if any. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #endElement * @see org.xml.sax.AttributeList */ public void startElement( String namespaceURI, String localName, String name, Attributes atts) throws org.xml.sax.SAXException { ElemContext elemContext = m_elemContext; // clean up any pending things first if (elemContext.m_startTagOpen) { closeStartTag(); elemContext.m_startTagOpen = false; } else if (m_cdataTagOpen) { closeCDATA(); m_cdataTagOpen = false; } else if (m_needToCallStartDocument) { startDocumentInternal(); m_needToCallStartDocument = false; } // if this element has a namespace then treat it like XML if (null != namespaceURI && namespaceURI.length() > 0) { super.startElement(namespaceURI, localName, name, atts); return; } try { // getElemDesc2(name) is faster than getElemDesc(name) ElemDesc elemDesc = getElemDesc2(name); int elemFlags = elemDesc.getFlags(); // deal with indentation issues first if (m_doIndent) { boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; if (m_ispreserve) m_ispreserve = false; else if ( (null != elemContext.m_elementName) && (!m_inBlockElem || isBlockElement) /* && !isWhiteSpaceSensitive */ ) { m_startNewLine = true; indent(); } m_inBlockElem = !isBlockElement; } // save any attributes for later processing if (atts != null) addAttributes(atts); m_isprevtext = false; final java.io.Writer writer = m_writer; writer.write('<'); writer.write(name); if (m_tracer != null) firePseudoAttributes(); if ((elemFlags & ElemDesc.EMPTY) != 0) { // an optimization for elements which are expected // to be empty. m_elemContext = elemContext.push(); /* XSLTC sometimes calls namespaceAfterStartElement() * so we need to remember the name */ m_elemContext.m_elementName = name; m_elemContext.m_elementDesc = elemDesc; return; } else { elemContext = elemContext.push(namespaceURI,localName,name); m_elemContext = elemContext; elemContext.m_elementDesc = elemDesc; elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; } if ((elemFlags & ElemDesc.HEADELEM) != 0) { // This is the element, do some special processing closeStartTag(); elemContext.m_startTagOpen = false; if (!m_omitMetaTag) { if (m_doIndent) indent(); writer.write( ""); } } } catch (IOException e) { throw new SAXException(e); } } /** * Receive notification of the end of an element. * * * @param namespaceURI * @param localName * @param name The element type name * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. */ public final void endElement( final String namespaceURI, final String localName, final String name) throws org.xml.sax.SAXException { // deal with any pending issues if (m_cdataTagOpen) closeCDATA(); // if the element has a namespace, treat it like XML, not HTML if (null != namespaceURI && namespaceURI.length() > 0) { super.endElement(namespaceURI, localName, name); return; } try { ElemContext elemContext = m_elemContext; final ElemDesc elemDesc = elemContext.m_elementDesc; final int elemFlags = elemDesc.getFlags(); final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; // deal with any indentation issues if (m_doIndent) { final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; boolean shouldIndent = false; if (m_ispreserve) { m_ispreserve = false; } else if (m_doIndent && (!m_inBlockElem || isBlockElement)) { m_startNewLine = true; shouldIndent = true; } if (!elemContext.m_startTagOpen && shouldIndent) indent(elemContext.m_currentElemDepth - 1); m_inBlockElem = !isBlockElement; } final java.io.Writer writer = m_writer; if (!elemContext.m_startTagOpen) { writer.write("'); } else { // the start-tag open when this method was called, // so we need to process it now. if (m_tracer != null) super.fireStartElem(name); // the starting tag was still open when we received this endElement() call // so we need to process any gathered attributes NOW, before they go away. int nAttrs = m_attributes.getLength(); if (nAttrs > 0) { processAttributes(m_writer, nAttrs); // clear attributes object for re-use with next element m_attributes.clear(); } if (!elemEmpty) { // As per Dave/Paul recommendation 12/06/2000 // if (shouldIndent) // writer.write('>'); // indent(m_currentIndent); writer.write(">'); } else { writer.write('>'); } } // clean up because the element has ended if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) m_ispreserve = true; m_isprevtext = false; // fire off the end element event if (m_tracer != null) super.fireEndElem(name); // OPTIMIZE-EMPTY if (elemEmpty) { // a quick exit if the HTML element had no children. // This block of code can be removed if the corresponding block of code // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed m_elemContext = elemContext.m_prev; return; } // some more clean because the element has ended. if (!elemContext.m_startTagOpen) { if (m_doIndent && !m_preserves.isEmpty()) m_preserves.pop(); } m_elemContext = elemContext.m_prev; // m_isRawStack.pop(); } catch (IOException e) { throw new SAXException(e); } } /** * Process an attribute. * @param writer The writer to write the processed output to. * @param name The name of the attribute. * @param value The value of the attribute. * @param elemDesc The description of the HTML element * that has this attribute. * * @throws org.xml.sax.SAXException */ protected void processAttribute( java.io.Writer writer, String name, String value, ElemDesc elemDesc) throws IOException { writer.write(' '); if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) && elemDesc != null && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) { writer.write(name); } else { // %REVIEW% %OPT% // Two calls to single-char write may NOT // be more efficient than one to string-write... writer.write(name); writer.write("=\""); if ( elemDesc != null && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) writeAttrURI(writer, value, m_specialEscapeURLs); else writeAttrString(writer, value, this.getEncoding()); writer.write('"'); } } /** * Tell if a character is an ASCII digit. */ private boolean isASCIIDigit(char c) { return (c >= '0' && c <= '9'); } /** * Make an integer into an HH hex value. * Does no checking on the size of the input, since this * is only meant to be used locally by writeAttrURI. * * @param i must be a value less than 255. * * @return should be a two character string. */ private static String makeHHString(int i) { String s = Integer.toHexString(i).toUpperCase(); if (s.length() == 1) { s = "0" + s; } return s; } /** * Dmitri Ilyin: Makes sure if the String is HH encoded sign. * @param str must be 2 characters long * * @return true or false */ private boolean isHHSign(String str) { boolean sign = true; try { char r = (char) Integer.parseInt(str, 16); } catch (NumberFormatException e) { sign = false; } return sign; } /** * Write the specified string after substituting non ASCII characters, * with %HH, where HH is the hex of the byte value. * * @param string String to convert to XML format. * @param doURLEscaping True if we should try to encode as * per http://www.ietf.org/rfc/rfc2396.txt. * * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. */ public void writeAttrURI( final java.io.Writer writer, String string, boolean doURLEscaping) throws IOException { // http://www.ietf.org/rfc/rfc2396.txt says: // A URI is always in an "escaped" form, since escaping or unescaping a // completed URI might change its semantics. Normally, the only time // escape encodings can safely be made is when the URI is being created // from its component parts; each component may have its own set of // characters that are reserved, so only the mechanism responsible for // generating or interpreting that component can determine whether or // not escaping a character will change its semantics. Likewise, a URI // must be separated into its components before the escaped characters // within those components can be safely decoded. // // ...So we do our best to do limited escaping of the URL, without // causing damage. If the URL is already properly escaped, in theory, this // function should not change the string value. final int end = string.length(); if (end > m_attrBuff.length) { m_attrBuff = new char[end*2 + 1]; } string.getChars(0,end, m_attrBuff, 0); final char[] chars = m_attrBuff; int cleanStart = 0; int cleanLength = 0; char ch = 0; for (int i = 0; i < end; i++) { ch = chars[i]; if ((ch < 32) || (ch > 126)) { if (cleanLength > 0) { writer.write(chars, cleanStart, cleanLength); cleanLength = 0; } if (doURLEscaping) { // Encode UTF16 to UTF8. // Reference is Unicode, A Primer, by Tony Graham. // Page 92. // Note that Kay doesn't escape 0x20... // if(ch == 0x20) // Not sure about this... -sb // { // writer.write(ch); // } // else if (ch <= 0x7F) { writer.write('%'); writer.write(makeHHString(ch)); } else if (ch <= 0x7FF) { // Clear low 6 bits before rotate, put high 4 bits in low byte, // and set two high bits. int high = (ch >> 6) | 0xC0; int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit writer.write('%'); writer.write(makeHHString(high)); writer.write('%'); writer.write(makeHHString(low)); } else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate { // I'm sure this can be done in 3 instructions, but I choose // to try and do it exactly like it is done in the book, at least // until we are sure this is totally clean. I don't think performance // is a big issue with this particular function, though I could be // wrong. Also, the stuff below clearly does more masking than // it needs to do. // Clear high 6 bits. int highSurrogate = ((int) ch) & 0x03FF; // Middle 4 bits (wwww) + 1 // "Note that the value of wwww from the high surrogate bit pattern // is incremented to make the uuuuu bit pattern in the scalar value // so the surrogate pair don't address the BMP." int wwww = ((highSurrogate & 0x03C0) >> 6); int uuuuu = wwww + 1; // next 4 bits int zzzz = (highSurrogate & 0x003C) >> 2; // low 2 bits int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; // Get low surrogate character. ch = chars[++i]; // Clear high 6 bits. int lowSurrogate = ((int) ch) & 0x03FF; // put the middle 4 bits into the bottom of yyyyyy (byte 3) yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); // bottom 6 bits. int xxxxxx = (lowSurrogate & 0x003F); int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu int byte2 = 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; int byte3 = 0x80 | yyyyyy; int byte4 = 0x80 | xxxxxx; writer.write('%'); writer.write(makeHHString(byte1)); writer.write('%'); writer.write(makeHHString(byte2)); writer.write('%'); writer.write(makeHHString(byte3)); writer.write('%'); writer.write(makeHHString(byte4)); } else { int high = (ch >> 12) | 0xE0; // top 4 bits int middle = ((ch & 0x0FC0) >> 6) | 0x80; // middle 6 bits int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit writer.write('%'); writer.write(makeHHString(high)); writer.write('%'); writer.write(makeHHString(middle)); writer.write('%'); writer.write(makeHHString(low)); } } else if (escapingNotNeeded(ch)) { writer.write(ch); } else { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } // In this character range we have first written out any previously accumulated // "clean" characters, then processed the current more complicated character, // which may have incremented "i". // We now we reset the next possible clean character. cleanStart = i + 1; } // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as // not allowing quotes in the URI proper syntax, nor in the fragment // identifier, we believe that it's OK to double escape quotes. else if (ch == '"') { // If the character is a '%' number number, try to avoid double-escaping. // There is a question if this is legal behavior. // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) // We are no longer escaping '%' if (cleanLength > 0) { writer.write(chars, cleanStart, cleanLength); cleanLength = 0; } // Mike Kay encodes this as ", so he may know something I don't? if (doURLEscaping) writer.write("%22"); else writer.write("""); // we have to escape this, I guess. // We have written out any clean characters, then the escaped '%' and now we // We now we reset the next possible clean character. cleanStart = i + 1; } else if (ch == '&') { // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) // instead of "&" to avoid confusion with the beginning of a character // reference (entity reference open delimiter). if (cleanLength > 0) { writer.write(chars, cleanStart, cleanLength); cleanLength = 0; } writer.write("&"); cleanStart = i + 1; } else { // no processing for this character, just count how // many characters in a row that we have that need no processing cleanLength++; } } // are there any clean characters at the end of the array // that we haven't processed yet? if (cleanLength > 1) { // if the whole string can be written out as-is do so // otherwise write out the clean chars at the end of the // array if (cleanStart == 0) writer.write(string); else writer.write(chars, cleanStart, cleanLength); } else if (cleanLength == 1) { // a little optimization for 1 clean character // (we could have let the previous if(...) handle them all) writer.write(ch); } } /** * Writes the specified string after substituting specials, * and UTF-16 surrogates for character references &#xnn. * * @param string String to convert to XML format. * @param encoding CURRENTLY NOT IMPLEMENTED. * * @throws org.xml.sax.SAXException */ public void writeAttrString( final java.io.Writer writer, String string, String encoding) throws IOException { final int end = string.length(); if (end > m_attrBuff.length) { m_attrBuff = new char[end * 2 + 1]; } string.getChars(0, end, m_attrBuff, 0); final char[] chars = m_attrBuff; int cleanStart = 0; int cleanLength = 0; char ch = 0; for (int i = 0; i < end; i++) { ch = chars[i]; // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); // System.out.println("ch: "+(int)ch); // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) { cleanLength++; } else if ('<' == ch || '>' == ch) { cleanLength++; // no escaping in this case, as specified in 15.2 } else if ( ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) { cleanLength++; // no escaping in this case, as specified in 15.2 } else { if (cleanLength > 0) { writer.write(chars,cleanStart,cleanLength); cleanLength = 0; } int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); if (i != pos) { i = pos - 1; } else { if (Encodings.isHighUTF16Surrogate(ch)) { writeUTF16Surrogate(ch, chars, i, end); i++; // two input characters processed // this increments by one and the for() // loop itself increments by another one. } // The next is kind of a hack to keep from escaping in the case // of Shift_JIS and the like. /* else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) && (ch != 160)) { writer.write(ch); // no escaping in this case } else */ String outputStringForChar = m_charInfo.getOutputStringForChar(ch); if (null != outputStringForChar) { writer.write(outputStringForChar); } else if (escapingNotNeeded(ch)) { writer.write(ch); // no escaping in this case } else { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } } cleanStart = i + 1; } } // end of for() // are there any clean characters at the end of the array // that we haven't processed yet? if (cleanLength > 1) { // if the whole string can be written out as-is do so // otherwise write out the clean chars at the end of the // array if (cleanStart == 0) writer.write(string); else writer.write(chars, cleanStart, cleanLength); } else if (cleanLength == 1) { // a little optimization for 1 clean character // (we could have let the previous if(...) handle them all) writer.write(ch); } } /** * Receive notification of character data. * *

The Parser will call this method to report each chunk of * character data. SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * must come from the same external entity, so that the Locator * provides useful information.

* *

The application must not attempt to read from the array * outside of the specified range.

* *

Note that some parsers will report whitespace using the * ignorableWhitespace() method rather than this one (validating * parsers must do so).

* * @param chars The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #ignorableWhitespace * @see org.xml.sax.Locator * * @throws org.xml.sax.SAXException */ public final void characters(char chars[], int start, int length) throws org.xml.sax.SAXException { if (m_elemContext.m_isRaw) { try { if (m_elemContext.m_startTagOpen) { closeStartTag(); m_elemContext.m_startTagOpen = false; } m_ispreserve = true; // With m_ispreserve just set true it looks like shouldIndent() // will always return false, so drop any possible indentation. // if (shouldIndent()) // indent(); // writer.write(""); // time to generate characters event if (m_tracer != null) super.fireCharEvent(chars, start, length); return; } catch (IOException ioe) { throw new org.xml.sax.SAXException( Utils.messages.createMessage( MsgKey.ER_OIERROR, null), ioe); //"IO error", ioe); } } else { super.characters(chars, start, length); } } /** * Receive notification of cdata. * *

The Parser will call this method to report each chunk of * character data. SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * must come from the same external entity, so that the Locator * provides useful information.

* *

The application must not attempt to read from the array * outside of the specified range.

* *

Note that some parsers will report whitespace using the * ignorableWhitespace() method rather than this one (validating * parsers must do so).

* * @param ch The characters from the XML document. * @param start The start position in the array. * @param length The number of characters to read from the array. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @see #ignorableWhitespace * @see org.xml.sax.Locator * * @throws org.xml.sax.SAXException */ public final void cdata(char ch[], int start, int length) throws org.xml.sax.SAXException { if ((null != m_elemContext.m_elementName) && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) { try { if (m_elemContext.m_startTagOpen) { closeStartTag(); m_elemContext.m_startTagOpen = false; } m_ispreserve = true; if (shouldIndent()) indent(); // writer.write(ch, start, length); writeNormalizedChars(ch, start, length, true, m_lineSepUse); } catch (IOException ioe) { throw new org.xml.sax.SAXException( Utils.messages.createMessage( MsgKey.ER_OIERROR, null), ioe); //"IO error", ioe); } } else { super.cdata(ch, start, length); } } /** * Receive notification of a processing instruction. * * @param target The processing instruction target. * @param data The processing instruction data, or null if * none was supplied. * @throws org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @throws org.xml.sax.SAXException */ public void processingInstruction(String target, String data) throws org.xml.sax.SAXException { // Process any pending starDocument and startElement first. flushPending(); // Use a fairly nasty hack to tell if the next node is supposed to be // unescaped text. if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) { startNonEscaping(); } else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) { endNonEscaping(); } else { try { if (m_elemContext.m_startTagOpen) { closeStartTag(); m_elemContext.m_startTagOpen = false; } else if (m_needToCallStartDocument) startDocumentInternal(); if (shouldIndent()) indent(); final java.io.Writer writer = m_writer; //writer.write(" 0 && !Character.isSpaceChar(data.charAt(0))) writer.write(' '); //writer.write(data + ">"); // different from XML writer.write(data); // different from XML writer.write('>'); // different from XML // Always output a newline char if not inside of an // element. The whitespace is not significant in that // case. if (m_elemContext.m_currentElemDepth <= 0) outputLineSep(); m_startNewLine = true; } catch(IOException e) { throw new SAXException(e); } } // now generate the PI event if (m_tracer != null) super.fireEscapingEvent(target, data); } /** * Receive notivication of a entityReference. * * @param name non-null reference to entity name string. * * @throws org.xml.sax.SAXException */ public final void entityReference(String name) throws org.xml.sax.SAXException { try { final java.io.Writer writer = m_writer; writer.write('&'); writer.write(name); writer.write(';'); } catch(IOException e) { throw new SAXException(e); } } /** * @see ExtendedContentHandler#endElement(String) */ public final void endElement(String elemName) throws SAXException { endElement(null, null, elemName); } /** * Process the attributes, which means to write out the currently * collected attributes to the writer. The attributes are not * cleared by this method * * @param writer the writer to write processed attributes to. * @param nAttrs the number of attributes in m_attributes * to be processed * * @throws org.xml.sax.SAXException */ public void processAttributes(java.io.Writer writer, int nAttrs) throws IOException,SAXException { /* * process the collected attributes */ for (int i = 0; i < nAttrs; i++) { processAttribute( writer, m_attributes.getQName(i), m_attributes.getValue(i), m_elemContext.m_elementDesc); } } /** * For the enclosing elements starting tag write out out any attributes * followed by ">" * *@throws org.xml.sax.SAXException */ protected void closeStartTag() throws SAXException { try { // finish processing attributes, time to fire off the start element event if (m_tracer != null) super.fireStartElem(m_elemContext.m_elementName); int nAttrs = m_attributes.getLength(); if (nAttrs>0) { processAttributes(m_writer, nAttrs); // clear attributes object for re-use with next element m_attributes.clear(); } m_writer.write('>'); /* whether Xalan or XSLTC, we have the prefix mappings now, so * lets determine if the current element is specified in the cdata- * section-elements list. */ if (m_cdataSectionElements != null) m_elemContext.m_isCdataSection = isCdataSection(); if (m_doIndent) { m_isprevtext = false; m_preserves.push(m_ispreserve); } } catch(IOException e) { throw new SAXException(e); } } /** * Initialize the serializer with the specified output stream and output * format. Must be called before calling any of the serialize methods. * * @param output The output stream to use * @param format The output format * @throws UnsupportedEncodingException The encoding specified in the * output format is not supported */ protected synchronized void init(OutputStream output, Properties format) throws UnsupportedEncodingException { if (null == format) { format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); } super.init(output,format, false); } /** * Specifies an output stream to which the document should be * serialized. This method should not be called while the * serializer is in the process of serializing a document. *

* The encoding specified in the output properties is used, or * if no encoding was specified, the default for the selected * output method. * * @param output The output stream */ public void setOutputStream(OutputStream output) { try { Properties format; if (null == m_format) format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); else format = m_format; init(output, format, true); } catch (UnsupportedEncodingException uee) { // Should have been warned in init, I guess... } } /** * This method is used when a prefix/uri namespace mapping * is indicated after the element was started with a * startElement() and before and endElement(). * startPrefixMapping(prefix,uri) would be used before the * startElement() call. * @param uri the URI of the namespace * @param prefix the prefix associated with the given URI. * * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) */ public void namespaceAfterStartElement(String prefix, String uri) throws SAXException { // hack for XSLTC with finding URI for default namespace if (m_elemContext.m_elementURI == null) { String prefix1 = getPrefixPart(m_elemContext.m_elementName); if (prefix1 == null && EMPTYSTRING.equals(prefix)) { // the elements URI is not known yet, and it // doesn't have a prefix, and we are currently // setting the uri for prefix "", so we have // the uri for the element... lets remember it m_elemContext.m_elementURI = uri; } } startPrefixMapping(prefix,uri,false); } public void startDTD(String name, String publicId, String systemId) throws SAXException { m_inDTD = true; super.startDTD(name, publicId, systemId); } /** * Report the end of DTD declarations. * @throws org.xml.sax.SAXException The application may raise an exception. * @see #startDTD */ public void endDTD() throws org.xml.sax.SAXException { m_inDTD = false; /* for ToHTMLStream the DOCTYPE is entirely output in the * startDocumentInternal() method, so don't do anything here */ } /** * This method does nothing. */ public void attributeDecl( String eName, String aName, String type, String valueDefault, String value) throws SAXException { // The internal DTD subset is not serialized by the ToHTMLStream serializer } /** * This method does nothing. */ public void elementDecl(String name, String model) throws SAXException { // The internal DTD subset is not serialized by the ToHTMLStream serializer } /** * This method does nothing. */ public void internalEntityDecl(String name, String value) throws SAXException { // The internal DTD subset is not serialized by the ToHTMLStream serializer } /** * This method does nothing. */ public void externalEntityDecl( String name, String publicId, String systemId) throws SAXException { // The internal DTD subset is not serialized by the ToHTMLStream serializer } /** * This method is used to add an attribute to the currently open element. * The caller has guaranted that this attribute is unique, which means that it * not been seen before and will not be seen again. * * @param name the qualified name of the attribute * @param value the value of the attribute which can contain only * ASCII printable characters characters in the range 32 to 127 inclusive. * @param flags the bit values of this integer give optimization information. */ public void addUniqueAttribute(String name, String value, int flags) throws SAXException { try { final java.io.Writer writer = m_writer; if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) { // "flags" has indicated that the characters // '>' '<' '&' and '"' are not in the value and // m_htmlcharInfo has recorded that there are no other // entities in the range 0 to 127 so we write out the // value directly writer.write(' '); writer.write(name); writer.write("=\""); writer.write(value); writer.write('"'); } else if ( (flags & HTML_ATTREMPTY) > 0 && (value.length() == 0 || value.equalsIgnoreCase(name))) { writer.write(' '); writer.write(name); } else { writer.write(' '); writer.write(name); writer.write("=\""); if ((flags & HTML_ATTRURL) > 0) { writeAttrURI(writer, value, m_specialEscapeURLs); } else { writeAttrString(writer, value, this.getEncoding()); } writer.write('"'); } } catch (IOException e) { throw new SAXException(e); } } public void comment(char ch[], int start, int length) throws SAXException { // The internal DTD subset is not serialized by the ToHTMLStream serializer if (m_inDTD) return; super.comment(ch, start, length); } public boolean reset() { boolean ret = super.reset(); if (!ret) return false; initToHTMLStream(); return true; } private void initToHTMLStream() { // m_elementDesc = null; m_inBlockElem = false; m_inDTD = false; // m_isRawStack.clear(); m_omitMetaTag = false; m_specialEscapeURLs = true; } static class Trie { /** * A digital search trie for 7-bit ASCII text * The API is a subset of java.util.Hashtable * The key must be a 7-bit ASCII string * The value may be any Java Object * One can get an object stored in a trie from its key, * but the search is either case sensitive or case * insensitive to the characters in the key, and this * choice of sensitivity or insensitivity is made when * the Trie is created, before any objects are put in it. * * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. * It exists to cut the serializers dependancy on that package. * * @xsl.usage internal */ /** Size of the m_nextChar array. */ public static final int ALPHA_SIZE = 128; /** The root node of the tree. */ final Node m_Root; /** helper buffer to convert Strings to char arrays */ private char[] m_charBuffer = new char[0]; /** true if the search for an object is lower case only with the key */ private final boolean m_lowerCaseOnly; /** * Construct the trie that has a case insensitive search. */ public Trie() { m_Root = new Node(); m_lowerCaseOnly = false; } /** * Construct the trie given the desired case sensitivity with the key. * @param lowerCaseOnly true if the search keys are to be loser case only, * not case insensitive. */ public Trie(boolean lowerCaseOnly) { m_Root = new Node(); m_lowerCaseOnly = lowerCaseOnly; } /** * Put an object into the trie for lookup. * * @param key must be a 7-bit ASCII string * @param value any java object. * * @return The old object that matched key, or null. */ public Object put(String key, Object value) { final int len = key.length(); if (len > m_charBuffer.length) { // make the biggest buffer ever needed in get(String) m_charBuffer = new char[len]; } Node node = m_Root; for (int i = 0; i < len; i++) { Node nextNode = node.m_nextChar[Character.toLowerCase(key.charAt(i))]; if (nextNode != null) { node = nextNode; } else { for (; i < len; i++) { Node newNode = new Node(); if (m_lowerCaseOnly) { // put this value into the tree only with a lower case key node.m_nextChar[Character.toLowerCase( key.charAt(i))] = newNode; } else { // put this value into the tree with a case insensitive key node.m_nextChar[Character.toUpperCase( key.charAt(i))] = newNode; node.m_nextChar[Character.toLowerCase( key.charAt(i))] = newNode; } node = newNode; } break; } } Object ret = node.m_Value; node.m_Value = value; return ret; } /** * Get an object that matches the key. * * @param key must be a 7-bit ASCII string * * @return The object that matches the key, or null. */ public Object get(final String key) { final int len = key.length(); /* If the name is too long, we won't find it, this also keeps us * from overflowing m_charBuffer */ if (m_charBuffer.length < len) return null; Node node = m_Root; switch (len) // optimize the look up based on the number of chars { // case 0 looks silly, but the generated bytecode runs // faster for lookup of elements of length 2 with this in // and a fair bit faster. Don't know why. case 0 : { return null; } case 1 : { final char ch = key.charAt(0); if (ch < ALPHA_SIZE) { node = node.m_nextChar[ch]; if (node != null) return node.m_Value; } return null; } // comment out case 2 because the default is faster // case 2 : // { // final char ch0 = key.charAt(0); // final char ch1 = key.charAt(1); // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) // { // node = node.m_nextChar[ch0]; // if (node != null) // { // // if (ch1 < ALPHA_SIZE) // { // node = node.m_nextChar[ch1]; // if (node != null) // return node.m_Value; // } // } // } // return null; // } default : { for (int i = 0; i < len; i++) { // A thread-safe way to loop over the characters final char ch = key.charAt(i); if (ALPHA_SIZE <= ch) { // the key is not 7-bit ASCII so we won't find it here return null; } node = node.m_nextChar[ch]; if (node == null) return null; } return node.m_Value; } } } /** * The node representation for the trie. * @xsl.usage internal */ private class Node { /** * Constructor, creates a Node[ALPHA_SIZE]. */ Node() { m_nextChar = new Node[ALPHA_SIZE]; m_Value = null; } /** The next nodes. */ final Node m_nextChar[]; /** The value. */ Object m_Value; } /** * Construct the trie from another Trie. * Both the existing Trie and this new one share the same table for * lookup, and it is assumed that the table is fully populated and * not changing anymore. * * @param existingTrie the Trie that this one is a copy of. */ public Trie(Trie existingTrie) { // copy some fields from the existing Trie into this one. m_Root = existingTrie.m_Root; m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; // get a buffer just big enough to hold the longest key in the table. int max = existingTrie.getLongestKeyLength(); m_charBuffer = new char[max]; } /** * Get an object that matches the key. * This method is faster than get(), but is not thread-safe. * * @param key must be a 7-bit ASCII string * * @return The object that matches the key, or null. */ public Object get2(final String key) { final int len = key.length(); /* If the name is too long, we won't find it, this also keeps us * from overflowing m_charBuffer */ if (m_charBuffer.length < len) return null; Node node = m_Root; switch (len) // optimize the look up based on the number of chars { // case 0 looks silly, but the generated bytecode runs // faster for lookup of elements of length 2 with this in // and a fair bit faster. Don't know why. case 0 : { return null; } case 1 : { final char ch = key.charAt(0); if (ch < ALPHA_SIZE) { node = node.m_nextChar[ch]; if (node != null) return node.m_Value; } return null; } default : { /* Copy string into array. This is not thread-safe because * it modifies the contents of m_charBuffer. If multiple * threads were to use this Trie they all would be * using this same array (not good). So this * method is not thread-safe, but it is faster because * converting to a char[] and looping over elements of * the array is faster than a String's charAt(i). */ key.getChars(0, len, m_charBuffer, 0); for (int i = 0; i < len; i++) { final char ch = m_charBuffer[i]; if (ALPHA_SIZE <= ch) { // the key is not 7-bit ASCII so we won't find it here return null; } node = node.m_nextChar[ch]; if (node == null) return null; } return node.m_Value; } } } /** * Get the length of the longest key used in the table. */ public int getLongestKeyLength() { return m_charBuffer.length; } } }