< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java

Print this page

        

@@ -27,16 +27,20 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.io.Writer;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.EmptyStackException;
 import java.util.Enumeration;
-import java.util.Iterator;
 import java.util.Properties;
+import java.util.Queue;
 import java.util.Set;
 import java.util.StringTokenizer;
-import java.util.ArrayList;
 import javax.xml.transform.ErrorListener;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerException;
 import org.w3c.dom.Node;

@@ -87,10 +91,33 @@
      * encoding.
      */
     Object m_charToByteConverter = null;
 
     /**
+     * Used to buffer the text nodes and the entity reference nodes if
+     * indentation is on.
+     */
+    protected CharacterBuffer m_charactersBuffer = new CharacterBuffer();
+
+    /**
+     * Used to decide if a text node is pretty-printed with indentation.
+     * If m_childNodeNum > 1, the text node will be indented.
+     *
+     */
+    protected Deque<Integer> m_childNodeNumStack = new ArrayDeque<>();
+
+    protected int m_childNodeNum = 0;
+
+    /**
+     * Used to handle xml:space attribute
+     *
+     */
+    protected BoolStack m_preserveSpaces = new BoolStack();
+
+    protected boolean m_ispreserveSpace = false;
+
+    /**
      * Stack to keep track of whether or not we need to
      * preserve whitespace.
      *
      * Used to push/pop values used for the field m_ispreserve, but
      * m_ispreserve is only relevant if m_doIndent is true.

@@ -765,15 +792,13 @@
     protected void indent(int depth) throws IOException
     {
 
         if (m_startNewLine)
             outputLineSep();
-        /* For m_indentAmount > 0 this extra test might be slower
-         * but Xalan's default value is 0, so this extra test
-         * will run faster in that situation.
+        /*
+         * Default value is 4, so printSpace directly.
          */
-        if (m_indentAmount > 0)
             printSpace(depth * m_indentAmount);
 
     }
 
     /**

@@ -1232,11 +1257,10 @@
      * @throws org.xml.sax.SAXException
      */
     protected void cdata(char ch[], int start, final int length)
         throws org.xml.sax.SAXException
     {
-
         try
         {
             final int old_start = start;
             if (m_elemContext.m_startTagOpen)
             {

@@ -1321,11 +1345,11 @@
      */
     protected void charactersRaw(char ch[], int start, int length)
         throws org.xml.sax.SAXException
     {
 
-        if (m_inEntityRef)
+        if (isInEntityRef())
             return;
         try
         {
             if (m_elemContext.m_startTagOpen)
             {

@@ -1376,13 +1400,15 @@
     {
         // It does not make sense to continue with rest of the method if the number of
         // characters to read from array is 0.
         // Section 7.6.1 of XSLT 1.0 (http://www.w3.org/TR/xslt#value-of) suggest no text node
         // is created if string is empty.
-        if (length == 0 || (m_inEntityRef && !m_expandDTDEntities))
+        if (length == 0 || (isInEntityRef()))
             return;
-        if (m_elemContext.m_startTagOpen)
+
+        final boolean shouldFormat = shouldFormatOutput();
+        if (m_elemContext.m_startTagOpen && !shouldFormat)
         {
             closeStartTag();
             m_elemContext.m_startTagOpen = false;
         }
         else if (m_needToCallStartDocument)

@@ -1405,25 +1431,53 @@
         // the check with _escaping is a bit of a hack for XLSTC
 
         if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
         {
             charactersRaw(chars, start, length);
-
+            m_isprevtext = true;
             // time to fire off characters generation event
             if (m_tracer != null)
                 super.fireCharEvent(chars, start, length);
 
             return;
         }
 
-        if (m_elemContext.m_startTagOpen)
+        if (m_elemContext.m_startTagOpen && !shouldFormat)
         {
             closeStartTag();
             m_elemContext.m_startTagOpen = false;
         }
 
+        if (shouldFormat) {
+            m_charactersBuffer.addText(chars, start, length);
+        } else {
+            outputCharacters(chars, start, length);
+        }
 
+        // time to fire off characters generation event
+        if (m_tracer != null)
+            super.fireCharEvent(chars, start, length);
+    }
+
+
+    /**
+     * Checks whether the content of the current element should be formatted
+     * (pretty-printed). Formatting applies only when indentation is enabled
+     * ({@code m_doIndent}) and whitespace is not currently being preserved
+     * ({@code m_ispreserveSpace}).
+     *
+     * @return True if the content should be formatted.
+     */
+    protected boolean shouldFormatOutput() {
+        return !m_ispreserveSpace && m_doIndent;
+    }
+
+    /**
+     * Write out the characters.
+     *
+     * @param chars The characters of the text.
+     * @param start The start position in the char array.
+     * @param length The number of characters from the char array.
+     */
+    private void outputCharacters(final char chars[], final int start, final int length) throws SAXException {
         try
         {
             int i;
             char ch1;
             int startClean;

@@ -1512,15 +1566,58 @@
         }
         catch (IOException e)
         {
             throw new SAXException(e);
         }
+    }
 
-        // time to fire off characters generation event
-        if (m_tracer != null)
-            super.fireCharEvent(chars, start, length);
+    /**
+     * Flushes the characters that were buffered while indentation is on. This
+     * method is called when the next node is traversed.
+     * <p>
+     * Output is produced only if {@code shouldFormatOutput()} is true and the
+     * buffer has content. In that case an open start tag is closed first.
+     * If the current element is flagged as a CDATA section element, the
+     * buffered characters are written through {@code cdata} and the method
+     * returns. In the remaining case the child node count is incremented, an
+     * indent is emitted when {@code shouldIndentForText()} is true, and the
+     * buffer is flushed. The buffer is cleared on every exit path, including
+     * when an exception is thrown.
+     *
+     * @throws SAXException if writing fails; an IOException is wrapped in a
+     *         SAXException.
+     */
+    final protected void flushCharactersBuffer() throws SAXException {
+        try {
+            if (shouldFormatOutput() && m_charactersBuffer.hasContent()) {
+                if (m_elemContext.m_startTagOpen) {
+                    closeStartTag();
+                    m_elemContext.m_startTagOpen = false;
+                }
+
+                if (m_elemContext.m_isCdataSection) {
+                    /*
+                     * Because of the cdata-section-elements attribute, the
+                     * buffered text must be written as CDATA.
+                     */
+                    char[] chars = m_charactersBuffer.toChars();
+                    cdata(chars, 0, chars.length);
+                    return;
+                }
+
+                m_childNodeNum++;
+                if (shouldIndentForText()) {
+                    indent();
+                    m_startNewLine = true;
     }
+                m_charactersBuffer.flush();
+            }
+        } catch (IOException e) {
+            throw new SAXException(e);
+        } finally {
+            m_charactersBuffer.clear();
+        }
+    }
+
+    /**
+     * Decides whether {@code flushCharactersBuffer} should indent before
+     * writing the buffered text: {@code shouldIndent()} must be true and more
+     * than one child node must have been counted for the current element
+     * ({@code m_childNodeNum > 1}).
+     * This method may be overridden in a sub-class.
+     *
+     * @return True if the buffered text should be indented.
+     */
+    protected boolean shouldIndentForText() {
+        return (shouldIndent() && m_childNodeNum > 1);
+    }
+
     /**
      * This method checks if a given character is between C0 or C1 range
      * of Control characters.
      * This method is added to support Control Characters for XML 1.1
      * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method

@@ -1608,11 +1705,11 @@
      *
      * @throws org.xml.sax.SAXException
      */
     public void characters(String s) throws org.xml.sax.SAXException
     {
-        if (m_inEntityRef && !m_expandDTDEntities)
+        if (isInEntityRef())
             return;
         final int length = s.length();
         if (length > m_charsBuff.length)
         {
             m_charsBuff = new char[length * 2 + 1];

@@ -1756,13 +1853,16 @@
         String localName,
         String name,
         Attributes atts)
         throws org.xml.sax.SAXException
     {
-        if (m_inEntityRef)
+        if (isInEntityRef())
             return;
 
+        m_childNodeNum++;
+        flushCharactersBuffer();
+
         if (m_needToCallStartDocument)
         {
             startDocumentInternal();
             m_needToCallStartDocument = false;
         }

@@ -1810,10 +1910,16 @@
 
         // process the attributes now, because after this SAX call they might be gone
         if (atts != null)
             addAttributes(atts);
 
+        m_ispreserveSpace = m_preserveSpaces.peekOrFalse();
+        m_preserveSpaces.push(m_ispreserveSpace);
+
+        m_childNodeNumStack.push(m_childNodeNum);
+        m_childNodeNum = 0;
+
         m_elemContext = m_elemContext.push(namespaceURI,localName,name);
         m_isprevtext = false;
 
         if (m_tracer != null){
             firePseudoAttributes();

@@ -2017,13 +2123,14 @@
      */
     public void endElement(String namespaceURI, String localName, String name)
         throws org.xml.sax.SAXException
     {
 
-        if (m_inEntityRef)
+        if (isInEntityRef())
             return;
 
+        flushCharactersBuffer();
         // namespaces declared at the current depth are no longer valid
         // so get rid of them
         m_prefixMap.popNamespaces(m_elemContext.m_currentElemDepth, null);
 
         try

@@ -2053,11 +2160,11 @@
             else
             {
                 if (m_cdataTagOpen)
                     closeCDATA();
 
-                if (shouldIndent())
+                if (shouldIndent() && (m_childNodeNum > 1 || !m_isprevtext))
                     indent(m_elemContext.m_currentElemDepth - 1);
                 writer.write('<');
                 writer.write('/');
                 writer.write(name);
                 writer.write('>');

@@ -2071,10 +2178,13 @@
         if (!m_elemContext.m_startTagOpen && m_doIndent)
         {
             m_ispreserve = m_preserves.isEmpty() ? false : m_preserves.pop();
         }
 
+        m_ispreserveSpace = m_preserveSpaces.popAndTop();
+        m_childNodeNum = m_childNodeNumStack.pop();
+
         m_isprevtext = false;
 
         // fire off the end element event
         if (m_tracer != null)
             super.fireEndElem(name);

@@ -2206,12 +2316,14 @@
     public void comment(char ch[], int start, int length)
         throws org.xml.sax.SAXException
     {
 
         int start_old = start;
-        if (m_inEntityRef)
+        if (isInEntityRef())
             return;
+        m_childNodeNum++;
+        flushCharactersBuffer();
         if (m_elemContext.m_startTagOpen)
         {
             closeStartTag();
             m_elemContext.m_startTagOpen = false;
         }

@@ -2387,10 +2499,13 @@
      * @throws org.xml.sax.SAXException The application may raise an exception.
      * @see #endCDATA
      */
     public void startCDATA() throws org.xml.sax.SAXException
     {
+        m_childNodeNum++;
+        flushCharactersBuffer();
+
         m_cdataStartCalled = true;
     }
 
     /**
      * Report the beginning of an entity.

@@ -2410,21 +2525,34 @@
     public void startEntity(String name) throws org.xml.sax.SAXException
     {
         if (name.equals("[dtd]"))
             m_inExternalDTD = true;
 
-        if (!m_expandDTDEntities && !m_inExternalDTD) {
-            /* Only leave the entity as-is if
-             * we've been told not to expand them
-             * and this is not the magic [dtd] name.
+        // if this is not the magic [dtd] name
+        if (!m_inExternalDTD) {
+            // if it's not in nested entity reference
+            if (!isInEntityRef()) {
+                if (shouldFormatOutput()) {
+                    m_charactersBuffer.addEntityReference(name);
+                } else {
+                    outputEntityReference(name);
+                }
+            }
+            m_inEntityRef++;
+        }
+    }
+
+    /**
+     * Write out the entity reference with the form as "&amp;entityName;".
+     *
+     * @param name The name of the entity.
              */
+    private void outputEntityReference(String name) throws SAXException {
             startNonEscaping();
             characters("&" + name + ';');
             endNonEscaping();
-        }
-
-        m_inEntityRef = true;
+        m_isprevtext = true;
     }
 
     /**
      * For the enclosing elements starting tag write out
      * out any attributes followed by ">"

@@ -2521,11 +2649,11 @@
      *
      * @return True if an indent should occur.
      */
     protected boolean shouldIndent()
     {
-        return m_doIndent && (!m_ispreserve && !m_isprevtext) && (m_elemContext.m_currentElemDepth > 0 || m_isStandalone);
+        return shouldFormatOutput() && (m_elemContext.m_currentElemDepth > 0 || m_isStandalone);
     }
 
     /**
      * Searches for the list of qname properties with the specified key in the
      * property list. If the key is not found in this property list, the default

@@ -2813,10 +2941,37 @@
         String rawName,
         String type,
         String value,
         boolean xslAttribute)
     {
+        if (m_charactersBuffer.isAnyCharactersBuffered()) {
+            /*
+             * If stylesheet includes xsl:copy-of an attribute node, XSLTC will
+             * fire an addAttribute event. When a text node is handling in
+             * ToStream, addAttribute has no effect. But closeStartTag call is
+             * delayed to flushCharactersBuffer() method if the text node is
+             * buffered, so here we ignore the attribute to avoid corrupting the
+             * start tag content.
+             *
+             */
+            return m_attributes.getIndex(rawName) < 0;
+        } else {
+            return doAddAttributeAlways(uri, localName, rawName, type, value, xslAttribute);
+        }
+    }
+
+    /**
+     * Actually adds the attribute to the set of attributes.
+     */
+    private boolean doAddAttributeAlways(
+        String uri,
+        String localName,
+        String rawName,
+        String type,
+        String value,
+        boolean xslAttribute)
+    {
         boolean was_added;
         int index;
         //if (uri == null || localName == null || uri.length() == 0)
             index = m_attributes.getIndex(rawName);
         // Don't use 'localName' as it gives incorrect value, rely only on 'rawName'

@@ -2921,16 +3076,30 @@
                 {
                     // TODO Auto-generated catch block
                     e.printStackTrace();
                 }
             }
+
             m_attributes.addAttribute(uri, localName, rawName, type, value);
             was_added = true;
             if (m_tracer != null){
                 firePseudoAttributes();
             }
         }
+
+        if (rawName.equals("xml:space")) {
+            if (value.equals("preserve")) {
+                m_ispreserveSpace = true;
+                if (m_preserveSpaces.size() > 0)
+                    m_preserveSpaces.setTop(m_ispreserveSpace);
+            } else if (value.equals("default")) {
+                m_ispreserveSpace = false;
+                if (m_preserveSpaces.size() > 0)
+                    m_preserveSpaces.setTop(m_ispreserveSpace);
+            }
+        }
+
         return was_added;
     }
 
     /**
      * To fire off the pseudo characters of attributes, as they currently

@@ -3057,14 +3226,18 @@
          this.m_escaping = true;
          // Leave m_format alone for now - Brian M.
          // this.m_format = null;
          this.m_inDoctype = false;
          this.m_ispreserve = false;
-         this.m_ispreserve = false;
+         this.m_preserves.clear();
+         this.m_ispreserveSpace = false;
+         this.m_preserveSpaces.clear();
+         this.m_childNodeNum = 0;
+         this.m_childNodeNumStack.clear();
+         this.m_charactersBuffer.clear();
          this.m_isprevtext = false;
          this.m_isUTF8 = false; //  ?? used anywhere ??
-         this.m_preserves.clear();
          this.m_shouldFlush = true;
          this.m_spaceBeforeClose = false;
          this.m_startNewLine = false;
          this.m_lineSepUse = true;
          // DON'T SET THE WRITER TO NULL, IT MAY BE REUSED !!

@@ -3236,10 +3409,133 @@
             System.arraycopy(m_values, 0, newVector, 0, m_index + 1);
             m_values = newVector;
         }
     }
 
+    /**
+     * This inner class is used to buffer the text nodes and the entity
+     * reference nodes if indentation is on. There is only one CharacterBuffer
+     * instance in ToStream. It contains a queue of GenericCharacters, each of
+     * which is either a text node or an entity reference node. The buffered
+     * text nodes and entity reference nodes are written out together, in the
+     * order they were added, when the buffer is flushed.
+     */
+    private class CharacterBuffer {
+        /**
+         * One buffered item: either a run of text or an entity reference.
+         * GenericCharacters is treated as immutable once created.
+         */
+        private abstract class GenericCharacters {
+            /**
+             * @return True if having any character other than whitespace or
+             *         line feed.
+             */
+            abstract boolean hasContent();
+
+            abstract void flush() throws SAXException;
+
+            /**
+             * Returns the characters of this GenericCharacters as a
+             * character array. The text implementation returns its internal
+             * array rather than a copy.
+             */
+            abstract char[] toChars();
+        }
+
+        private Queue<GenericCharacters> bufferedCharacters = new ArrayDeque<>();
+
+        /**
+         * Append a text node to the buffer. The given range of the array is
+         * copied, so later changes to {@code chars} do not affect the
+         * buffered text.
+         *
+         * @param chars The characters of the text.
+         * @param start The start position in the char array.
+         * @param length The number of characters to take from the char array.
+         */
+        public void addText(final char chars[], final int start, final int length) {
+            bufferedCharacters.add(new GenericCharacters() {
+                char[] text;
+
+                {
+                    text = Arrays.copyOfRange(chars, start, start + length);
+                }
+
+                boolean hasContent() {
+                    for (int i = 0; i < text.length; i++) {
+                        if (!isWhiteSpace(text[i])) {
+                            return true;
+                        }
+                    }
+                    return false;
+                }
+
+                void flush() throws SAXException {
+                    outputCharacters(text, 0, text.length);
+                }
+
+                char[] toChars() {
+                    return text;
+                }
+                
+                boolean isWhiteSpace(char ch) {
+                    return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
+                }
+
+            });
+        }
+
+        /**
+         * Append an entity reference to the buffer. An entity reference is
+         * always considered to have content. When flushed it is written
+         * through {@code outputEntityReference}.
+         *
+         * @param entityName The name of the entity.
+         */
+        public void addEntityReference(String entityName) {
+            bufferedCharacters.add(new GenericCharacters() {
+                boolean hasContent() {
+                    return true;
+                }
+
+                void flush() throws SAXException {
+                    outputEntityReference(entityName);
+                }
+
+                char[] toChars() {
+                    return ("&" + entityName + ";").toCharArray();
+                }
+            });
+        }
+
+        /**
+         * @return True if any GenericCharacters is already buffered,
+         *         regardless of whether it has content.
+         */
+        public boolean isAnyCharactersBuffered() {
+            return !bufferedCharacters.isEmpty();
+        }
+
+        /**
+         * @return True if any buffered GenericCharacters has content.
+         */
+        public boolean hasContent() {
+            return bufferedCharacters.stream().anyMatch(GenericCharacters::hasContent);
+        }
+
+        /**
+         * Flush all buffered GenericCharacters. Each item is removed from
+         * the head of the queue and then flushed, so items are written in the
+         * order they were added.
+         *
+         * @throws SAXException if flushing an item fails.
+         */
+        public void flush() throws SAXException {
+            GenericCharacters element;
+            while ((element = bufferedCharacters.poll()) != null)
+                element.flush();
+        }
+
+        /**
+         * Concatenates the characters of all buffered GenericCharacters into
+         * a new character array. The buffer itself is left unchanged.
+         */
+        public char[] toChars() {
+            return bufferedCharacters.stream().map(GenericCharacters::toChars)
+                    .collect(StringBuilder::new, StringBuilder::append, StringBuilder::append).toString()
+                    .toCharArray();
+        }
+
+        /**
+         * Clear the buffer, discarding any items that have not been flushed.
+         */
+        public void clear() {
+            bufferedCharacters.clear();
+        }
+    }
+
     // Implement DTDHandler
     /**
      * If this method is called, the serializer is used as a
      * DTDHandler, which changes behavior how the serializer
      * handles document entities.
< prev index next >