--- old/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java 2016-12-15 18:42:10.061346301 +0800 +++ new/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java 2016-12-15 18:42:09.934409801 +0800 @@ -29,12 +29,16 @@ import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.EmptyStackException; import java.util.Enumeration; -import java.util.Iterator; import java.util.Properties; +import java.util.Queue; import java.util.Set; import java.util.StringTokenizer; -import java.util.ArrayList; import javax.xml.transform.ErrorListener; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; @@ -89,6 +93,29 @@ Object m_charToByteConverter = null; /** + * Used to buffer the text nodes and the entity reference nodes if + * indentation is on. + */ + protected CharacterBuffer m_charactersBuffer = new CharacterBuffer(); + + /** + * Used to decide if a text node is pretty-printed with indentation. + * If m_childNodeNum > 1, the text node will be indented. + * + */ + protected Deque m_childNodeNumStack = new ArrayDeque<>(); + + protected int m_childNodeNum = 0; + + /** + * Used to handle xml:space attribute + * + */ + protected BoolStack m_preserveSpaces = new BoolStack(); + + protected boolean m_ispreserveSpace = false; + + /** * Stack to keep track of whether or not we need to * preserve whitespace. * @@ -767,12 +794,10 @@ if (m_startNewLine) outputLineSep(); - /* For m_indentAmount > 0 this extra test might be slower - * but Xalan's default value is 0, so this extra test - * will run faster in that situation. + /* + * Default value is 4, so printSpace directly. */ - if (m_indentAmount > 0) - printSpace(depth * m_indentAmount); + printSpace(depth * m_indentAmount); } @@ -1234,7 +1259,6 @@ protected void cdata(char ch[], int start, final int length) throws org.xml.sax.SAXException { - try { final int old_start = start; @@ -1323,7 +1347,7 @@ throws org.xml.sax.SAXException { - if (m_inEntityRef) + if (isInEntityRef()) return; try { @@ -1378,9 +1402,11 @@ // characters to read from array is 0. // Section 7.6.1 of XSLT 1.0 (http://www.w3.org/TR/xslt#value-of) suggest no text node // is created if string is empty. - if (length == 0 || (m_inEntityRef && !m_expandDTDEntities)) + if (length == 0 || (isInEntityRef())) return; - if (m_elemContext.m_startTagOpen) + + final boolean shouldFormat = shouldFormatOutput(); + if (m_elemContext.m_startTagOpen && !shouldFormat) { closeStartTag(); m_elemContext.m_startTagOpen = false; @@ -1407,7 +1433,7 @@ if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping)) { charactersRaw(chars, start, length); - + m_isprevtext = true; // time to fire off characters generation event if (m_tracer != null) super.fireCharEvent(chars, start, length); @@ -1415,13 +1441,41 @@ return; } - if (m_elemContext.m_startTagOpen) + if (m_elemContext.m_startTagOpen && !shouldFormat) { closeStartTag(); m_elemContext.m_startTagOpen = false; } + if (shouldFormat) { + m_charactersBuffer.addText(chars, start, length); + } else { + outputCharacters(chars, start, length); + } + + // time to fire off characters generation event + if (m_tracer != null) + super.fireCharEvent(chars, start, length); + } + + /** + * This method checks if the content in current element should be formatted. + * + * @return True if the content should be formatted. + */ + protected boolean shouldFormatOutput() { + return !m_ispreserveSpace && m_doIndent; + } + + /** + * Write out the characters. + * + * @param chars The characters of the text. + * @param start The start position in the char array. + * @param length The number of characters from the char array. + */ + private void outputCharacters(final char chars[], final int start, final int length) throws SAXException { try { int i; @@ -1459,8 +1513,8 @@ m_ispreserve = true; -// int lengthClean; // number of clean characters in a row -// final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); +// int lengthClean; // number of clean characters in a row +// final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); final boolean isXML10 = XMLVERSION10.equals(getVersion()); // we've skipped the leading whitespace, now deal with the rest @@ -1514,11 +1568,54 @@ { throw new SAXException(e); } + } + + /** + * Used to flush the buffered characters when indentation is on, this method + * will be called when the next node is traversed. + * + */ + final protected void flushCharactersBuffer() throws SAXException { + try { + if (shouldFormatOutput() && m_charactersBuffer.hasContent()) { + if (m_elemContext.m_startTagOpen) { + closeStartTag(); + m_elemContext.m_startTagOpen = false; + } + + if (m_elemContext.m_isCdataSection) { + /* + * due to cdata-section-elements atribute, we need this as + * cdata + */ + char[] chars = m_charactersBuffer.toChars(); + cdata(chars, 0, chars.length); + return; + } + + m_childNodeNum++; + if (shouldIndentForText()) { + indent(); + m_startNewLine = true; + } + m_charactersBuffer.flush(); + } + } catch (IOException e) { + throw new SAXException(e); + } finally { + m_charactersBuffer.clear(); + } + } - // time to fire off characters generation event - if (m_tracer != null) - super.fireCharEvent(chars, start, length); + /** + * True if should indent in flushCharactersBuffer method. + * This method may be overridden in sub-class. + * + */ + protected boolean shouldIndentForText() { + return (shouldIndent() && m_childNodeNum > 1); } + /** * This method checks if a given character is between C0 or C1 range * of Control characters. @@ -1610,7 +1707,7 @@ */ public void characters(String s) throws org.xml.sax.SAXException { - if (m_inEntityRef && !m_expandDTDEntities) + if (isInEntityRef()) return; final int length = s.length(); if (length > m_charsBuff.length) @@ -1758,9 +1855,12 @@ Attributes atts) throws org.xml.sax.SAXException { - if (m_inEntityRef) + if (isInEntityRef()) return; + m_childNodeNum++; + flushCharactersBuffer(); + if (m_needToCallStartDocument) { startDocumentInternal(); @@ -1812,6 +1912,12 @@ if (atts != null) addAttributes(atts); + m_ispreserveSpace = m_preserveSpaces.peekOrFalse(); + m_preserveSpaces.push(m_ispreserveSpace); + + m_childNodeNumStack.push(m_childNodeNum); + m_childNodeNum = 0; + m_elemContext = m_elemContext.push(namespaceURI,localName,name); m_isprevtext = false; @@ -2019,9 +2125,10 @@ throws org.xml.sax.SAXException { - if (m_inEntityRef) + if (isInEntityRef()) return; + flushCharactersBuffer(); // namespaces declared at the current depth are no longer valid // so get rid of them m_prefixMap.popNamespaces(m_elemContext.m_currentElemDepth, null); @@ -2055,7 +2162,7 @@ if (m_cdataTagOpen) closeCDATA(); - if (shouldIndent()) + if (shouldIndent() && (m_childNodeNum > 1 || !m_isprevtext)) indent(m_elemContext.m_currentElemDepth - 1); writer.write('<'); writer.write('/'); @@ -2073,6 +2180,9 @@ m_ispreserve = m_preserves.isEmpty() ? false : m_preserves.pop(); } + m_ispreserveSpace = m_preserveSpaces.popAndTop(); + m_childNodeNum = m_childNodeNumStack.pop(); + m_isprevtext = false; // fire off the end element event @@ -2208,8 +2318,10 @@ { int start_old = start; - if (m_inEntityRef) + if (isInEntityRef()) return; + m_childNodeNum++; + flushCharactersBuffer(); if (m_elemContext.m_startTagOpen) { closeStartTag(); @@ -2389,6 +2501,9 @@ */ public void startCDATA() throws org.xml.sax.SAXException { + m_childNodeNum++; + flushCharactersBuffer(); + m_cdataStartCalled = true; } @@ -2412,17 +2527,30 @@ if (name.equals("[dtd]")) m_inExternalDTD = true; - if (!m_expandDTDEntities && !m_inExternalDTD) { - /* Only leave the entity as-is if - * we've been told not to expand them - * and this is not the magic [dtd] name. - */ - startNonEscaping(); - characters("&" + name + ';'); - endNonEscaping(); + // if this is not the magic [dtd] name + if (!m_inExternalDTD) { + // if it's not in nested entity reference + if (!isInEntityRef()) { + if (shouldFormatOutput()) { + m_charactersBuffer.addEntityReference(name); + } else { + outputEntityReference(name); + } + } + m_inEntityRef++; } + } - m_inEntityRef = true; + /** + * Write out the entity reference with the form as "&entityName;". + * + * @param name The name of the entity. + */ + private void outputEntityReference(String name) throws SAXException { + startNonEscaping(); + characters("&" + name + ';'); + endNonEscaping(); + m_isprevtext = true; } /** @@ -2523,7 +2651,7 @@ */ protected boolean shouldIndent() { - return m_doIndent && (!m_ispreserve && !m_isprevtext) && (m_elemContext.m_currentElemDepth > 0 || m_isStandalone); + return shouldFormatOutput() && (m_elemContext.m_currentElemDepth > 0 || m_isStandalone); } /** @@ -2815,10 +2943,37 @@ String value, boolean xslAttribute) { + if (m_charactersBuffer.isAnyCharactersBuffered()) { + /* + * If stylesheet includes xsl:copy-of an attribute node, XSLTC will + * fire an addAttribute event. When a text node is handling in + * ToStream, addAttribute has no effect. But closeStartTag call is + * delayed to flushCharactersBuffer() method if the text node is + * buffered, so here we ignore the attribute to avoid corrupting the + * start tag content. + * + */ + return m_attributes.getIndex(rawName) < 0; + } else { + return doAddAttributeAlways(uri, localName, rawName, type, value, xslAttribute); + } + } + + /** + * Does really add the attribute to the set of attributes. + */ + private boolean doAddAttributeAlways( + String uri, + String localName, + String rawName, + String type, + String value, + boolean xslAttribute) + { boolean was_added; int index; //if (uri == null || localName == null || uri.length() == 0) - index = m_attributes.getIndex(rawName); + index = m_attributes.getIndex(rawName); // Don't use 'localName' as it gives incorrect value, rely only on 'rawName' /*else { index = m_attributes.getIndex(uri, localName); @@ -2923,12 +3078,26 @@ e.printStackTrace(); } } + m_attributes.addAttribute(uri, localName, rawName, type, value); was_added = true; if (m_tracer != null){ firePseudoAttributes(); } } + + if (rawName.equals("xml:space")) { + if (value.equals("preserve")) { + m_ispreserveSpace = true; + if (m_preserveSpaces.size() > 0) + m_preserveSpaces.setTop(m_ispreserveSpace); + } else if (value.equals("default")) { + m_ispreserveSpace = false; + if (m_preserveSpaces.size() > 0) + m_preserveSpaces.setTop(m_ispreserveSpace); + } + } + return was_added; } @@ -3059,10 +3228,14 @@ // this.m_format = null; this.m_inDoctype = false; this.m_ispreserve = false; - this.m_ispreserve = false; + this.m_preserves.clear(); + this.m_ispreserveSpace = false; + this.m_preserveSpaces.clear(); + this.m_childNodeNum = 0; + this.m_childNodeNumStack.clear(); + this.m_charactersBuffer.clear(); this.m_isprevtext = false; this.m_isUTF8 = false; // ?? used anywhere ?? - this.m_preserves.clear(); this.m_shouldFlush = true; this.m_spaceBeforeClose = false; this.m_startNewLine = false; @@ -3238,6 +3411,129 @@ } } + /** + * This inner class is used to buffer the text nodes and the entity + * reference nodes if indentation is on. There is only one CharacterBuffer + * instance in ToStream, it contains a queue of GenericCharacters, + * GenericCharacters can be a text node or an entity reference node. The + * text nodes and entity reference nodes are joined together and then are + * flushed. + */ + private class CharacterBuffer { + /** + * GenericCharacters is immutable. + */ + private abstract class GenericCharacters { + /** + * @return True if having any character other than whitespace or + * line feed. + */ + abstract boolean hasContent(); + + abstract void flush() throws SAXException; + + /** + * Converts this GenericCharacters to a new character array. + */ + abstract char[] toChars(); + } + + private Queue bufferedCharacters = new ArrayDeque<>(); + + /** + * Append a text node to the buffer. + */ + public void addText(final char chars[], final int start, final int length) { + bufferedCharacters.add(new GenericCharacters() { + char[] text; + + { + text = Arrays.copyOfRange(chars, start, start + length); + } + + boolean hasContent() { + for (int i = 0; i < text.length; i++) { + if (!isWhiteSpace(text[i])) { + return true; + } + } + return false; + } + + void flush() throws SAXException { + outputCharacters(text, 0, text.length); + } + + char[] toChars() { + return text; + } + + boolean isWhiteSpace(char ch) { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'; + } + + }); + } + + /** + * Append an entity reference to the buffer. + */ + public void addEntityReference(String entityName) { + bufferedCharacters.add(new GenericCharacters() { + boolean hasContent() { + return true; + } + + void flush() throws SAXException { + outputEntityReference(entityName); + } + + char[] toChars() { + return ("&" + entityName + ";").toCharArray(); + } + }); + } + + /** + * @return True if any GenericCharacters is already buffered. + */ + public boolean isAnyCharactersBuffered() { + return !bufferedCharacters.isEmpty(); + } + + /** + * @return True if any buffered GenericCharacters has content. + */ + public boolean hasContent() { + return bufferedCharacters.stream().anyMatch(GenericCharacters::hasContent); + } + + /** + * Flush all buffered GenericCharacters. + */ + public void flush() throws SAXException { + GenericCharacters element; + while ((element = bufferedCharacters.poll()) != null) + element.flush(); + } + + /** + * Converts all buffered GenericCharacters to a new character array. + */ + public char[] toChars() { + return bufferedCharacters.stream().map(GenericCharacters::toChars) + .collect(StringBuilder::new, StringBuilder::append, StringBuilder::append).toString() + .toCharArray(); + } + + /** + * Clear the buffer. + */ + public void clear() { + bufferedCharacters.clear(); + } + } + // Implement DTDHandler /** * If this method is called, the serializer is used as a