< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java

Print this page

        

*** 49,59 **** /** * This abstract class is a base class for other stream * serializers (xml, html, text ...) that write output to a stream. * * @xsl.usage internal ! * @LastModified: Feb 2018 */ abstract public class ToStream extends SerializerBase { private static final String COMMENT_BEGIN = "<!--"; private static final String COMMENT_END = "-->"; --- 49,59 ---- /** * This abstract class is a base class for other stream * serializers (xml, html, text ...) that write output to a stream. * * @xsl.usage internal ! * @LastModified: Sept 2018 */ abstract public class ToStream extends SerializerBase { private static final String COMMENT_BEGIN = "<!--"; private static final String COMMENT_END = "-->";
*** 191,200 **** --- 191,202 ---- * If this flag is true DTD entity references are not left as-is, * which is exiting older behavior. */ private boolean m_expandDTDEntities = true; + private char m_highSurrogate = 0; + /** * Default constructor */ public ToStream() { }
*** 951,1024 **** * @param c the first (high) part of the surrogate, which * must be confirmed before calling this method. * @param ch Character array. * @param i position Where the surrogate was detected. * @param end The end index of the significant characters. ! * @return 0 if the pair of characters was written out as-is, ! * the unicode code point of the character represented by ! * the surrogate pair if an entity reference with that value ! * was written out. * * @throws IOException * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected. */ protected int writeUTF16Surrogate(char c, char ch[], int i, int end) ! throws IOException { ! int codePoint = 0; if (i + 1 >= end) { ! throw new IOException( ! Utils.messages.createMessage( ! MsgKey.ER_INVALID_UTF16_SURROGATE, ! new Object[] { Integer.toHexString((int) c)})); } - final char high = c; - final char low = ch[i+1]; if (!Encodings.isLowUTF16Surrogate(low)) { ! throw new IOException( ! Utils.messages.createMessage( ! MsgKey.ER_INVALID_UTF16_SURROGATE, ! new Object[] { ! Integer.toHexString((int) c) ! + " " ! + Integer.toHexString(low)})); } final Writer writer = m_writer; // If we make it to here we have a valid high, low surrogate pair ! if (m_encodingInfo.isInEncoding(c,low)) { // If the character formed by the surrogate pair // is in the encoding, so just write it out ! writer.write(ch,i,2); } else { // Don't know what to do with this char, it is // not in the encoding and not a high char in // a surrogate pair, so write out as an entity ref final String encoding = getEncoding(); if (encoding != null) { ! /* The output encoding is known, ! * so somthing is wrong. ! */ ! codePoint = Encodings.toCodePoint(high, low); ! // not in the encoding, so write out a character reference ! writer.write('&'); ! writer.write('#'); ! writer.write(Integer.toString(codePoint)); ! writer.write(';'); } else { /* The output encoding is not known, * so just write it out as-is. */ ! writer.write(ch, i, 2); } } // non-zero only if character reference was written out. ! return codePoint; } /** * Handle one of the default entities, return false if it * is not a default entity. --- 953,1019 ---- * @param c the first (high) part of the surrogate, which * must be confirmed before calling this method. * @param ch Character array. * @param i position Where the surrogate was detected. * @param end The end index of the significant characters. ! * @return the status of writing a surrogate pair. ! * -1 -- nothing is written ! * 0 -- the pair is written as-is ! * code point -- the pair is written as an entity reference * * @throws IOException * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected. */ protected int writeUTF16Surrogate(char c, char ch[], int i, int end) ! throws IOException, SAXException { ! int status = -1; if (i + 1 >= end) { ! m_highSurrogate = c; ! return status; ! } ! ! char high, low; ! if (m_highSurrogate == 0) { ! high = c; ! low = ch[i+1]; ! status = 0; ! } else { ! high = m_highSurrogate; ! low = c; ! m_highSurrogate = 0; } if (!Encodings.isLowUTF16Surrogate(low)) { ! throwIOE(high, low); } final Writer writer = m_writer; // If we make it to here we have a valid high, low surrogate pair ! if (m_encodingInfo.isInEncoding(high,low)) { // If the character formed by the surrogate pair // is in the encoding, so just write it out ! writer.write(new char[]{high, low}, 0, 2); } else { // Don't know what to do with this char, it is // not in the encoding and not a high char in // a surrogate pair, so write out as an entity ref final String encoding = getEncoding(); if (encoding != null) { ! status = writeCharRef(writer, high, low); } else { /* The output encoding is not known, * so just write it out as-is. */ ! writer.write(new char[]{high, low}, 0, 2); } } // non-zero only if character reference was written out. ! return status; } /** * Handle one of the default entities, return false if it * is not a default entity.
*** 1104,1139 **** { writer.write(m_lineSep, 0, m_lineSepLen); } else if (isCData && (!escapingNotNeeded(c))) { ! // if (i != 0) ! if (m_cdataTagOpen) ! closeCDATA(); ! ! // This needs to go into a function... ! if (Encodings.isHighUTF16Surrogate(c)) ! { ! writeUTF16Surrogate(c, ch, i, end); ! i++ ; // process two input characters ! } ! else ! { ! writer.write("&#"); ! ! String intStr = Integer.toString((int) c); ! ! writer.write(intStr); ! writer.write(';'); ! } ! ! // if ((i != 0) && (i < (end - 1))) ! // if (!m_cdataTagOpen && (i < (end - 1))) ! // { ! // writer.write(CDATA_DELIMITER_OPEN); ! // m_cdataTagOpen = true; ! // } } else if ( isCData && ((i < (end - 2)) && (']' == c) --- 1099,1109 ---- { writer.write(m_lineSep, 0, m_lineSepLen); } else if (isCData && (!escapingNotNeeded(c))) { ! i = handleEscaping(writer, c, ch, i, end); } else if ( isCData && ((i < (end - 2)) && (']' == c)
*** 1153,1185 **** writer.write(CDATA_DELIMITER_OPEN); m_cdataTagOpen = true; } writer.write(c); } ! ! // This needs to go into a function... ! else if (Encodings.isHighUTF16Surrogate(c)) ! { ! if (m_cdataTagOpen) ! closeCDATA(); ! writeUTF16Surrogate(c, ch, i, end); ! i++; // process two input characters } - else - { - if (m_cdataTagOpen) - closeCDATA(); - writer.write("&#"); ! String intStr = Integer.toString((int) c); ! writer.write(intStr); ! writer.write(';'); } } } ! } /** * Ends an un-escaping section. * --- 1123,1170 ---- writer.write(CDATA_DELIMITER_OPEN); m_cdataTagOpen = true; } writer.write(c); } ! else { ! i = handleEscaping(writer, c, ch, i, end); ! } ! } } ! } ! /** ! * Handles escaping, writes either with a surrogate pair or a character ! * reference. ! * ! * @param c the current char ! * @param ch the character array ! * @param i the current position ! * @param end the end index of the array ! * @return the next index ! * ! * @throws IOException ! * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected. ! */ ! private int handleEscaping(Writer writer, char c, char ch[], int i, int end) ! throws IOException, SAXException { ! if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c)) ! { ! if (writeUTF16Surrogate(c, ch, i, end) >= 0) { ! // move the index if the low surrogate is consumed ! // as writeUTF16Surrogate has written the pair ! if (Encodings.isHighUTF16Surrogate(c)) { ! i++ ; } } } ! else ! { ! writeCharRef(writer, c); ! } ! return i; } /** * Ends an un-escaping section. *
*** 1244,1254 **** { closeStartTag(); m_elemContext.m_startTagOpen = false; } ! if (shouldIndent()) indent(); boolean writeCDataBrackets = (((length >= 1) && escapingNotNeeded(ch[start]))); --- 1229,1239 ---- { closeStartTag(); m_elemContext.m_startTagOpen = false; } ! if (!m_cdataTagOpen && shouldIndent()) indent(); boolean writeCDataBrackets = (((length >= 1) && escapingNotNeeded(ch[start])));
*** 1642,1652 **** char[] chars, int end, int i, char ch, int lastDirty, ! boolean fromTextNode) throws IOException { int startClean = lastDirty + 1; // if we have some clean characters accumulated // process them before the dirty one. if (i > startClean) --- 1627,1637 ---- char[] chars, int end, int i, char ch, int lastDirty, ! boolean fromTextNode) throws IOException, SAXException { int startClean = lastDirty + 1; // if we have some clean characters accumulated // process them before the dirty one. if (i > startClean)
*** 1721,1801 **** int i, char[] chars, int len, boolean fromTextNode, boolean escLF) ! throws IOException { int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF); if (i == pos) { if (Encodings.isHighUTF16Surrogate(ch)) { - - // Should be the UTF-16 low surrogate of the hig/low pair. - char next; - // Unicode code point formed from the high/low pair. - int codePoint = 0; - if (i + 1 >= len) { ! throw new IOException( ! Utils.messages.createMessage( ! MsgKey.ER_INVALID_UTF16_SURROGATE, ! new Object[] { Integer.toHexString(ch)})); ! //"Invalid UTF-16 surrogate detected: " ! ! //+Integer.toHexString(ch)+ " ?"); } else { ! next = chars[++i]; ! if (!(Encodings.isLowUTF16Surrogate(next))) ! throw new IOException( ! Utils.messages.createMessage( ! MsgKey ! .ER_INVALID_UTF16_SURROGATE, ! new Object[] { ! Integer.toHexString(ch) ! + " " ! + Integer.toHexString(next)})); ! //"Invalid UTF-16 surrogate detected: " ! //+Integer.toHexString(ch)+" "+Integer.toHexString(next)); ! codePoint = Encodings.toCodePoint(ch,next); ! } ! ! writer.write("&#"); ! writer.write(Integer.toString(codePoint)); ! writer.write(';'); pos += 2; // count the two characters that went into writing out this entity } else { /* This if check is added to support control characters in XML 1.1. * If a character is a Control Character within C0 and C1 range, it is desirable * to write it out as Numeric Character Reference(NCR) regardless of XML Version * being used for output document. */ if (isCharacterInC0orC1Range(ch) || (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch))) { ! writer.write("&#"); ! writer.write(Integer.toString(ch)); ! writer.write(';'); } else if ((!escapingNotNeeded(ch) || ( (fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))) && m_elemContext.m_currentElemDepth > 0) { ! writer.write("&#"); ! writer.write(Integer.toString(ch)); ! writer.write(';'); } else { writer.write(ch); } --- 1706,1768 ---- int i, char[] chars, int len, boolean fromTextNode, boolean escLF) ! throws IOException, SAXException { int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF); if (i == pos) { + if (m_highSurrogate != 0) { + if (!(Encodings.isLowUTF16Surrogate(ch))) { + throwIOE(m_highSurrogate, ch); + } + writeCharRef(writer, m_highSurrogate, ch); + m_highSurrogate = 0; + return ++pos; + } + if (Encodings.isHighUTF16Surrogate(ch)) { if (i + 1 >= len) { ! // save for the next read ! m_highSurrogate = ch; ! pos++; } else { ! // the next should be the UTF-16 low surrogate of the hig/low pair. ! char next = chars[++i]; if (!(Encodings.isLowUTF16Surrogate(next))) ! throwIOE(ch, next); ! writeCharRef(writer, ch, next); pos += 2; // count the two characters that went into writing out this entity } + } else { /* This if check is added to support control characters in XML 1.1. * If a character is a Control Character within C0 and C1 range, it is desirable * to write it out as Numeric Character Reference(NCR) regardless of XML Version * being used for output document. */ if (isCharacterInC0orC1Range(ch) || (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch))) { ! writeCharRef(writer, ch); } else if ((!escapingNotNeeded(ch) || ( (fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))) && m_elemContext.m_currentElemDepth > 0) { ! writeCharRef(writer, ch); } else { writer.write(ch); }
*** 1805,1814 **** --- 1772,1820 ---- } return pos; } /** + * Writes out a character reference. + * @param writer the writer + * @param c the character + * @throws IOException + */ + private void writeCharRef(Writer writer, char c) throws IOException, SAXException { + if (m_cdataTagOpen) + closeCDATA(); + writer.write("&#"); + writer.write(Integer.toString(c)); + writer.write(';'); + } + + /** + * Writes out a pair of surrogates as a character reference + * @param writer the writer + * @param high the high surrogate + * @param low the low surrogate + * @throws IOException + */ + private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException { + if (m_cdataTagOpen) + closeCDATA(); + // Unicode code point formed from the high/low pair. + int codePoint = Encodings.toCodePoint(high, low); + writer.write("&#"); + writer.write(Integer.toString(codePoint)); + writer.write(';'); + return codePoint; + } + + private void throwIOE(char ch, char next) throws IOException { + throw new IOException(Utils.messages.createMessage( + MsgKey.ER_INVALID_UTF16_SURROGATE, + new Object[] {Integer.toHexString(ch) + " " + + Integer.toHexString(next)})); + } + + /** * Receive notification of the beginning of an element, although this is a * SAX method additional namespace or attribute information can occur before * or after this call, that is associated with this element. * *
*** 2051,2061 **** */ public void writeAttrString( Writer writer, String string, String encoding) ! throws IOException { final int len = string.length(); if (len > m_attrBuff.length) { m_attrBuff = new char[len*2 + 1]; --- 2057,2067 ---- */ public void writeAttrString( Writer writer, String string, String encoding) ! throws IOException, SAXException { final int len = string.length(); if (len > m_attrBuff.length) { m_attrBuff = new char[len*2 + 1];
< prev index next >