34 import java.util.Enumeration;
35 import java.util.Iterator;
36 import java.util.List;
37 import java.util.Properties;
38 import java.util.Set;
39 import java.util.StringTokenizer;
40 import javax.xml.transform.ErrorListener;
41 import javax.xml.transform.OutputKeys;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerException;
44 import org.w3c.dom.Node;
45 import org.xml.sax.Attributes;
46 import org.xml.sax.ContentHandler;
47 import org.xml.sax.SAXException;
48
49 /**
50 * This abstract class is a base class for other stream
51 * serializers (xml, html, text ...) that write output to a stream.
52 *
53 * @xsl.usage internal
54 * @LastModified: Feb 2018
55 */
56 abstract public class ToStream extends SerializerBase {
57
58 private static final String COMMENT_BEGIN = "<!--";
59 private static final String COMMENT_END = "-->";
60
61 /** Stack to keep track of disabling output escaping. */
62 protected BoolStack m_disableOutputEscapingStates = new BoolStack();
63
64 /**
65 * The encoding information associated with this serializer.
66 * Although initially there is no encoding,
67 * there is a dummy EncodingInfo object that will say
68 * that every character is in the encoding. This is useful
69 * for a serializer that is in temporary output state and has
70 * no associated encoding. A serializer in final output state
71 * will have an encoding, and will worry about whether
72 * single chars or surrogate pairs of high/low chars form
73 * characters in the output encoding.
74 */
176 * Tells if we're in an internal document type subset.
177 */
178 protected boolean m_inDoctype = false;
179
180 /**
181 * Flag to quickly tell if the encoding is UTF8.
182 */
183 boolean m_isUTF8 = false;
184
185 /**
186 * remembers if we are in between the startCDATA() and endCDATA() callbacks
187 */
188 protected boolean m_cdataStartCalled = false;
189
/**
 * If this flag is true DTD entity references are not left as-is,
 * which is the existing older behavior.
 */
194 private boolean m_expandDTDEntities = true;
195
196 /**
197 * Default constructor
198 */
199 public ToStream() { }
200
201 /**
202 * This helper method to writes out "]]>" when closing a CDATA section.
203 *
204 * @throws org.xml.sax.SAXException
205 */
206 protected void closeCDATA() throws org.xml.sax.SAXException {
207 try {
208 m_writer.write(CDATA_DELIMITER_CLOSE);
209 // write out a CDATA section closing "]]>"
210 m_cdataTagOpen = false; // Remember that we have done so.
211 }
212 catch (IOException e) {
213 throw new SAXException(e);
214 }
215 }
936 }
937 return ret;
938 }
939
940 /**
941 * Once a surrogate has been detected, write out the pair of
942 * characters if it is in the encoding, or if there is no
943 * encoding, otherwise write out an entity reference
944 * of the value of the unicode code point of the character
945 * represented by the high/low surrogate pair.
946 * <p>
947 * An exception is thrown if there is no low surrogate in the pair,
948 * because the array ends unexpectely, or if the low char is there
949 * but its value is such that it is not a low surrogate.
950 *
951 * @param c the first (high) part of the surrogate, which
952 * must be confirmed before calling this method.
953 * @param ch Character array.
954 * @param i position Where the surrogate was detected.
955 * @param end The end index of the significant characters.
956 * @return 0 if the pair of characters was written out as-is,
957 * the unicode code point of the character represented by
958 * the surrogate pair if an entity reference with that value
959 * was written out.
960 *
961 * @throws IOException
962 * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
963 */
964 protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
965 throws IOException
966 {
967 int codePoint = 0;
968 if (i + 1 >= end)
969 {
970 throw new IOException(
971 Utils.messages.createMessage(
972 MsgKey.ER_INVALID_UTF16_SURROGATE,
973 new Object[] { Integer.toHexString((int) c)}));
974 }
975
976 final char high = c;
977 final char low = ch[i+1];
978 if (!Encodings.isLowUTF16Surrogate(low)) {
979 throw new IOException(
980 Utils.messages.createMessage(
981 MsgKey.ER_INVALID_UTF16_SURROGATE,
982 new Object[] {
983 Integer.toHexString((int) c)
984 + " "
985 + Integer.toHexString(low)}));
986 }
987
988 final Writer writer = m_writer;
989
990 // If we make it to here we have a valid high, low surrogate pair
991 if (m_encodingInfo.isInEncoding(c,low)) {
992 // If the character formed by the surrogate pair
993 // is in the encoding, so just write it out
994 writer.write(ch,i,2);
995 }
996 else {
997 // Don't know what to do with this char, it is
998 // not in the encoding and not a high char in
999 // a surrogate pair, so write out as an entity ref
1000 final String encoding = getEncoding();
1001 if (encoding != null) {
1002 /* The output encoding is known,
1003 * so somthing is wrong.
1004 */
1005 codePoint = Encodings.toCodePoint(high, low);
1006 // not in the encoding, so write out a character reference
1007 writer.write('&');
1008 writer.write('#');
1009 writer.write(Integer.toString(codePoint));
1010 writer.write(';');
1011 } else {
1012 /* The output encoding is not known,
1013 * so just write it out as-is.
1014 */
1015 writer.write(ch, i, 2);
1016 }
1017 }
1018 // non-zero only if character reference was written out.
1019 return codePoint;
1020 }
1021
1022 /**
1023 * Handle one of the default entities, return false if it
1024 * is not a default entity.
1025 *
1026 * @param ch character to be escaped.
1027 * @param i index into character array.
1028 * @param chars non-null reference to character array.
1029 * @param len length of chars.
1030 * @param fromTextNode true if the characters being processed
1031 * are from a text node, false if they are from an attribute value
1032 * @param escLF true if the linefeed should be escaped.
1033 *
1034 * @return i+1 if the character was written, else i.
1035 *
1036 * @throws java.io.IOException
1037 */
1038 protected int accumDefaultEntity(
1039 Writer writer,
1089 char ch[],
1090 int start,
1091 int length,
1092 boolean isCData,
1093 boolean useSystemLineSeparator)
1094 throws IOException, org.xml.sax.SAXException
1095 {
1096 final Writer writer = m_writer;
1097 int end = start + length;
1098
1099 for (int i = start; i < end; i++)
1100 {
1101 char c = ch[i];
1102
1103 if (CharInfo.S_LINEFEED == c && useSystemLineSeparator)
1104 {
1105 writer.write(m_lineSep, 0, m_lineSepLen);
1106 }
1107 else if (isCData && (!escapingNotNeeded(c)))
1108 {
1109 // if (i != 0)
1110 if (m_cdataTagOpen)
1111 closeCDATA();
1112
1113 // This needs to go into a function...
1114 if (Encodings.isHighUTF16Surrogate(c))
1115 {
1116 writeUTF16Surrogate(c, ch, i, end);
1117 i++ ; // process two input characters
1118 }
1119 else
1120 {
1121 writer.write("&#");
1122
1123 String intStr = Integer.toString((int) c);
1124
1125 writer.write(intStr);
1126 writer.write(';');
1127 }
1128
1129 // if ((i != 0) && (i < (end - 1)))
1130 // if (!m_cdataTagOpen && (i < (end - 1)))
1131 // {
1132 // writer.write(CDATA_DELIMITER_OPEN);
1133 // m_cdataTagOpen = true;
1134 // }
1135 }
1136 else if (
1137 isCData
1138 && ((i < (end - 2))
1139 && (']' == c)
1140 && (']' == ch[i + 1])
1141 && ('>' == ch[i + 2])))
1142 {
1143 writer.write(CDATA_CONTINUE);
1144
1145 i += 2;
1146 }
1147 else
1148 {
1149 if (escapingNotNeeded(c))
1150 {
1151 if (isCData && !m_cdataTagOpen)
1152 {
1153 writer.write(CDATA_DELIMITER_OPEN);
1154 m_cdataTagOpen = true;
1155 }
1156 writer.write(c);
1157 }
1158
1159 // This needs to go into a function...
1160 else if (Encodings.isHighUTF16Surrogate(c))
1161 {
1162 if (m_cdataTagOpen)
1163 closeCDATA();
1164 writeUTF16Surrogate(c, ch, i, end);
1165 i++; // process two input characters
1166 }
1167 else
1168 {
1169 if (m_cdataTagOpen)
1170 closeCDATA();
1171 writer.write("&#");
1172
1173 String intStr = Integer.toString((int) c);
1174
1175 writer.write(intStr);
1176 writer.write(';');
1177 }
1178 }
1179 }
1180
1181 }
1182
1183 /**
1184 * Ends an un-escaping section.
1185 *
1186 * @see #startNonEscaping
1187 *
1188 * @throws org.xml.sax.SAXException
1189 */
1190 public void endNonEscaping() throws org.xml.sax.SAXException
1191 {
1192 m_disableOutputEscapingStates.pop();
1193 }
1194
1195 /**
1196 * Starts an un-escaping section. All characters printed within an un-
1197 * escaping section are printed as is, without escaping special characters
1198 * into entity references. Only XML and HTML serializers need to support
1199 * this method.
1200 * <p> The contents of the un-escaping section will be delivered through the
1229 * @param length The number of characters to read from the array.
1230 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1231 * wrapping another exception.
1232 * @see #ignorableWhitespace
1233 * @see org.xml.sax.Locator
1234 *
1235 * @throws org.xml.sax.SAXException
1236 */
1237 protected void cdata(char ch[], int start, final int length)
1238 throws org.xml.sax.SAXException
1239 {
1240 try
1241 {
1242 final int old_start = start;
1243 if (m_elemContext.m_startTagOpen)
1244 {
1245 closeStartTag();
1246 m_elemContext.m_startTagOpen = false;
1247 }
1248
1249 if (shouldIndent())
1250 indent();
1251
1252 boolean writeCDataBrackets =
1253 (((length >= 1) && escapingNotNeeded(ch[start])));
1254
1255 /* Write out the CDATA opening delimiter only if
1256 * we are supposed to, and if we are not already in
1257 * the middle of a CDATA section
1258 */
1259 if (writeCDataBrackets && !m_cdataTagOpen)
1260 {
1261 m_writer.write(CDATA_DELIMITER_OPEN);
1262 m_cdataTagOpen = true;
1263 }
1264
1265 // writer.write(ch, start, length);
1266 if (isEscapingDisabled())
1267 {
1268 charactersRaw(ch, start, length);
1269 }
1627 }
1628 /**
1629 * Process a dirty character and any preeceding clean characters
1630 * that were not yet processed.
1631 * @param chars array of characters being processed
1632 * @param end one (1) beyond the last character
1633 * in chars to be processed
1634 * @param i the index of the dirty character
1635 * @param ch the character in chars[i]
1636 * @param lastDirty the last dirty character previous to i
1637 * @param fromTextNode true if the characters being processed are
1638 * from a text node, false if they are from an attribute value.
1639 * @return the index of the last character processed
1640 */
1641 private int processDirty(
1642 char[] chars,
1643 int end,
1644 int i,
1645 char ch,
1646 int lastDirty,
1647 boolean fromTextNode) throws IOException
1648 {
1649 int startClean = lastDirty + 1;
1650 // if we have some clean characters accumulated
1651 // process them before the dirty one.
1652 if (i > startClean)
1653 {
1654 int lengthClean = i - startClean;
1655 m_writer.write(chars, startClean, lengthClean);
1656 }
1657
1658 // process the "dirty" character
1659 if (CharInfo.S_LINEFEED == ch && fromTextNode)
1660 {
1661 m_writer.write(m_lineSep, 0, m_lineSepLen);
1662 }
1663 else
1664 {
1665 startClean =
1666 accumDefaultEscape(
1667 m_writer,
1706 * @param chars non-null reference to character array.
1707 * @param len length of chars.
1708 * @param fromTextNode true if the characters being processed are
1709 * from a text node, false if the characters being processed are from
1710 * an attribute value.
1711 * @param escLF true if the linefeed should be escaped.
1712 *
1713 * @return i+1 if a character was written, i+2 if two characters
1714 * were written out, else return i.
1715 *
1716 * @throws org.xml.sax.SAXException
1717 */
1718 protected int accumDefaultEscape(
1719 Writer writer,
1720 char ch,
1721 int i,
1722 char[] chars,
1723 int len,
1724 boolean fromTextNode,
1725 boolean escLF)
1726 throws IOException
1727 {
1728
1729 int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
1730
1731 if (i == pos)
1732 {
1733 if (Encodings.isHighUTF16Surrogate(ch))
1734 {
1735
1736 // Should be the UTF-16 low surrogate of the hig/low pair.
1737 char next;
1738 // Unicode code point formed from the high/low pair.
1739 int codePoint = 0;
1740
1741 if (i + 1 >= len)
1742 {
1743 throw new IOException(
1744 Utils.messages.createMessage(
1745 MsgKey.ER_INVALID_UTF16_SURROGATE,
1746 new Object[] { Integer.toHexString(ch)}));
1747 //"Invalid UTF-16 surrogate detected: "
1748
1749 //+Integer.toHexString(ch)+ " ?");
1750 }
1751 else
1752 {
1753 next = chars[++i];
1754
1755 if (!(Encodings.isLowUTF16Surrogate(next)))
1756 throw new IOException(
1757 Utils.messages.createMessage(
1758 MsgKey
1759 .ER_INVALID_UTF16_SURROGATE,
1760 new Object[] {
1761 Integer.toHexString(ch)
1762 + " "
1763 + Integer.toHexString(next)}));
1764 //"Invalid UTF-16 surrogate detected: "
1765
1766 //+Integer.toHexString(ch)+" "+Integer.toHexString(next));
1767 codePoint = Encodings.toCodePoint(ch,next);
1768 }
1769
1770 writer.write("&#");
1771 writer.write(Integer.toString(codePoint));
1772 writer.write(';');
1773 pos += 2; // count the two characters that went into writing out this entity
1774 }
1775 else
1776 {
1777 /* This if check is added to support control characters in XML 1.1.
1778 * If a character is a Control Character within C0 and C1 range, it is desirable
1779 * to write it out as Numeric Character Reference(NCR) regardless of XML Version
1780 * being used for output document.
1781 */
1782 if (isCharacterInC0orC1Range(ch) ||
1783 (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
1784 {
1785 writer.write("&#");
1786 writer.write(Integer.toString(ch));
1787 writer.write(';');
1788 }
1789 else if ((!escapingNotNeeded(ch) ||
1790 ( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
1791 || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
1792 && m_elemContext.m_currentElemDepth > 0)
1793 {
1794 writer.write("&#");
1795 writer.write(Integer.toString(ch));
1796 writer.write(';');
1797 }
1798 else
1799 {
1800 writer.write(ch);
1801 }
1802 pos++; // count the single character that was processed
1803 }
1804
1805 }
1806 return pos;
1807 }
1808
1809 /**
1810 * Receive notification of the beginning of an element, although this is a
1811 * SAX method additional namespace or attribute information can occur before
1812 * or after this call, that is associated with this element.
1813 *
1814 *
1815 * @param namespaceURI The Namespace URI, or the empty string if the
1816 * element has no Namespace URI or if Namespace
1817 * processing is not being performed.
1818 * @param localName The local name (without prefix), or the
1819 * empty string if Namespace processing is not being
1820 * performed.
1821 * @param name The element type name.
1822 * @param atts The attributes attached to the element, if any.
1823 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1824 * wrapping another exception.
1825 * @see org.xml.sax.ContentHandler#startElement
1826 * @see org.xml.sax.ContentHandler#endElement
1827 * @see org.xml.sax.AttributeList
1828 *
1829 * @throws org.xml.sax.SAXException
2036 writer.write(name);
2037 writer.write("=\"");
2038 writeAttrString(writer, value, encoding);
2039 writer.write('\"');
2040 }
2041 }
2042
2043 /**
2044 * Returns the specified <var>string</var> after substituting <VAR>specials</VAR>,
2045 * and UTF-16 surrogates for chracter references <CODE>&#xnn</CODE>.
2046 *
2047 * @param string String to convert to XML format.
2048 * @param encoding CURRENTLY NOT IMPLEMENTED.
2049 *
2050 * @throws java.io.IOException
2051 */
2052 public void writeAttrString(
2053 Writer writer,
2054 String string,
2055 String encoding)
2056 throws IOException
2057 {
2058 final int len = string.length();
2059 if (len > m_attrBuff.length)
2060 {
2061 m_attrBuff = new char[len*2 + 1];
2062 }
2063 string.getChars(0,len, m_attrBuff, 0);
2064 final char[] stringChars = m_attrBuff;
2065
2066 for (int i = 0; i < len; )
2067 {
2068 char ch = stringChars[i];
2069 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
2070 {
2071 writer.write(ch);
2072 i++;
2073 }
2074 else
2075 { // I guess the parser doesn't normalize cr/lf in attributes. -sb
2076 // if ((CharInfo.S_CARRIAGERETURN == ch)
|
34 import java.util.Enumeration;
35 import java.util.Iterator;
36 import java.util.List;
37 import java.util.Properties;
38 import java.util.Set;
39 import java.util.StringTokenizer;
40 import javax.xml.transform.ErrorListener;
41 import javax.xml.transform.OutputKeys;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerException;
44 import org.w3c.dom.Node;
45 import org.xml.sax.Attributes;
46 import org.xml.sax.ContentHandler;
47 import org.xml.sax.SAXException;
48
49 /**
50 * This abstract class is a base class for other stream
51 * serializers (xml, html, text ...) that write output to a stream.
52 *
53 * @xsl.usage internal
54 * @LastModified: Sept 2018
55 */
56 abstract public class ToStream extends SerializerBase {
57
58 private static final String COMMENT_BEGIN = "<!--";
59 private static final String COMMENT_END = "-->";
60
61 /** Stack to keep track of disabling output escaping. */
62 protected BoolStack m_disableOutputEscapingStates = new BoolStack();
63
64 /**
65 * The encoding information associated with this serializer.
66 * Although initially there is no encoding,
67 * there is a dummy EncodingInfo object that will say
68 * that every character is in the encoding. This is useful
69 * for a serializer that is in temporary output state and has
70 * no associated encoding. A serializer in final output state
71 * will have an encoding, and will worry about whether
72 * single chars or surrogate pairs of high/low chars form
73 * characters in the output encoding.
74 */
176 * Tells if we're in an internal document type subset.
177 */
178 protected boolean m_inDoctype = false;
179
180 /**
181 * Flag to quickly tell if the encoding is UTF8.
182 */
183 boolean m_isUTF8 = false;
184
185 /**
186 * remembers if we are in between the startCDATA() and endCDATA() callbacks
187 */
188 protected boolean m_cdataStartCalled = false;
189
/**
 * If this flag is true DTD entity references are not left as-is,
 * which is the existing older behavior.
 */
194 private boolean m_expandDTDEntities = true;
195
196 private char m_highSurrogate = 0;
197
198 /**
199 * Default constructor
200 */
201 public ToStream() { }
202
203 /**
204 * This helper method to writes out "]]>" when closing a CDATA section.
205 *
206 * @throws org.xml.sax.SAXException
207 */
208 protected void closeCDATA() throws org.xml.sax.SAXException {
209 try {
210 m_writer.write(CDATA_DELIMITER_CLOSE);
211 // write out a CDATA section closing "]]>"
212 m_cdataTagOpen = false; // Remember that we have done so.
213 }
214 catch (IOException e) {
215 throw new SAXException(e);
216 }
217 }
938 }
939 return ret;
940 }
941
942 /**
943 * Once a surrogate has been detected, write out the pair of
944 * characters if it is in the encoding, or if there is no
945 * encoding, otherwise write out an entity reference
946 * of the value of the unicode code point of the character
947 * represented by the high/low surrogate pair.
948 * <p>
949 * An exception is thrown if there is no low surrogate in the pair,
950 * because the array ends unexpectely, or if the low char is there
951 * but its value is such that it is not a low surrogate.
952 *
953 * @param c the first (high) part of the surrogate, which
954 * must be confirmed before calling this method.
955 * @param ch Character array.
956 * @param i position Where the surrogate was detected.
957 * @param end The end index of the significant characters.
958 * @return the status of writing a surrogate pair.
959 * -1 -- nothing is written
960 * 0 -- the pair is written as-is
961 * code point -- the pair is written as an entity reference
962 *
963 * @throws IOException
964 * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
965 */
966 protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
967 throws IOException, SAXException
968 {
969 int status = -1;
970 if (i + 1 >= end)
971 {
972 m_highSurrogate = c;
973 return status;
974 }
975
976 char high, low;
977 if (m_highSurrogate == 0) {
978 high = c;
979 low = ch[i+1];
980 status = 0;
981 } else {
982 high = m_highSurrogate;
983 low = c;
984 m_highSurrogate = 0;
985 }
986
987 if (!Encodings.isLowUTF16Surrogate(low)) {
988 throwIOE(high, low);
989 }
990
991 final Writer writer = m_writer;
992
993 // If we make it to here we have a valid high, low surrogate pair
994 if (m_encodingInfo.isInEncoding(high,low)) {
995 // If the character formed by the surrogate pair
996 // is in the encoding, so just write it out
997 writer.write(new char[]{high, low}, 0, 2);
998 }
999 else {
1000 // Don't know what to do with this char, it is
1001 // not in the encoding and not a high char in
1002 // a surrogate pair, so write out as an entity ref
1003 final String encoding = getEncoding();
1004 if (encoding != null) {
1005 status = writeCharRef(writer, high, low);
1006 } else {
1007 /* The output encoding is not known,
1008 * so just write it out as-is.
1009 */
1010 writer.write(new char[]{high, low}, 0, 2);
1011 }
1012 }
1013 // non-zero only if character reference was written out.
1014 return status;
1015 }
1016
1017 /**
1018 * Handle one of the default entities, return false if it
1019 * is not a default entity.
1020 *
1021 * @param ch character to be escaped.
1022 * @param i index into character array.
1023 * @param chars non-null reference to character array.
1024 * @param len length of chars.
1025 * @param fromTextNode true if the characters being processed
1026 * are from a text node, false if they are from an attribute value
1027 * @param escLF true if the linefeed should be escaped.
1028 *
1029 * @return i+1 if the character was written, else i.
1030 *
1031 * @throws java.io.IOException
1032 */
1033 protected int accumDefaultEntity(
1034 Writer writer,
1084 char ch[],
1085 int start,
1086 int length,
1087 boolean isCData,
1088 boolean useSystemLineSeparator)
1089 throws IOException, org.xml.sax.SAXException
1090 {
1091 final Writer writer = m_writer;
1092 int end = start + length;
1093
1094 for (int i = start; i < end; i++)
1095 {
1096 char c = ch[i];
1097
1098 if (CharInfo.S_LINEFEED == c && useSystemLineSeparator)
1099 {
1100 writer.write(m_lineSep, 0, m_lineSepLen);
1101 }
1102 else if (isCData && (!escapingNotNeeded(c)))
1103 {
1104 i = handleEscaping(writer, c, ch, i, end);
1105 }
1106 else if (
1107 isCData
1108 && ((i < (end - 2))
1109 && (']' == c)
1110 && (']' == ch[i + 1])
1111 && ('>' == ch[i + 2])))
1112 {
1113 writer.write(CDATA_CONTINUE);
1114
1115 i += 2;
1116 }
1117 else
1118 {
1119 if (escapingNotNeeded(c))
1120 {
1121 if (isCData && !m_cdataTagOpen)
1122 {
1123 writer.write(CDATA_DELIMITER_OPEN);
1124 m_cdataTagOpen = true;
1125 }
1126 writer.write(c);
1127 }
1128 else {
1129 i = handleEscaping(writer, c, ch, i, end);
1130 }
1131 }
1132 }
1133
1134 }
1135
1136 /**
1137 * Handles escaping, writes either with a surrogate pair or a character
1138 * reference.
1139 *
1140 * @param c the current char
1141 * @param ch the character array
1142 * @param i the current position
1143 * @param end the end index of the array
1144 * @return the next index
1145 *
1146 * @throws IOException
1147 * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
1148 */
1149 private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
1150 throws IOException, SAXException {
1151 if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
1152 {
1153 if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
1154 // move the index if the low surrogate is consumed
1155 // as writeUTF16Surrogate has written the pair
1156 if (Encodings.isHighUTF16Surrogate(c)) {
1157 i++ ;
1158 }
1159 }
1160 }
1161 else
1162 {
1163 writeCharRef(writer, c);
1164 }
1165 return i;
1166 }
1167
1168 /**
1169 * Ends an un-escaping section.
1170 *
1171 * @see #startNonEscaping
1172 *
1173 * @throws org.xml.sax.SAXException
1174 */
1175 public void endNonEscaping() throws org.xml.sax.SAXException
1176 {
1177 m_disableOutputEscapingStates.pop();
1178 }
1179
1180 /**
1181 * Starts an un-escaping section. All characters printed within an un-
1182 * escaping section are printed as is, without escaping special characters
1183 * into entity references. Only XML and HTML serializers need to support
1184 * this method.
1185 * <p> The contents of the un-escaping section will be delivered through the
1214 * @param length The number of characters to read from the array.
1215 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1216 * wrapping another exception.
1217 * @see #ignorableWhitespace
1218 * @see org.xml.sax.Locator
1219 *
1220 * @throws org.xml.sax.SAXException
1221 */
1222 protected void cdata(char ch[], int start, final int length)
1223 throws org.xml.sax.SAXException
1224 {
1225 try
1226 {
1227 final int old_start = start;
1228 if (m_elemContext.m_startTagOpen)
1229 {
1230 closeStartTag();
1231 m_elemContext.m_startTagOpen = false;
1232 }
1233
1234 if (!m_cdataTagOpen && shouldIndent())
1235 indent();
1236
1237 boolean writeCDataBrackets =
1238 (((length >= 1) && escapingNotNeeded(ch[start])));
1239
1240 /* Write out the CDATA opening delimiter only if
1241 * we are supposed to, and if we are not already in
1242 * the middle of a CDATA section
1243 */
1244 if (writeCDataBrackets && !m_cdataTagOpen)
1245 {
1246 m_writer.write(CDATA_DELIMITER_OPEN);
1247 m_cdataTagOpen = true;
1248 }
1249
1250 // writer.write(ch, start, length);
1251 if (isEscapingDisabled())
1252 {
1253 charactersRaw(ch, start, length);
1254 }
1612 }
1613 /**
1614 * Process a dirty character and any preeceding clean characters
1615 * that were not yet processed.
1616 * @param chars array of characters being processed
1617 * @param end one (1) beyond the last character
1618 * in chars to be processed
1619 * @param i the index of the dirty character
1620 * @param ch the character in chars[i]
1621 * @param lastDirty the last dirty character previous to i
1622 * @param fromTextNode true if the characters being processed are
1623 * from a text node, false if they are from an attribute value.
1624 * @return the index of the last character processed
1625 */
1626 private int processDirty(
1627 char[] chars,
1628 int end,
1629 int i,
1630 char ch,
1631 int lastDirty,
1632 boolean fromTextNode) throws IOException, SAXException
1633 {
1634 int startClean = lastDirty + 1;
1635 // if we have some clean characters accumulated
1636 // process them before the dirty one.
1637 if (i > startClean)
1638 {
1639 int lengthClean = i - startClean;
1640 m_writer.write(chars, startClean, lengthClean);
1641 }
1642
1643 // process the "dirty" character
1644 if (CharInfo.S_LINEFEED == ch && fromTextNode)
1645 {
1646 m_writer.write(m_lineSep, 0, m_lineSepLen);
1647 }
1648 else
1649 {
1650 startClean =
1651 accumDefaultEscape(
1652 m_writer,
1691 * @param chars non-null reference to character array.
1692 * @param len length of chars.
1693 * @param fromTextNode true if the characters being processed are
1694 * from a text node, false if the characters being processed are from
1695 * an attribute value.
1696 * @param escLF true if the linefeed should be escaped.
1697 *
1698 * @return i+1 if a character was written, i+2 if two characters
1699 * were written out, else return i.
1700 *
1701 * @throws org.xml.sax.SAXException
1702 */
1703 protected int accumDefaultEscape(
1704 Writer writer,
1705 char ch,
1706 int i,
1707 char[] chars,
1708 int len,
1709 boolean fromTextNode,
1710 boolean escLF)
1711 throws IOException, SAXException
1712 {
1713
1714 int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
1715
1716 if (i == pos)
1717 {
1718 if (m_highSurrogate != 0) {
1719 if (!(Encodings.isLowUTF16Surrogate(ch))) {
1720 throwIOE(m_highSurrogate, ch);
1721 }
1722 writeCharRef(writer, m_highSurrogate, ch);
1723 m_highSurrogate = 0;
1724 return ++pos;
1725 }
1726
1727 if (Encodings.isHighUTF16Surrogate(ch))
1728 {
1729 if (i + 1 >= len)
1730 {
1731 // save for the next read
1732 m_highSurrogate = ch;
1733 pos++;
1734 }
1735 else
1736 {
1737 // the next should be the UTF-16 low surrogate of the hig/low pair.
1738 char next = chars[++i];
1739 if (!(Encodings.isLowUTF16Surrogate(next)))
1740 throwIOE(ch, next);
1741
1742 writeCharRef(writer, ch, next);
1743 pos += 2; // count the two characters that went into writing out this entity
1744 }
1745 }
1746 else
1747 {
1748 /* This if check is added to support control characters in XML 1.1.
1749 * If a character is a Control Character within C0 and C1 range, it is desirable
1750 * to write it out as Numeric Character Reference(NCR) regardless of XML Version
1751 * being used for output document.
1752 */
1753 if (isCharacterInC0orC1Range(ch) ||
1754 (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
1755 {
1756 writeCharRef(writer, ch);
1757 }
1758 else if ((!escapingNotNeeded(ch) ||
1759 ( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
1760 || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
1761 && m_elemContext.m_currentElemDepth > 0)
1762 {
1763 writeCharRef(writer, ch);
1764 }
1765 else
1766 {
1767 writer.write(ch);
1768 }
1769 pos++; // count the single character that was processed
1770 }
1771
1772 }
1773 return pos;
1774 }
1775
1776 /**
1777 * Writes out a character reference.
1778 * @param writer the writer
1779 * @param c the character
1780 * @throws IOException
1781 */
1782 private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
1783 if (m_cdataTagOpen)
1784 closeCDATA();
1785 writer.write("&#");
1786 writer.write(Integer.toString(c));
1787 writer.write(';');
1788 }
1789
1790 /**
1791 * Writes out a pair of surrogates as a character reference
1792 * @param writer the writer
1793 * @param high the high surrogate
1794 * @param low the low surrogate
1795 * @throws IOException
1796 */
1797 private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
1798 if (m_cdataTagOpen)
1799 closeCDATA();
1800 // Unicode code point formed from the high/low pair.
1801 int codePoint = Encodings.toCodePoint(high, low);
1802 writer.write("&#");
1803 writer.write(Integer.toString(codePoint));
1804 writer.write(';');
1805 return codePoint;
1806 }
1807
1808 private void throwIOE(char ch, char next) throws IOException {
1809 throw new IOException(Utils.messages.createMessage(
1810 MsgKey.ER_INVALID_UTF16_SURROGATE,
1811 new Object[] {Integer.toHexString(ch) + " "
1812 + Integer.toHexString(next)}));
1813 }
1814
1815 /**
1816 * Receive notification of the beginning of an element, although this is a
1817 * SAX method additional namespace or attribute information can occur before
1818 * or after this call, that is associated with this element.
1819 *
1820 *
1821 * @param namespaceURI The Namespace URI, or the empty string if the
1822 * element has no Namespace URI or if Namespace
1823 * processing is not being performed.
1824 * @param localName The local name (without prefix), or the
1825 * empty string if Namespace processing is not being
1826 * performed.
1827 * @param name The element type name.
1828 * @param atts The attributes attached to the element, if any.
1829 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1830 * wrapping another exception.
1831 * @see org.xml.sax.ContentHandler#startElement
1832 * @see org.xml.sax.ContentHandler#endElement
1833 * @see org.xml.sax.AttributeList
1834 *
1835 * @throws org.xml.sax.SAXException
2042 writer.write(name);
2043 writer.write("=\"");
2044 writeAttrString(writer, value, encoding);
2045 writer.write('\"');
2046 }
2047 }
2048
2049 /**
2050 * Returns the specified <var>string</var> after substituting <VAR>specials</VAR>,
2051 * and UTF-16 surrogates for chracter references <CODE>&#xnn</CODE>.
2052 *
2053 * @param string String to convert to XML format.
2054 * @param encoding CURRENTLY NOT IMPLEMENTED.
2055 *
2056 * @throws java.io.IOException
2057 */
2058 public void writeAttrString(
2059 Writer writer,
2060 String string,
2061 String encoding)
2062 throws IOException, SAXException
2063 {
2064 final int len = string.length();
2065 if (len > m_attrBuff.length)
2066 {
2067 m_attrBuff = new char[len*2 + 1];
2068 }
2069 string.getChars(0,len, m_attrBuff, 0);
2070 final char[] stringChars = m_attrBuff;
2071
2072 for (int i = 0; i < len; )
2073 {
2074 char ch = stringChars[i];
2075 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
2076 {
2077 writer.write(ch);
2078 i++;
2079 }
2080 else
2081 { // I guess the parser doesn't normalize cr/lf in attributes. -sb
2082 // if ((CharInfo.S_CARRIAGERETURN == ch)
|