< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java

Print this page




  34 import java.util.Enumeration;
  35 import java.util.Iterator;
  36 import java.util.List;
  37 import java.util.Properties;
  38 import java.util.Set;
  39 import java.util.StringTokenizer;
  40 import javax.xml.transform.ErrorListener;
  41 import javax.xml.transform.OutputKeys;
  42 import javax.xml.transform.Transformer;
  43 import javax.xml.transform.TransformerException;
  44 import org.w3c.dom.Node;
  45 import org.xml.sax.Attributes;
  46 import org.xml.sax.ContentHandler;
  47 import org.xml.sax.SAXException;
  48 
  49 /**
  50  * This abstract class is a base class for other stream
  51  * serializers (xml, html, text ...) that write output to a stream.
  52  *
  53  * @xsl.usage internal
  54  * @LastModified: Feb 2018
  55  */
  56 abstract public class ToStream extends SerializerBase {
  57 
  58     private static final String COMMENT_BEGIN = "<!--";
  59     private static final String COMMENT_END = "-->";
  60 
  61     /** Stack to keep track of disabling output escaping. */
  62     protected BoolStack m_disableOutputEscapingStates = new BoolStack();
  63 
  64     /**
  65      * The encoding information associated with this serializer.
  66      * Although initially there is no encoding,
  67      * there is a dummy EncodingInfo object that will say
  68      * that every character is in the encoding. This is useful
  69      * for a serializer that is in temporary output state and has
  70      * no associated encoding. A serializer in final output state
  71      * will have an encoding, and will worry about whether
  72      * single chars or surrogate pairs of high/low chars form
  73      * characters in the output encoding.
  74      */


 176      * Tells if we're in an internal document type subset.
 177      */
 178     protected boolean m_inDoctype = false;
 179 
 180     /**
 181      * Flag to quickly tell if the encoding is UTF8.
 182      */
 183     boolean m_isUTF8 = false;
 184 
 185     /**
 186      * remembers if we are in between the startCDATA() and endCDATA() callbacks
 187      */
 188     protected boolean m_cdataStartCalled = false;
 189 
 190     /**
 191      * If this flag is true DTD entity references are not left as-is,
 192      * which is the existing (older) behavior.
 193      */
 194     private boolean m_expandDTDEntities = true;
 195 


 196     /**
 197      * Default constructor; the body is empty and performs no setup of its own.
 198      */
 199     public ToStream() { }
 200 
 201     /**
 202      * Helper method that writes out "]]>" to close a CDATA section and clears m_cdataTagOpen.
 203      *
 204      * @throws org.xml.sax.SAXException wrapping any IOException raised by the writer
 205      */
 206     protected void closeCDATA() throws org.xml.sax.SAXException {
 207         try {
 208             m_writer.write(CDATA_DELIMITER_CLOSE);
 209             // the write above emits the CDATA section closing "]]>"
 210             m_cdataTagOpen = false; // Remember that the section is now closed.
 211         }
 212         catch (IOException e) {
 213             throw new SAXException(e); // rethrow as a SAX error, keeping the IOException as the cause
 214         }
 215     }


 936         }
 937         return ret;
 938     }
 939 
 940     /**
 941      * Once a surrogate has been detected, write out the pair of
 942      * characters if it is in the encoding, or if there is no
 943      * encoding; otherwise write out a numeric character reference
 944      * holding the decimal unicode code point of the character
 945      * represented by the high/low surrogate pair.
 946      * <p>
 947      * An exception is thrown if there is no low surrogate in the pair,
 948      * because the array ends unexpectedly, or if the low char is there
 949      * but its value is such that it is not a low surrogate.
 950      *
 951      * @param c the first (high) part of the surrogate, which
 952      * must be confirmed before calling this method.
 953      * @param ch Character array.
 954      * @param i position where the surrogate was detected.
 955      * @param end The end index of the significant characters.
 956      * @return 0 if the pair of characters was written out as-is,
 957      * the unicode code point of the character represented by
 958      * the surrogate pair if a character reference with that value
 959      * was written out.
 960      *
 961      * @throws IOException if the writer fails, or if an invalid UTF-16
 962      * surrogate is detected (low surrogate missing or not a low surrogate).
 963      */
 964     protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
 965         throws IOException
 966     {
 967         int codePoint = 0;
 968         if (i + 1 >= end) // the high surrogate is the last significant char: no low half available
 969         {
 970             throw new IOException(
 971                 Utils.messages.createMessage(
 972                     MsgKey.ER_INVALID_UTF16_SURROGATE,
 973                     new Object[] { Integer.toHexString((int) c)}));









 974         }
 975
 976         final char high = c;
 977         final char low = ch[i+1];
 978         if (!Encodings.isLowUTF16Surrogate(low)) {
 979             throw new IOException(
 980                 Utils.messages.createMessage(
 981                     MsgKey.ER_INVALID_UTF16_SURROGATE,
 982                     new Object[] {
 983                         Integer.toHexString((int) c)
 984                             + " "
 985                             + Integer.toHexString(low)}));
 986         }
 987
 988         final Writer writer = m_writer;
 989
 990         // If we make it to here we have a valid high, low surrogate pair
 991         if (m_encodingInfo.isInEncoding(c,low)) {
 992             // The character formed by the surrogate pair
 993             // is in the encoding, so just write it out
 994             writer.write(ch,i,2);
 995         }
 996         else {
 997             // The pair is valid but m_encodingInfo reports it as
 998             // not in the encoding; what gets written depends on
 999             // whether an encoding name is known
1000             final String encoding = getEncoding();
1001             if (encoding != null) {
1002                 /* The output encoding is known and the
1003                  * character is not in it.
1004                   */
1005                 codePoint = Encodings.toCodePoint(high, low);
1006                 // not in the encoding, so write out a decimal character reference
1007                 writer.write('&');
1008                 writer.write('#');
1009                 writer.write(Integer.toString(codePoint));
1010                 writer.write(';');
1011             } else {
1012                 /* The output encoding is not known,
1013                  * so just write it out as-is.
1014                  */
1015                 writer.write(ch, i, 2);
1016             }
1017         }
1018         // non-zero only if character reference was written out.
1019         return codePoint;
1020     }
1021 
1022     /**
1023      * Handle one of the default entities, return false if it
1024      * is not a default entity.
1025      *
1026      * @param ch character to be escaped.
1027      * @param i index into character array.
1028      * @param chars non-null reference to character array.
1029      * @param len length of chars.
1030      * @param fromTextNode true if the characters being processed
1031      * are from a text node, false if they are from an attribute value
1032      * @param escLF true if the linefeed should be escaped.
1033      *
1034      * @return i+1 if the character was written, else i.
1035      *
1036      * @throws java.io.IOException
1037      */
1038     protected int accumDefaultEntity(
1039         Writer writer,


1089         char ch[],
1090         int start,
1091         int length,
1092         boolean isCData,
1093         boolean useSystemLineSeparator)
1094         throws IOException, org.xml.sax.SAXException
1095     {
1096         final Writer writer = m_writer;
1097         int end = start + length;
1098 
1099         for (int i = start; i < end; i++)
1100         {
1101             char c = ch[i];
1102 
1103             if (CharInfo.S_LINEFEED == c && useSystemLineSeparator)
1104             {
1105                 writer.write(m_lineSep, 0, m_lineSepLen);
1106             }
1107             else if (isCData && (!escapingNotNeeded(c)))
1108             {
1109                 //                if (i != 0)
1110                 if (m_cdataTagOpen)
1111                     closeCDATA();
1112 
1113                 // This needs to go into a function...
1114                 if (Encodings.isHighUTF16Surrogate(c))
1115                 {
1116                     writeUTF16Surrogate(c, ch, i, end);
1117                     i++ ; // process two input characters
1118                 }
1119                 else
1120                 {
1121                     writer.write("&#");
1122 
1123                     String intStr = Integer.toString((int) c);
1124 
1125                     writer.write(intStr);
1126                     writer.write(';');
1127                 }
1128 
1129                 //                if ((i != 0) && (i < (end - 1)))
1130                 //                if (!m_cdataTagOpen && (i < (end - 1)))
1131                 //                {
1132                 //                    writer.write(CDATA_DELIMITER_OPEN);
1133                 //                    m_cdataTagOpen = true;
1134                 //                }
1135             }
1136             else if (
1137                 isCData
1138                     && ((i < (end - 2))
1139                         && (']' == c)
1140                         && (']' == ch[i + 1])
1141                         && ('>' == ch[i + 2])))
1142             {
1143                 writer.write(CDATA_CONTINUE);
1144 
1145                 i += 2;
1146             }
1147             else
1148             {
1149                 if (escapingNotNeeded(c))
1150                 {
1151                     if (isCData && !m_cdataTagOpen)
1152                     {
1153                         writer.write(CDATA_DELIMITER_OPEN);
1154                         m_cdataTagOpen = true;
1155                     }
1156                     writer.write(c);
1157                 }
1158 
1159                 // This needs to go into a function...
1160                 else if (Encodings.isHighUTF16Surrogate(c))
1161                 {
1162                     if (m_cdataTagOpen)
1163                         closeCDATA();
1164                     writeUTF16Surrogate(c, ch, i, end);
1165                     i++; // process two input characters
1166                 }
1167                 else
1168                 {
1169                     if (m_cdataTagOpen)
1170                         closeCDATA();
1171                     writer.write("&#");
1172 
1173                     String intStr = Integer.toString((int) c);
1174 
1175                     writer.write(intStr);
1176                     writer.write(';');




















1177                 }
1178             }
1179         }
1180 




1181     }
1182 
1183     /**
1184      * Ends an un-escaping section by popping the top entry of m_disableOutputEscapingStates.
1185      *
1186      * @see #startNonEscaping
1187      *
1188      * @throws org.xml.sax.SAXException declared by the signature; nothing in this body throws it directly
1189      */
1190     public void endNonEscaping() throws org.xml.sax.SAXException
1191     {
1192         m_disableOutputEscapingStates.pop();
1193     }
1194 
1195     /**
1196      * Starts an un-escaping section. All characters printed within an un-
1197      * escaping section are printed as is, without escaping special characters
1198      * into entity references. Only XML and HTML serializers need to support
1199      * this method.
1200      * <p> The contents of the un-escaping section will be delivered through the


1229      * @param length The number of characters to read from the array.
1230      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1231      *            wrapping another exception.
1232      * @see #ignorableWhitespace
1233      * @see org.xml.sax.Locator
1234      *
1235      * @throws org.xml.sax.SAXException
1236      */
1237     protected void cdata(char ch[], int start, final int length)
1238         throws org.xml.sax.SAXException
1239     {
1240         try
1241         {
1242             final int old_start = start;
1243             if (m_elemContext.m_startTagOpen)
1244             {
1245                 closeStartTag();
1246                 m_elemContext.m_startTagOpen = false;
1247             }
1248 
1249             if (shouldIndent())
1250                 indent();
1251 
1252             boolean writeCDataBrackets =
1253                 (((length >= 1) && escapingNotNeeded(ch[start])));
1254 
1255             /* Write out the CDATA opening delimiter only if
1256              * we are supposed to, and if we are not already in
1257              * the middle of a CDATA section
1258              */
1259             if (writeCDataBrackets && !m_cdataTagOpen)
1260             {
1261                 m_writer.write(CDATA_DELIMITER_OPEN);
1262                 m_cdataTagOpen = true;
1263             }
1264 
1265             // writer.write(ch, start, length);
1266             if (isEscapingDisabled())
1267             {
1268                 charactersRaw(ch, start, length);
1269             }


1627     }
1628     /**
1629      * Process a dirty character and any preceding clean characters
1630      * that were not yet processed.
1631      * @param chars array of characters being processed
1632      * @param end one (1) beyond the last character
1633      * in chars to be processed
1634      * @param i the index of the dirty character
1635      * @param ch the character in chars[i]
1636      * @param lastDirty the last dirty character previous to i
1637      * @param fromTextNode true if the characters being processed are
1638      * from a text node, false if they are from an attribute value.
1639      * @return the index of the last character processed
1640      */
1641     private int processDirty(
1642         char[] chars,
1643         int end,
1644         int i,
1645         char ch,
1646         int lastDirty,
1647         boolean fromTextNode) throws IOException
1648     {
1649         int startClean = lastDirty + 1;
1650         // if we have some clean characters accumulated
1651         // process them before the dirty one.
1652         if (i > startClean)
1653         {
1654             int lengthClean = i - startClean;
1655             m_writer.write(chars, startClean, lengthClean);
1656         }
1657 
1658         // process the "dirty" character
1659         if (CharInfo.S_LINEFEED == ch && fromTextNode)
1660         {
1661             m_writer.write(m_lineSep, 0, m_lineSepLen);
1662         }
1663         else
1664         {
1665             startClean =
1666                 accumDefaultEscape(
1667                     m_writer,


1706      * @param chars non-null reference to character array.
1707      * @param len length of chars.
1708      * @param fromTextNode true if the characters being processed are
1709      * from a text node, false if the characters being processed are from
1710      * an attribute value.
1711      * @param escLF true if the linefeed should be escaped.
1712      *
1713      * @return i+1 if a character was written, i+2 if two characters
1714      * were written out, else return i.
1715      *
1716      * @throws IOException if an invalid UTF-16 surrogate is detected or the writer fails
1717      */
1718     protected int accumDefaultEscape(
1719         Writer writer,
1720         char ch,
1721         int i,
1722         char[] chars,
1723         int len,
1724         boolean fromTextNode,
1725         boolean escLF)
1726         throws IOException
1727     {
1728
1729         int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
1730
1731         if (i == pos) // index unchanged: accumDefaultEntity did not write ch, so escape it here
1732         {









1733             if (Encodings.isHighUTF16Surrogate(ch))
1734             {
1735
1736                 // Should be the UTF-16 low surrogate of the high/low pair.
1737                 char next;
1738                 // Unicode code point formed from the high/low pair.
1739                 int codePoint = 0;
1740
1741                 if (i + 1 >= len) // no char follows the high surrogate
1742                 {
1743                     throw new IOException(
1744                         Utils.messages.createMessage(
1745                             MsgKey.ER_INVALID_UTF16_SURROGATE,
1746                             new Object[] { Integer.toHexString(ch)}));
1747                     //"Invalid UTF-16 surrogate detected: "
1748
1749                     //+Integer.toHexString(ch)+ " ?");
1750                 }
1751                 else
1752                 {
1753                     next = chars[++i];
1754
1755                     if (!(Encodings.isLowUTF16Surrogate(next)))
1756                         throw new IOException(
1757                             Utils.messages.createMessage(
1758                                 MsgKey
1759                                     .ER_INVALID_UTF16_SURROGATE,
1760                                 new Object[] {
1761                                     Integer.toHexString(ch)
1762                                         + " "
1763                                         + Integer.toHexString(next)}));
1764                     //"Invalid UTF-16 surrogate detected: "
1765
1766                     //+Integer.toHexString(ch)+" "+Integer.toHexString(next));
1767                     codePoint = Encodings.toCodePoint(ch,next);
1768                 }
1769
                     // The pair is always written as a decimal character reference here;
                     // this branch makes no isInEncoding check.
1770                 writer.write("&#");
1771                 writer.write(Integer.toString(codePoint));
1772                 writer.write(';');
1773                 pos += 2; // count the two characters that went into writing out this entity
1774             }

1775             else
1776             {
1777                 /*  This if check is added to support control characters in XML 1.1.
1778                  *  If a character is a Control Character within C0 and C1 range, it is desirable
1779                  *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
1780                  *  being used for output document.
1781                  */
1782                 if (isCharacterInC0orC1Range(ch) ||
1783                         (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
1784                 {
1785                     writer.write("&#");
1786                     writer.write(Integer.toString(ch));
1787                     writer.write(';');
1788                 }
                     // Otherwise write a character reference only inside an element (depth > 0),
                     // when ch needs escaping or is special for the text/attribute context.
1789                 else if ((!escapingNotNeeded(ch) ||
1790                     (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
1791                      || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
1792                 && m_elemContext.m_currentElemDepth > 0)
1793                 {
1794                     writer.write("&#");
1795                     writer.write(Integer.toString(ch));
1796                     writer.write(';');
1797                 }
1798                 else
1799                 {
1800                     writer.write(ch);
1801                 }
1802                 pos++;  // count the single character that was processed
1803             }
1804
1805         }
1806         return pos;
1807     }
1808 
1809     /**







































1810      * Receive notification of the beginning of an element, although this is a
1811      * SAX method additional namespace or attribute information can occur before
1812      * or after this call, that is associated with this element.
1813      *
1814      *
1815      * @param namespaceURI The Namespace URI, or the empty string if the
1816      *        element has no Namespace URI or if Namespace
1817      *        processing is not being performed.
1818      * @param localName The local name (without prefix), or the
1819      *        empty string if Namespace processing is not being
1820      *        performed.
1821      * @param name The element type name.
1822      * @param atts The attributes attached to the element, if any.
1823      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1824      *            wrapping another exception.
1825      * @see org.xml.sax.ContentHandler#startElement
1826      * @see org.xml.sax.ContentHandler#endElement
1827      * @see org.xml.sax.AttributeList
1828      *
1829      * @throws org.xml.sax.SAXException


2036                 writer.write(name);
2037                 writer.write("=\"");
2038                 writeAttrString(writer, value, encoding);
2039                 writer.write('\"');
2040             }
2041     }
2042 
2043     /**
2044      * Returns the specified <var>string</var> after substituting <VAR>specials</VAR>,
2045      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
2046      *
2047      * @param   string      String to convert to XML format.
2048      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
2049      *
2050      * @throws java.io.IOException
2051      */
2052     public void writeAttrString(
2053         Writer writer,
2054         String string,
2055         String encoding)
2056         throws IOException
2057     {
2058         final int len = string.length();
2059         if (len > m_attrBuff.length)
2060         {
2061            m_attrBuff = new char[len*2 + 1];
2062         }
2063         string.getChars(0,len, m_attrBuff, 0);
2064         final char[] stringChars = m_attrBuff;
2065 
2066         for (int i = 0; i < len; )
2067         {
2068             char ch = stringChars[i];
2069             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
2070             {
2071                 writer.write(ch);
2072                 i++;
2073             }
2074             else
2075             { // I guess the parser doesn't normalize cr/lf in attributes. -sb
2076 //                if ((CharInfo.S_CARRIAGERETURN == ch)




  34 import java.util.Enumeration;
  35 import java.util.Iterator;
  36 import java.util.List;
  37 import java.util.Properties;
  38 import java.util.Set;
  39 import java.util.StringTokenizer;
  40 import javax.xml.transform.ErrorListener;
  41 import javax.xml.transform.OutputKeys;
  42 import javax.xml.transform.Transformer;
  43 import javax.xml.transform.TransformerException;
  44 import org.w3c.dom.Node;
  45 import org.xml.sax.Attributes;
  46 import org.xml.sax.ContentHandler;
  47 import org.xml.sax.SAXException;
  48 
  49 /**
  50  * This abstract class is a base class for other stream
  51  * serializers (xml, html, text ...) that write output to a stream.
  52  *
  53  * @xsl.usage internal
  54  * @LastModified: Sept 2018
  55  */
  56 abstract public class ToStream extends SerializerBase {
  57 
  58     private static final String COMMENT_BEGIN = "<!--";
  59     private static final String COMMENT_END = "-->";
  60 
  61     /** Stack to keep track of disabling output escaping. */
  62     protected BoolStack m_disableOutputEscapingStates = new BoolStack();
  63 
  64     /**
  65      * The encoding information associated with this serializer.
  66      * Although initially there is no encoding,
  67      * there is a dummy EncodingInfo object that will say
  68      * that every character is in the encoding. This is useful
  69      * for a serializer that is in temporary output state and has
  70      * no associated encoding. A serializer in final output state
  71      * will have an encoding, and will worry about whether
  72      * single chars or surrogate pairs of high/low chars form
  73      * characters in the output encoding.
  74      */


 176      * Tells if we're in an internal document type subset.
 177      */
 178     protected boolean m_inDoctype = false;
 179 
 180     /**
 181      * Flag to quickly tell if the encoding is UTF8.
 182      */
 183     boolean m_isUTF8 = false;
 184 
 185     /**
 186      * remembers if we are in between the startCDATA() and endCDATA() callbacks
 187      */
 188     protected boolean m_cdataStartCalled = false;
 189 
 190     /**
 191      * If this flag is true DTD entity references are not left as-is,
 192      * which is the existing (older) behavior.
 193      */
 194     private boolean m_expandDTDEntities = true;
 195 
 196     private char m_highSurrogate = 0; // surrogate char saved by writeUTF16Surrogate when it is the last char of the buffer; 0 means none pending
 197 
 198     /**
 199      * Default constructor; the body is empty and performs no setup of its own.
 200      */
 201     public ToStream() { }
 202 
 203     /**
 204      * Helper method that writes out "]]>" to close a CDATA section and clears m_cdataTagOpen.
 205      *
 206      * @throws org.xml.sax.SAXException wrapping any IOException raised by the writer
 207      */
 208     protected void closeCDATA() throws org.xml.sax.SAXException {
 209         try {
 210             m_writer.write(CDATA_DELIMITER_CLOSE);
 211             // the write above emits the CDATA section closing "]]>"
 212             m_cdataTagOpen = false; // Remember that the section is now closed.
 213         }
 214         catch (IOException e) {
 215             throw new SAXException(e); // rethrow as a SAX error, keeping the IOException as the cause
 216         }
 217     }


 938         }
 939         return ret;
 940     }
 941 
 942     /**
 943      * Once a surrogate has been detected, write out the pair of
 944      * characters if it is in the encoding, or if there is no
 945      * encoding, otherwise write out an entity reference
 946      * of the value of the unicode code point of the character
 947      * represented by the high/low surrogate pair.
 948      * <p>
 949      * If the array ends right after c, nothing is written and no exception
 950      * is thrown: c is saved in m_highSurrogate so that a later call can pair
 951      * it with its own c. throwIOE is invoked if the low char is not a low surrogate.
 952      *
 953      * @param c the char at ch[i]: the high surrogate, or the low surrogate
 954      * when a high surrogate is pending in m_highSurrogate.
 955      * @param ch Character array.
 956      * @param i position where the surrogate was detected.
 957      * @param end The end index of the significant characters.
 958      * @return the status of writing a surrogate pair.
 959      *        -1 -- nothing is written
 960      *         0 -- the pair is written as-is
 961      *         code point -- the pair is written as an entity reference
 962      *
 963      * @throws IOException if the writer fails
 964      * @throws org.xml.sax.SAXException declared by the signature; not raised directly in this body
 965      */
 966     protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
 967         throws IOException, SAXException
 968     {
 969         int status = -1;
             // NOTE(review): this end-of-buffer check runs before the pending-surrogate
             // check below, so when m_highSurrogate is already set and c is the last
             // char of the buffer, the pending value is overwritten with c -- confirm.
 970         if (i + 1 >= end)
 971         {
 972             m_highSurrogate = c;
 973             return status;
 974         }
 975
 976         char high, low;
 977         if (m_highSurrogate == 0) {
                 // nothing pending: the pair is c followed by ch[i+1]
 978             high = c;
 979             low = ch[i+1];
 980             status = 0;
 981         } else {
                 // complete the pair saved by an earlier call; ch[i+1] is not consumed.
                 // NOTE(review): status stays -1 here unless writeCharRef assigns it below,
                 // even when the pair is written as-is -- confirm callers only test >= 0.
 982             high = m_highSurrogate;
 983             low = c;
 984             m_highSurrogate = 0;
 985         }
 986


 987         if (!Encodings.isLowUTF16Surrogate(low)) {
 988             throwIOE(high, low); // defined elsewhere; presumably reports the invalid pair -- TODO confirm






 989         }
 990
 991         final Writer writer = m_writer;
 992
 993         // If we make it to here we have a valid high, low surrogate pair
 994         if (m_encodingInfo.isInEncoding(high,low)) {
 995             // The character formed by the surrogate pair
 996             // is in the encoding, so just write it out
 997             writer.write(new char[]{high, low}, 0, 2);
 998         }
 999         else {
1000             // The pair is valid but m_encodingInfo reports it as
1001             // not in the encoding; what gets written depends on
1002             // whether an encoding name is known
1003             final String encoding = getEncoding();
1004             if (encoding != null) {
1005                 status = writeCharRef(writer, high, low); // presumably returns the code point written -- TODO confirm








1006             } else {
1007                 /* The output encoding is not known,
1008                  * so just write it out as-is.
1009                  */
1010                 writer.write(new char[]{high, low}, 0, 2);
1011             }
1012         }
1013         // -1, 0, or the value returned by writeCharRef (see the branches above).
1014         return status;
1015     }
1016 
1017     /**
1018      * Handle one of the default entities, return false if it
1019      * is not a default entity.
1020      *
1021      * @param ch character to be escaped.
1022      * @param i index into character array.
1023      * @param chars non-null reference to character array.
1024      * @param len length of chars.
1025      * @param fromTextNode true if the characters being processed
1026      * are from a text node, false if they are from an attribute value
1027      * @param escLF true if the linefeed should be escaped.
1028      *
1029      * @return i+1 if the character was written, else i.
1030      *
1031      * @throws java.io.IOException
1032      */
1033     protected int accumDefaultEntity(
1034         Writer writer,


1084         char ch[],
1085         int start,
1086         int length,
1087         boolean isCData,
1088         boolean useSystemLineSeparator)
1089         throws IOException, org.xml.sax.SAXException
1090     {
1091         final Writer writer = m_writer;
1092         int end = start + length;
1093 
1094         for (int i = start; i < end; i++)
1095         {
1096             char c = ch[i];
1097 
1098             if (CharInfo.S_LINEFEED == c && useSystemLineSeparator)
1099             {
1100                 writer.write(m_lineSep, 0, m_lineSepLen);
1101             }
1102             else if (isCData && (!escapingNotNeeded(c)))
1103             {
1104                 i = handleEscaping(writer, c, ch, i, end);

























1105             }
1106             else if (
1107                 isCData
1108                     && ((i < (end - 2))
1109                         && (']' == c)
1110                         && (']' == ch[i + 1])
1111                         && ('>' == ch[i + 2])))
1112             {
1113                 writer.write(CDATA_CONTINUE);
1114 
1115                 i += 2;
1116             }
1117             else
1118             {
1119                 if (escapingNotNeeded(c))
1120                 {
1121                     if (isCData && !m_cdataTagOpen)
1122                     {
1123                         writer.write(CDATA_DELIMITER_OPEN);
1124                         m_cdataTagOpen = true;
1125                     }
1126                     writer.write(c);
1127                 }
1128                 else {
1129                     i = handleEscaping(writer, c, ch, i, end);
1130                 }
1131             }




1132         }





1133 
1134     }
1135 
1136     /**
1137      * Handles escaping of one char: surrogates are delegated to
1138      * writeUTF16Surrogate, any other char to writeCharRef.
1139      *
          * @param writer the writer handed to writeCharRef for non-surrogate chars
1140      * @param c the current char
1141      * @param ch the character array
1142      * @param i the current position
1143      * @param end the end index of the array
1144      * @return i + 1 if c is a high surrogate and writeUTF16Surrogate returned a non-negative status, otherwise i
1145      *
1146      * @throws IOException
1147      * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
1148      */
1149     private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
1150             throws IOException, SAXException {
             // A low surrogate is routed here as well; writeUTF16Surrogate pairs it
             // with the char held in m_highSurrogate when one is pending.
1151         if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
1152         {
1153             if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
1154                 // move the index if the low surrogate is consumed
1155                 // as writeUTF16Surrogate has written the pair
1156                 if (Encodings.isHighUTF16Surrogate(c)) {
1157                     i++ ;
1158                 }
1159             }
1160         }
1161         else
1162         {
1163             writeCharRef(writer, c);
1164         }
1165         return i;
1166     }
1167 
1168     /**
1169      * Ends an un-escaping section by popping the top entry of m_disableOutputEscapingStates.
1170      *
1171      * @see #startNonEscaping
1172      *
1173      * @throws org.xml.sax.SAXException declared by the signature; nothing in this body throws it directly
1174      */
1175     public void endNonEscaping() throws org.xml.sax.SAXException
1176     {
1177         m_disableOutputEscapingStates.pop();
1178     }
1179 
1180     /**
1181      * Starts an un-escaping section. All characters printed within an un-
1182      * escaping section are printed as is, without escaping special characters
1183      * into entity references. Only XML and HTML serializers need to support
1184      * this method.
1185      * <p> The contents of the un-escaping section will be delivered through the


1214      * @param length The number of characters to read from the array.
1215      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1216      *            wrapping another exception.
1217      * @see #ignorableWhitespace
1218      * @see org.xml.sax.Locator
1219      *
1220      * @throws org.xml.sax.SAXException
1221      */
1222     protected void cdata(char ch[], int start, final int length)
1223         throws org.xml.sax.SAXException
1224     {
1225         try
1226         {
1227             final int old_start = start;
1228             if (m_elemContext.m_startTagOpen)
1229             {
1230                 closeStartTag();
1231                 m_elemContext.m_startTagOpen = false;
1232             }
1233 
1234             if (!m_cdataTagOpen && shouldIndent())
1235                 indent();
1236 
1237             boolean writeCDataBrackets =
1238                 (((length >= 1) && escapingNotNeeded(ch[start])));
1239 
1240             /* Write out the CDATA opening delimiter only if
1241              * we are supposed to, and if we are not already in
1242              * the middle of a CDATA section
1243              */
1244             if (writeCDataBrackets && !m_cdataTagOpen)
1245             {
1246                 m_writer.write(CDATA_DELIMITER_OPEN);
1247                 m_cdataTagOpen = true;
1248             }
1249 
1250             // writer.write(ch, start, length);
1251             if (isEscapingDisabled())
1252             {
1253                 charactersRaw(ch, start, length);
1254             }


1612     }
1613     /**
1614      * Process a dirty character and any preceding clean characters
1615      * that were not yet processed.
1616      * @param chars array of characters being processed
1617      * @param end one (1) beyond the last character
1618      * in chars to be processed
1619      * @param i the index of the dirty character
1620      * @param ch the character in chars[i]
1621      * @param lastDirty the last dirty character previous to i
1622      * @param fromTextNode true if the characters being processed are
1623      * from a text node, false if they are from an attribute value.
1624      * @return the index of the last character processed
1625      */
1626     private int processDirty(
1627         char[] chars,
1628         int end,
1629         int i,
1630         char ch,
1631         int lastDirty,
1632         boolean fromTextNode) throws IOException, SAXException
1633     {
1634         int startClean = lastDirty + 1;
1635         // if we have some clean characters accumulated
1636         // process them before the dirty one.
1637         if (i > startClean)
1638         {
1639             int lengthClean = i - startClean;
1640             m_writer.write(chars, startClean, lengthClean);
1641         }
1642 
1643         // process the "dirty" character
1644         if (CharInfo.S_LINEFEED == ch && fromTextNode)
1645         {
1646             m_writer.write(m_lineSep, 0, m_lineSepLen);
1647         }
1648         else
1649         {
1650             startClean =
1651                 accumDefaultEscape(
1652                     m_writer,


1691      * @param chars non-null reference to character array.
1692      * @param len length of chars.
1693      * @param fromTextNode true if the characters being processed are
1694      * from a text node, false if the characters being processed are from
1695      * an attribute value.
1696      * @param escLF true if the linefeed should be escaped.
1697      *
1698      * @return i+1 if a character was written, i+2 if two characters
1699      * were written out, else return i.
1700      *
1701      * @throws org.xml.sax.SAXException
1702      */
1703     protected int accumDefaultEscape(
1704         Writer writer,
1705         char ch,
1706         int i,
1707         char[] chars,
1708         int len,
1709         boolean fromTextNode,
1710         boolean escLF)
1711         throws IOException, SAXException
1712     {
1713 
1714         int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
1715 
1716         if (i == pos)
1717         {
1718             if (m_highSurrogate != 0) {
1719                 if (!(Encodings.isLowUTF16Surrogate(ch))) {
1720                     throwIOE(m_highSurrogate, ch);
1721                 }
1722                 writeCharRef(writer, m_highSurrogate, ch);
1723                 m_highSurrogate = 0;
1724                 return ++pos;
1725             }
1726 
1727             if (Encodings.isHighUTF16Surrogate(ch))
1728             {






1729                 if (i + 1 >= len)
1730                 {
1731                     // save for the next read
1732                     m_highSurrogate = ch;
1733                     pos++;




1734                 }
1735                 else
1736                 {
1737                     // the next should be the UTF-16 low surrogate of the hig/low pair.
1738                     char next = chars[++i];
1739                     if (!(Encodings.isLowUTF16Surrogate(next)))
1740                         throwIOE(ch, next);








1741 
1742                     writeCharRef(writer, ch, next);






1743                     pos += 2; // count the two characters that went into writing out this entity
1744                 }
1745             }
1746             else
1747             {
1748                 /*  This if check is added to support control characters in XML 1.1.
1749                  *  If a character is a Control Character within C0 and C1 range, it is desirable
1750                  *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
1751                  *  being used for output document.
1752                  */
1753                 if (isCharacterInC0orC1Range(ch) ||
1754                         (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
1755                 {
1756                     writeCharRef(writer, ch);


1757                 }
1758                 else if ((!escapingNotNeeded(ch) ||
1759                     (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
1760                      || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
1761                      && m_elemContext.m_currentElemDepth > 0)
1762                 {
1763                     writeCharRef(writer, ch);


1764                 }
1765                 else
1766                 {
1767                     writer.write(ch);
1768                 }
1769                 pos++;  // count the single character that was processed
1770             }
1771 
1772         }
1773         return pos;
1774     }
1775 
1776     /**
1777      * Writes out a character reference.
1778      * @param writer the writer
1779      * @param c the character
1780      * @throws IOException
1781      */
1782     private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
1783         if (m_cdataTagOpen)
1784             closeCDATA();
1785         writer.write("&#");
1786         writer.write(Integer.toString(c));
1787         writer.write(';');
1788     }
1789 
1790     /**
1791      * Writes out a pair of surrogates as a character reference
1792      * @param writer the writer
1793      * @param high the high surrogate
1794      * @param low the low surrogate
1795      * @throws IOException
1796      */
1797     private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
1798         if (m_cdataTagOpen)
1799             closeCDATA();
1800         // Unicode code point formed from the high/low pair.
1801         int codePoint = Encodings.toCodePoint(high, low);
1802         writer.write("&#");
1803         writer.write(Integer.toString(codePoint));
1804         writer.write(';');
1805         return codePoint;
1806     }
1807 
1808     private void throwIOE(char ch, char next) throws IOException {
1809         throw new IOException(Utils.messages.createMessage(
1810                 MsgKey.ER_INVALID_UTF16_SURROGATE,
1811                 new Object[] {Integer.toHexString(ch) + " "
1812                         + Integer.toHexString(next)}));
1813     }
1814 
1815     /**
1816      * Receive notification of the beginning of an element, although this is a
1817      * SAX method additional namespace or attribute information can occur before
1818      * or after this call, that is associated with this element.
1819      *
1820      *
1821      * @param namespaceURI The Namespace URI, or the empty string if the
1822      *        element has no Namespace URI or if Namespace
1823      *        processing is not being performed.
1824      * @param localName The local name (without prefix), or the
1825      *        empty string if Namespace processing is not being
1826      *        performed.
1827      * @param name The element type name.
1828      * @param atts The attributes attached to the element, if any.
1829      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1830      *            wrapping another exception.
1831      * @see org.xml.sax.ContentHandler#startElement
1832      * @see org.xml.sax.ContentHandler#endElement
1833      * @see org.xml.sax.AttributeList
1834      *
1835      * @throws org.xml.sax.SAXException


2042                 writer.write(name);
2043                 writer.write("=\"");
2044                 writeAttrString(writer, value, encoding);
2045                 writer.write('\"');
2046             }
2047     }
2048 
2049     /**
2050      * Returns the specified <var>string</var> after substituting <VAR>specials</VAR>,
2051      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
2052      *
2053      * @param   string      String to convert to XML format.
2054      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
2055      *
2056      * @throws java.io.IOException
2057      */
2058     public void writeAttrString(
2059         Writer writer,
2060         String string,
2061         String encoding)
2062         throws IOException, SAXException
2063     {
2064         final int len = string.length();
2065         if (len > m_attrBuff.length)
2066         {
2067            m_attrBuff = new char[len*2 + 1];
2068         }
2069         string.getChars(0,len, m_attrBuff, 0);
2070         final char[] stringChars = m_attrBuff;
2071 
2072         for (int i = 0; i < len; )
2073         {
2074             char ch = stringChars[i];
2075             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
2076             {
2077                 writer.write(ch);
2078                 i++;
2079             }
2080             else
2081             { // I guess the parser doesn't normalize cr/lf in attributes. -sb
2082 //                if ((CharInfo.S_CARRIAGERETURN == ch)


< prev index next >