< prev index next >
src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java
Print this page
*** 49,59 ****
/**
* This abstract class is a base class for other stream
* serializers (xml, html, text ...) that write output to a stream.
*
* @xsl.usage internal
! * @LastModified: Feb 2018
*/
abstract public class ToStream extends SerializerBase {
private static final String COMMENT_BEGIN = "<!--";
private static final String COMMENT_END = "-->";
--- 49,59 ----
/**
* This abstract class is a base class for other stream
* serializers (xml, html, text ...) that write output to a stream.
*
* @xsl.usage internal
! * @LastModified: Sept 2018
*/
abstract public class ToStream extends SerializerBase {
private static final String COMMENT_BEGIN = "<!--";
private static final String COMMENT_END = "-->";
*** 191,200 ****
--- 191,202 ----
* If this flag is true DTD entity references are not left as-is,
* which is exiting older behavior.
*/
private boolean m_expandDTDEntities = true;
+ private char m_highSurrogate = 0;
+
/**
* Default constructor
*/
public ToStream() { }
*** 951,1024 ****
* @param c the first (high) part of the surrogate, which
* must be confirmed before calling this method.
* @param ch Character array.
* @param i position Where the surrogate was detected.
* @param end The end index of the significant characters.
! * @return 0 if the pair of characters was written out as-is,
! * the unicode code point of the character represented by
! * the surrogate pair if an entity reference with that value
! * was written out.
*
* @throws IOException
* @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
*/
protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
! throws IOException
{
! int codePoint = 0;
if (i + 1 >= end)
{
! throw new IOException(
! Utils.messages.createMessage(
! MsgKey.ER_INVALID_UTF16_SURROGATE,
! new Object[] { Integer.toHexString((int) c)}));
}
- final char high = c;
- final char low = ch[i+1];
if (!Encodings.isLowUTF16Surrogate(low)) {
! throw new IOException(
! Utils.messages.createMessage(
! MsgKey.ER_INVALID_UTF16_SURROGATE,
! new Object[] {
! Integer.toHexString((int) c)
! + " "
! + Integer.toHexString(low)}));
}
final Writer writer = m_writer;
// If we make it to here we have a valid high, low surrogate pair
! if (m_encodingInfo.isInEncoding(c,low)) {
// If the character formed by the surrogate pair
// is in the encoding, so just write it out
! writer.write(ch,i,2);
}
else {
// Don't know what to do with this char, it is
// not in the encoding and not a high char in
// a surrogate pair, so write out as an entity ref
final String encoding = getEncoding();
if (encoding != null) {
! /* The output encoding is known,
! * so somthing is wrong.
! */
! codePoint = Encodings.toCodePoint(high, low);
! // not in the encoding, so write out a character reference
! writer.write('&');
! writer.write('#');
! writer.write(Integer.toString(codePoint));
! writer.write(';');
} else {
/* The output encoding is not known,
* so just write it out as-is.
*/
! writer.write(ch, i, 2);
}
}
// non-zero only if character reference was written out.
! return codePoint;
}
/**
* Handle one of the default entities, return false if it
* is not a default entity.
--- 953,1019 ----
* @param c the first (high) part of the surrogate, which
* must be confirmed before calling this method.
* @param ch Character array.
* @param i position Where the surrogate was detected.
* @param end The end index of the significant characters.
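! * @return the status of writing a surrogate pair.
! * -1 -- c is saved for the next call, or c completes a previously saved high surrogate and that pair is written as-is
! * 0 -- the pair (c, ch[i+1]) is written as-is
! * code point -- the pair is written as a numeric character reference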
*
* @throws IOException
* @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
*/
protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
! throws IOException, SAXException
{
! int status = -1;
if (i + 1 >= end)
{
! m_highSurrogate = c;
! return status;
! }
!
! char high, low;
! if (m_highSurrogate == 0) {
! high = c;
! low = ch[i+1];
! status = 0;
! } else {
! high = m_highSurrogate;
! low = c;
! m_highSurrogate = 0;
}
if (!Encodings.isLowUTF16Surrogate(low)) {
! throwIOE(high, low);
}
final Writer writer = m_writer;
// If we make it to here we have a valid high, low surrogate pair
! if (m_encodingInfo.isInEncoding(high,low)) {
// If the character formed by the surrogate pair
// is in the encoding, so just write it out
! writer.write(new char[]{high, low}, 0, 2);
}
else {
// Don't know what to do with this char, it is
// not in the encoding and not a high char in
// a surrogate pair, so write out as an entity ref
final String encoding = getEncoding();
if (encoding != null) {
! status = writeCharRef(writer, high, low);
} else {
/* The output encoding is not known,
* so just write it out as-is.
*/
! writer.write(new char[]{high, low}, 0, 2);
}
}
// non-zero only if character reference was written out.
! return status;
}
/**
* Handle one of the default entities, return false if it
* is not a default entity.
*** 1104,1139 ****
{
writer.write(m_lineSep, 0, m_lineSepLen);
}
else if (isCData && (!escapingNotNeeded(c)))
{
! // if (i != 0)
! if (m_cdataTagOpen)
! closeCDATA();
!
! // This needs to go into a function...
! if (Encodings.isHighUTF16Surrogate(c))
! {
! writeUTF16Surrogate(c, ch, i, end);
! i++ ; // process two input characters
! }
! else
! {
! writer.write("&#");
!
! String intStr = Integer.toString((int) c);
!
! writer.write(intStr);
! writer.write(';');
! }
!
! // if ((i != 0) && (i < (end - 1)))
! // if (!m_cdataTagOpen && (i < (end - 1)))
! // {
! // writer.write(CDATA_DELIMITER_OPEN);
! // m_cdataTagOpen = true;
! // }
}
else if (
isCData
&& ((i < (end - 2))
&& (']' == c)
--- 1099,1109 ----
{
writer.write(m_lineSep, 0, m_lineSepLen);
}
else if (isCData && (!escapingNotNeeded(c)))
{
! i = handleEscaping(writer, c, ch, i, end);
}
else if (
isCData
&& ((i < (end - 2))
&& (']' == c)
*** 1153,1185 ****
writer.write(CDATA_DELIMITER_OPEN);
m_cdataTagOpen = true;
}
writer.write(c);
}
!
! // This needs to go into a function...
! else if (Encodings.isHighUTF16Surrogate(c))
! {
! if (m_cdataTagOpen)
! closeCDATA();
! writeUTF16Surrogate(c, ch, i, end);
! i++; // process two input characters
}
- else
- {
- if (m_cdataTagOpen)
- closeCDATA();
- writer.write("&#");
! String intStr = Integer.toString((int) c);
! writer.write(intStr);
! writer.write(';');
}
}
}
!
}
/**
* Ends an un-escaping section.
*
--- 1123,1170 ----
writer.write(CDATA_DELIMITER_OPEN);
m_cdataTagOpen = true;
}
writer.write(c);
}
! else {
! i = handleEscaping(writer, c, ch, i, end);
! }
! }
}
! }
! /**
! * Handles escaping, writes either with a surrogate pair or a character
! * reference.
! *
! * @param c the current char
! * @param ch the character array
! * @param i the current position
! * @param end the end index of the array
! * @return the next index
! *
! * @throws IOException
! * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
! */
! private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
! throws IOException, SAXException {
! if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
! {
! if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
! // move the index if the low surrogate is consumed
! // as writeUTF16Surrogate has written the pair
! if (Encodings.isHighUTF16Surrogate(c)) {
! i++ ;
}
}
}
! else
! {
! writeCharRef(writer, c);
! }
! return i;
}
/**
* Ends an un-escaping section.
*
*** 1244,1254 ****
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
! if (shouldIndent())
indent();
boolean writeCDataBrackets =
(((length >= 1) && escapingNotNeeded(ch[start])));
--- 1229,1239 ----
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
! if (!m_cdataTagOpen && shouldIndent())
indent();
boolean writeCDataBrackets =
(((length >= 1) && escapingNotNeeded(ch[start])));
*** 1642,1652 ****
char[] chars,
int end,
int i,
char ch,
int lastDirty,
! boolean fromTextNode) throws IOException
{
int startClean = lastDirty + 1;
// if we have some clean characters accumulated
// process them before the dirty one.
if (i > startClean)
--- 1627,1637 ----
char[] chars,
int end,
int i,
char ch,
int lastDirty,
! boolean fromTextNode) throws IOException, SAXException
{
int startClean = lastDirty + 1;
// if we have some clean characters accumulated
// process them before the dirty one.
if (i > startClean)
*** 1721,1801 ****
int i,
char[] chars,
int len,
boolean fromTextNode,
boolean escLF)
! throws IOException
{
int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
if (i == pos)
{
if (Encodings.isHighUTF16Surrogate(ch))
{
-
- // Should be the UTF-16 low surrogate of the hig/low pair.
- char next;
- // Unicode code point formed from the high/low pair.
- int codePoint = 0;
-
if (i + 1 >= len)
{
! throw new IOException(
! Utils.messages.createMessage(
! MsgKey.ER_INVALID_UTF16_SURROGATE,
! new Object[] { Integer.toHexString(ch)}));
! //"Invalid UTF-16 surrogate detected: "
!
! //+Integer.toHexString(ch)+ " ?");
}
else
{
! next = chars[++i];
!
if (!(Encodings.isLowUTF16Surrogate(next)))
! throw new IOException(
! Utils.messages.createMessage(
! MsgKey
! .ER_INVALID_UTF16_SURROGATE,
! new Object[] {
! Integer.toHexString(ch)
! + " "
! + Integer.toHexString(next)}));
! //"Invalid UTF-16 surrogate detected: "
! //+Integer.toHexString(ch)+" "+Integer.toHexString(next));
! codePoint = Encodings.toCodePoint(ch,next);
! }
!
! writer.write("&#");
! writer.write(Integer.toString(codePoint));
! writer.write(';');
pos += 2; // count the two characters that went into writing out this entity
}
else
{
/* This if check is added to support control characters in XML 1.1.
* If a character is a Control Character within C0 and C1 range, it is desirable
* to write it out as Numeric Character Reference(NCR) regardless of XML Version
* being used for output document.
*/
if (isCharacterInC0orC1Range(ch) ||
(XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
{
! writer.write("&#");
! writer.write(Integer.toString(ch));
! writer.write(';');
}
else if ((!escapingNotNeeded(ch) ||
( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
|| (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
&& m_elemContext.m_currentElemDepth > 0)
{
! writer.write("&#");
! writer.write(Integer.toString(ch));
! writer.write(';');
}
else
{
writer.write(ch);
}
--- 1706,1768 ----
int i,
char[] chars,
int len,
boolean fromTextNode,
boolean escLF)
! throws IOException, SAXException
{
int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
if (i == pos)
{
+ if (m_highSurrogate != 0) {
+ if (!(Encodings.isLowUTF16Surrogate(ch))) {
+ throwIOE(m_highSurrogate, ch);
+ }
+ writeCharRef(writer, m_highSurrogate, ch);
+ m_highSurrogate = 0;
+ return ++pos;
+ }
+
if (Encodings.isHighUTF16Surrogate(ch))
{
if (i + 1 >= len)
{
! // save for the next read
! m_highSurrogate = ch;
! pos++;
}
else
{
! // the next should be the UTF-16 low surrogate of the high/low pair.
! char next = chars[++i];
if (!(Encodings.isLowUTF16Surrogate(next)))
! throwIOE(ch, next);
! writeCharRef(writer, ch, next);
pos += 2; // count the two characters that went into writing out this entity
}
+ }
else
{
/* This if check is added to support control characters in XML 1.1.
* If a character is a Control Character within C0 and C1 range, it is desirable
* to write it out as Numeric Character Reference(NCR) regardless of XML Version
* being used for output document.
*/
if (isCharacterInC0orC1Range(ch) ||
(XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
{
! writeCharRef(writer, ch);
}
else if ((!escapingNotNeeded(ch) ||
( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
|| (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
&& m_elemContext.m_currentElemDepth > 0)
{
! writeCharRef(writer, ch);
}
else
{
writer.write(ch);
}
*** 1805,1814 ****
--- 1772,1820 ----
}
return pos;
}
/**
+ * Writes out a character reference.
+ * @param writer the writer
+ * @param c the character
+ * @throws IOException
+ */
+ private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
+ if (m_cdataTagOpen)
+ closeCDATA();
+ writer.write("&#");
+ writer.write(Integer.toString(c));
+ writer.write(';');
+ }
+
+ /**
+ * Writes out a pair of surrogates as a character reference
+ * @param writer the writer
+ * @param high the high surrogate
+ * @param low the low surrogate
+ * @throws IOException
+ */
+ private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
+ if (m_cdataTagOpen)
+ closeCDATA();
+ // Unicode code point formed from the high/low pair.
+ int codePoint = Encodings.toCodePoint(high, low);
+ writer.write("&#");
+ writer.write(Integer.toString(codePoint));
+ writer.write(';');
+ return codePoint;
+ }
+
+ private void throwIOE(char ch, char next) throws IOException {
+ throw new IOException(Utils.messages.createMessage(
+ MsgKey.ER_INVALID_UTF16_SURROGATE,
+ new Object[] {Integer.toHexString(ch) + " "
+ + Integer.toHexString(next)}));
+ }
+
+ /**
* Receive notification of the beginning of an element, although this is a
* SAX method additional namespace or attribute information can occur before
* or after this call, that is associated with this element.
*
*
*** 2051,2061 ****
*/
public void writeAttrString(
Writer writer,
String string,
String encoding)
! throws IOException
{
final int len = string.length();
if (len > m_attrBuff.length)
{
m_attrBuff = new char[len*2 + 1];
--- 2057,2067 ----
*/
public void writeAttrString(
Writer writer,
String string,
String encoding)
! throws IOException, SAXException
{
final int len = string.length();
if (len > m_attrBuff.length)
{
m_attrBuff = new char[len*2 + 1];
< prev index next >