open Cdiff src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java

src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java


*** 49,59 ****
  /**
   * This abstract class is a base class for other stream
   * serializers (xml, html, text ...) that write output to a stream.
   *
   * @xsl.usage internal
!  * @LastModified: Feb 2018
   */
  abstract public class ToStream extends SerializerBase {
  
      private static final String COMMENT_BEGIN = "<!--";
      private static final String COMMENT_END = "-->";
--- 49,59 ----
  /**
   * This abstract class is a base class for other stream
   * serializers (xml, html, text ...) that write output to a stream.
   *
   * @xsl.usage internal
!  * @LastModified: Sept 2018
   */
  abstract public class ToStream extends SerializerBase {
  
      private static final String COMMENT_BEGIN = "<!--";
      private static final String COMMENT_END = "-->";
*** 191,200 ****
--- 191,202 ----
       * If this flag is true DTD entity references are not left as-is,
       * which is the existing older behavior.
       */
      private boolean m_expandDTDEntities = true;
  
+     private char m_highSurrogate = 0; // high surrogate saved at the end of a buffer until its low half arrives; 0 = none pending
+ 
      /**
       * Default constructor
       */
      public ToStream() { }
  
*** 951,1024 ****
       * @param c the first (high) part of the surrogate, which
       * must be confirmed before calling this method.
       * @param ch Character array.
       * @param i position Where the surrogate was detected.
       * @param end The end index of the significant characters.
!      * @return 0 if the pair of characters was written out as-is,
!      * the unicode code point of the character represented by
!      * the surrogate pair if an entity reference with that value
!      * was written out.
       *
       * @throws IOException
       * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
       */
      protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
!         throws IOException
      {
!         int codePoint = 0;
          if (i + 1 >= end)
          {
!             throw new IOException(
!                 Utils.messages.createMessage(
!                     MsgKey.ER_INVALID_UTF16_SURROGATE,
!                     new Object[] { Integer.toHexString((int) c)}));
          }
  
-         final char high = c;
-         final char low = ch[i+1];
          if (!Encodings.isLowUTF16Surrogate(low)) {
!             throw new IOException(
!                 Utils.messages.createMessage(
!                     MsgKey.ER_INVALID_UTF16_SURROGATE,
!                     new Object[] {
!                         Integer.toHexString((int) c)
!                             + " "
!                             + Integer.toHexString(low)}));
          }
  
          final Writer writer = m_writer;
  
          // If we make it to here we have a valid high, low surrogate pair
!         if (m_encodingInfo.isInEncoding(c,low)) {
              // If the character formed by the surrogate pair
              // is in the encoding, so just write it out
!             writer.write(ch,i,2);
          }
          else {
              // Don't know what to do with this char, it is
              // not in the encoding and not a high char in
              // a surrogate pair, so write out as an entity ref
              final String encoding = getEncoding();
              if (encoding != null) {
!                 /* The output encoding is known,
!                  * so somthing is wrong.
!                   */
!                 codePoint = Encodings.toCodePoint(high, low);
!                 // not in the encoding, so write out a character reference
!                 writer.write('&');
!                 writer.write('#');
!                 writer.write(Integer.toString(codePoint));
!                 writer.write(';');
              } else {
                  /* The output encoding is not known,
                   * so just write it out as-is.
                   */
!                 writer.write(ch, i, 2);
              }
          }
          // non-zero only if character reference was written out.
!         return codePoint;
      }
  
      /**
       * Handle one of the default entities, return false if it
       * is not a default entity.
--- 953,1019 ----
       * @param c the first (high) part of the surrogate, which
       * must be confirmed before calling this method.
       * @param ch Character array.
       * @param i position Where the surrogate was detected.
       * @param end The end index of the significant characters.
!      * @return the status of writing a surrogate pair.
!      *        -1 -- nothing is written
!      *         0 -- the pair is written as-is
!      *         code point -- the pair is written as an entity reference
       *
       * @throws IOException
       * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
       */
      protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
!         throws IOException, SAXException
      {
!         // Status reported to the caller:
!         //   -1         -- no character after c was consumed from ch: c was
!         //                 saved as a pending high surrogate, or c completed a
!         //                 pending pair that was written as-is
!         //    0         -- c and ch[i+1] were written as-is
!         //   code point -- the pair was written as a character reference
!         int status = -1;
!         // Defer c only when no high surrogate is already pending. If one is
!         // pending, c is its low half and must be paired with it even when c
!         // is the last (or only) character of this buffer; saving c here
!         // would overwrite the pending high surrogate and lose the pair.
!         if (m_highSurrogate == 0 && i + 1 >= end)
          {
!             m_highSurrogate = c;
!             return status;
!         }
! 
!         char high, low;
!         if (m_highSurrogate == 0) {
!             // c is the high surrogate; its low half is the next character.
!             high = c;
!             low = ch[i+1];
!             status = 0;
!         } else {
!             // c completes the pair started by the saved high surrogate.
!             high = m_highSurrogate;
!             low = c;
!             m_highSurrogate = 0;
          }
  
          if (!Encodings.isLowUTF16Surrogate(low)) {
!             throwIOE(high, low);
          }
  
          final Writer writer = m_writer;
  
          // If we make it to here we have a valid high, low surrogate pair
!         if (m_encodingInfo.isInEncoding(high,low)) {
              // If the character formed by the surrogate pair
              // is in the encoding, so just write it out
!             writer.write(new char[]{high, low}, 0, 2);
          }
          else {
              // Don't know what to do with this char, it is
              // not in the encoding and not a high char in
              // a surrogate pair, so write out as an entity ref
              final String encoding = getEncoding();
              if (encoding != null) {
!                 status = writeCharRef(writer, high, low);
              } else {
                  /* The output encoding is not known,
                   * so just write it out as-is.
                   */
!                 writer.write(new char[]{high, low}, 0, 2);
              }
          }
!         // -1, 0, or the code point of the character reference written.
!         return status;
      }
  
      /**
       * Handle one of the default entities, return false if it
       * is not a default entity.
*** 1104,1139 ****
              {
                  writer.write(m_lineSep, 0, m_lineSepLen);
              }
              else if (isCData && (!escapingNotNeeded(c)))
              {
!                 //                if (i != 0)
!                 if (m_cdataTagOpen)
!                     closeCDATA();
! 
!                 // This needs to go into a function...
!                 if (Encodings.isHighUTF16Surrogate(c))
!                 {
!                     writeUTF16Surrogate(c, ch, i, end);
!                     i++ ; // process two input characters
!                 }
!                 else
!                 {
!                     writer.write("&#");
! 
!                     String intStr = Integer.toString((int) c);
! 
!                     writer.write(intStr);
!                     writer.write(';');
!                 }
! 
!                 //                if ((i != 0) && (i < (end - 1)))
!                 //                if (!m_cdataTagOpen && (i < (end - 1)))
!                 //                {
!                 //                    writer.write(CDATA_DELIMITER_OPEN);
!                 //                    m_cdataTagOpen = true;
!                 //                }
              }
              else if (
                  isCData
                      && ((i < (end - 2))
                          && (']' == c)
--- 1099,1109 ----
              {
                  writer.write(m_lineSep, 0, m_lineSepLen);
              }
              else if (isCData && (!escapingNotNeeded(c)))
              {
!                 i = handleEscaping(writer, c, ch, i, end);
              }
              else if (
                  isCData
                      && ((i < (end - 2))
                          && (']' == c)
*** 1153,1185 ****
                          writer.write(CDATA_DELIMITER_OPEN);
                          m_cdataTagOpen = true;
                      }
                      writer.write(c);
                  }
! 
!                 // This needs to go into a function...
!                 else if (Encodings.isHighUTF16Surrogate(c))
!                 {
!                     if (m_cdataTagOpen)
!                         closeCDATA();
!                     writeUTF16Surrogate(c, ch, i, end);
!                     i++; // process two input characters
                  }
-                 else
-                 {
-                     if (m_cdataTagOpen)
-                         closeCDATA();
-                     writer.write("&#");
  
!                     String intStr = Integer.toString((int) c);
  
!                     writer.write(intStr);
!                     writer.write(';');
                  }
              }
          }
! 
      }
  
      /**
       * Ends an un-escaping section.
       *
--- 1123,1170 ----
                          writer.write(CDATA_DELIMITER_OPEN);
                          m_cdataTagOpen = true;
                      }
                      writer.write(c);
                  }
!                 else {
!                     i = handleEscaping(writer, c, ch, i, end);
!                 }
!             }
          }
  
!     }
  
!     /**
!      * Handles a character that must be escaped: a high or low surrogate is
!      * handed to writeUTF16Surrogate, any other character is written as a
!      * numeric character reference via writeCharRef.
!      *
!      * @param writer the writer passed on to writeCharRef
!      * @param c the current char
!      * @param ch the character array
!      * @param i the current position
!      * @param end the end index of the array
!      * @return i + 1 when c is a high surrogate and writeUTF16Surrogate
!      *         reported a non-negative status, otherwise i unchanged
!      *
!      * @throws IOException if writing fails; also raised (via throwIOE) when
!      *         writeUTF16Surrogate detects an invalid UTF-16 surrogate pair.
!      * @throws org.xml.sax.SAXException propagated from writeUTF16Surrogate
!      *         or writeCharRef.
!      */
!     private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
!             throws IOException, SAXException {
!         if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
!         {
!             if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
!                 // status >= 0: a pair was written; step past ch[i+1] only if
!                 // c was the high half, i.e. ch[i+1] was consumed as its low half
!                 if (Encodings.isHighUTF16Surrogate(c)) {
!                     i++ ;
                  }
              }
          }
!         else
!         {
!             writeCharRef(writer, c);
!         }
!         return i;
      }
  
      /**
       * Ends an un-escaping section.
       *
*** 1244,1254 ****
              {
                  closeStartTag();
                  m_elemContext.m_startTagOpen = false;
              }
  
!             if (shouldIndent())
                  indent();
  
              boolean writeCDataBrackets =
                  (((length >= 1) && escapingNotNeeded(ch[start])));
  
--- 1229,1239 ----
              {
                  closeStartTag();
                  m_elemContext.m_startTagOpen = false;
              }
  
!             if (!m_cdataTagOpen && shouldIndent())
                  indent();
  
              boolean writeCDataBrackets =
                  (((length >= 1) && escapingNotNeeded(ch[start])));
  
*** 1642,1652 ****
          char[] chars,
          int end,
          int i,
          char ch,
          int lastDirty,
!         boolean fromTextNode) throws IOException
      {
          int startClean = lastDirty + 1;
          // if we have some clean characters accumulated
          // process them before the dirty one.
          if (i > startClean)
--- 1627,1637 ----
          char[] chars,
          int end,
          int i,
          char ch,
          int lastDirty,
!         boolean fromTextNode) throws IOException, SAXException
      {
          int startClean = lastDirty + 1;
          // if we have some clean characters accumulated
          // process them before the dirty one.
          if (i > startClean)
*** 1721,1801 ****
          int i,
          char[] chars,
          int len,
          boolean fromTextNode,
          boolean escLF)
!         throws IOException
      {
  
          int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
  
          if (i == pos)
          {
              if (Encodings.isHighUTF16Surrogate(ch))
              {
- 
-                 // Should be the UTF-16 low surrogate of the hig/low pair.
-                 char next;
-                 // Unicode code point formed from the high/low pair.
-                 int codePoint = 0;
- 
                  if (i + 1 >= len)
                  {
!                     throw new IOException(
!                         Utils.messages.createMessage(
!                             MsgKey.ER_INVALID_UTF16_SURROGATE,
!                             new Object[] { Integer.toHexString(ch)}));
!                     //"Invalid UTF-16 surrogate detected: "
! 
!                     //+Integer.toHexString(ch)+ " ?");
                  }
                  else
                  {
!                     next = chars[++i];
! 
                      if (!(Encodings.isLowUTF16Surrogate(next)))
!                         throw new IOException(
!                             Utils.messages.createMessage(
!                                 MsgKey
!                                     .ER_INVALID_UTF16_SURROGATE,
!                                 new Object[] {
!                                     Integer.toHexString(ch)
!                                         + " "
!                                         + Integer.toHexString(next)}));
!                     //"Invalid UTF-16 surrogate detected: "
  
!                     //+Integer.toHexString(ch)+" "+Integer.toHexString(next));
!                     codePoint = Encodings.toCodePoint(ch,next);
!                 }
! 
!                 writer.write("&#");
!                 writer.write(Integer.toString(codePoint));
!                 writer.write(';');
                  pos += 2; // count the two characters that went into writing out this entity
              }
              else
              {
                  /*  This if check is added to support control characters in XML 1.1.
                   *  If a character is a Control Character within C0 and C1 range, it is desirable
                   *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
                   *  being used for output document.
                   */
                  if (isCharacterInC0orC1Range(ch) ||
                          (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
                  {
!                     writer.write("&#");
!                     writer.write(Integer.toString(ch));
!                     writer.write(';');
                  }
                  else if ((!escapingNotNeeded(ch) ||
                      (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
                       || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
                  && m_elemContext.m_currentElemDepth > 0)
                  {
!                     writer.write("&#");
!                     writer.write(Integer.toString(ch));
!                     writer.write(';');
                  }
                  else
                  {
                      writer.write(ch);
                  }
--- 1706,1768 ----
          int i,
          char[] chars,
          int len,
          boolean fromTextNode,
          boolean escLF)
!         throws IOException, SAXException
      {
  
          int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
  
          if (i == pos)
          {
+             if (m_highSurrogate != 0) {
+                 if (!(Encodings.isLowUTF16Surrogate(ch))) {
+                     throwIOE(m_highSurrogate, ch);
+                 }
+                 writeCharRef(writer, m_highSurrogate, ch);
+                 m_highSurrogate = 0;
+                 return ++pos;
+             }
+ 
              if (Encodings.isHighUTF16Surrogate(ch))
              {
                  if (i + 1 >= len)
                  {
!                     // save for the next read
!                     m_highSurrogate = ch;
!                     pos++;
                  }
                  else
                  {
!                     // the next should be the UTF-16 low surrogate of the high/low pair.
!                     char next = chars[++i];
                      if (!(Encodings.isLowUTF16Surrogate(next)))
!                         throwIOE(ch, next);
  
!                     writeCharRef(writer, ch, next);
                      pos += 2; // count the two characters that went into writing out this entity
                  }
+             }
              else
              {
                  /*  This if check is added to support control characters in XML 1.1.
                   *  If a character is a Control Character within C0 and C1 range, it is desirable
                   *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
                   *  being used for output document.
                   */
                  if (isCharacterInC0orC1Range(ch) ||
                          (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
                  {
!                     writeCharRef(writer, ch);
                  }
                  else if ((!escapingNotNeeded(ch) ||
                      (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
                       || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
                       && m_elemContext.m_currentElemDepth > 0)
                  {
!                     writeCharRef(writer, ch);
                  }
                  else
                  {
                      writer.write(ch);
                  }
*** 1805,1814 ****
--- 1772,1820 ----
          }
          return pos;
      }
  
      /**
+      * Writes out a character reference.
+      * @param writer the writer
+      * @param c the character
+      * @throws IOException
+      */
+     private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
+         if (m_cdataTagOpen)
+             closeCDATA();
+         writer.write("&#");
+         writer.write(Integer.toString(c));
+         writer.write(';');
+     }
+ 
+     /**
+      * Writes out a pair of surrogates as a character reference
+      * @param writer the writer
+      * @param high the high surrogate
+      * @param low the low surrogate
+      * @throws IOException
+      */
+     private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
+         if (m_cdataTagOpen)
+             closeCDATA();
+         // Unicode code point formed from the high/low pair.
+         int codePoint = Encodings.toCodePoint(high, low);
+         writer.write("&#");
+         writer.write(Integer.toString(codePoint));
+         writer.write(';');
+         return codePoint;
+     }
+ 
+     private void throwIOE(char ch, char next) throws IOException {
+         throw new IOException(Utils.messages.createMessage(
+                 MsgKey.ER_INVALID_UTF16_SURROGATE,
+                 new Object[] {Integer.toHexString(ch) + " "
+                         + Integer.toHexString(next)}));
+     }
+ 
+     /**
       * Receive notification of the beginning of an element, although this is a
       * SAX method additional namespace or attribute information can occur before
       * or after this call, that is associated with this element.
       *
       *
*** 2051,2061 ****
       */
      public void writeAttrString(
          Writer writer,
          String string,
          String encoding)
!         throws IOException
      {
          final int len = string.length();
          if (len > m_attrBuff.length)
          {
             m_attrBuff = new char[len*2 + 1];
--- 2057,2067 ----
       */
      public void writeAttrString(
          Writer writer,
          String string,
          String encoding)
!         throws IOException, SAXException
      {
          final int len = string.length();
          if (len > m_attrBuff.length)
          {
             m_attrBuff = new char[len*2 + 1];
< prev index next >