--- old/src/com/sun/org/apache/xml/internal/serializer/ToTextStream.java 2019-03-26 23:44:34.736889282 +0000 +++ new/src/com/sun/org/apache/xml/internal/serializer/ToTextStream.java 2019-03-26 23:44:34.636888649 +0000 @@ -1,6 +1,5 @@ /* - * reserved comment block - * DO NOT REMOVE OR ALTER! + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. */ /* * Copyright 2001-2004 The Apache Software Foundation. @@ -35,6 +34,7 @@ * This class converts SAX or SAX-like calls to a * serialized document for xsl:output method of "text". * @xsl.usage internal + * @LastModified: Sept 2018 */ public final class ToTextStream extends ToStream { @@ -296,23 +296,32 @@ } else if (m_encodingInfo.isInEncoding(c)) { writer.write(c); // one input char processed - } else if (Encodings.isHighUTF16Surrogate(c)) { + } else if (Encodings.isHighUTF16Surrogate(c) || + Encodings.isLowUTF16Surrogate(c)) { final int codePoint = writeUTF16Surrogate(c, ch, i, end); - if (codePoint != 0) { - // I think we can just emit the message, - // not crash and burn. - final String integralValue = Integer.toString(codePoint); - final String msg = Utils.messages.createMessage( - MsgKey.ER_ILLEGAL_CHARACTER, - new Object[] { integralValue, encoding }); - - //Older behavior was to throw the message, - //but newer gentler behavior is to write a message to System.err - //throw new SAXException(msg); - System.err.println(msg); - + if (codePoint >= 0) { + // move the index if the low surrogate is consumed + // as writeUTF16Surrogate has written the pair + if (Encodings.isHighUTF16Surrogate(c)) { + i++; + } + + // printing to the console is not appropriate, but will leave + // it as is for compatibility. + if (codePoint >0) { + // I think we can just emit the message, + // not crash and burn. + final String integralValue = Integer.toString(codePoint); + final String msg = Utils.messages.createMessage( + MsgKey.ER_ILLEGAL_CHARACTER, + new Object[] { integralValue, encoding }); + + //Older behavior was to throw the message, + //but newer gentler behavior is to write a message to System.err + //throw new SAXException(msg); + System.err.println(msg); + } } - i++; // two input chars processed } else { // Don't know what to do with this char, it is // not in the encoding and not a high char in