1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.serializer;
  22 
  23 import java.io.IOException;
  24 
  25 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  26 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  27 import org.xml.sax.Attributes;
  28 import org.xml.sax.SAXException;
  29 
  30 /**
  31  * This class is not a public API.
  32  * It is only public because it is used in other packages.
  33  * This class converts SAX or SAX-like calls to a
  34  * serialized document for xsl:output method of "text".
  35  * @xsl.usage internal
  36  * @LastModified: Sept 2018
  37  */
  38 public final class ToTextStream extends ToStream
  39 {
  40 
  41 
  42   /**
  43    * Default constructor.
  44    */
  45   public ToTextStream()
  46   {
  47     super();
  48   }
  49 
  50 
  51 
  52   /**
  53    * Receive notification of the beginning of a document.
  54    *
  55    * <p>The SAX parser will invoke this method only once, before any
  56    * other methods in this interface or in DTDHandler (except for
  57    * setDocumentLocator).</p>
  58    *
  59    * @throws org.xml.sax.SAXException Any SAX exception, possibly
  60    *            wrapping another exception.
  61    *
  62    * @throws org.xml.sax.SAXException
  63    */
  64   protected void startDocumentInternal() throws org.xml.sax.SAXException
  65   {
  66     super.startDocumentInternal();
  67 
  68     m_needToCallStartDocument = false;
  69 
  70     // No action for the moment.
  71   }
  72 
  73   /**
  74    * Receive notification of the end of a document.
  75    *
  76    * <p>The SAX parser will invoke this method only once, and it will
  77    * be the last method invoked during the parse.  The parser shall
  78    * not invoke this method until it has either abandoned parsing
  79    * (because of an unrecoverable error) or reached the end of
  80    * input.</p>
  81    *
  82    * @throws org.xml.sax.SAXException Any SAX exception, possibly
  83    *            wrapping another exception.
  84    *
  85    * @throws org.xml.sax.SAXException
  86    */
  87   public void endDocument() throws org.xml.sax.SAXException
  88   {
  89     flushPending();
  90     flushWriter();
  91     if (m_tracer != null)
  92         super.fireEndDoc();
  93   }
  94 
  95   /**
  96    * Receive notification of the beginning of an element.
  97    *
  98    * <p>The Parser will invoke this method at the beginning of every
  99    * element in the XML document; there will be a corresponding
 100    * endElement() event for every startElement() event (even when the
 101    * element is empty). All of the element's content will be
 102    * reported, in order, before the corresponding endElement()
 103    * event.</p>
 104    *
 105    * <p>If the element name has a namespace prefix, the prefix will
 106    * still be attached.  Note that the attribute list provided will
 107    * contain only attributes with explicit values (specified or
 108    * defaulted): #IMPLIED attributes will be omitted.</p>
 109    *
 110    *
 111    * @param namespaceURI The Namespace URI, or the empty string if the
 112    *        element has no Namespace URI or if Namespace
 113    *        processing is not being performed.
 114    * @param localName The local name (without prefix), or the
 115    *        empty string if Namespace processing is not being
 116    *        performed.
 117    * @param name The qualified name (with prefix), or the
 118    *        empty string if qualified names are not available.
 119    * @param atts The attributes attached to the element, if any.
 120    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 121    *            wrapping another exception.
 122    * @see #endElement
 123    * @see org.xml.sax.AttributeList
 124    *
 125    * @throws org.xml.sax.SAXException
 126    */
 127   public void startElement(
 128           String namespaceURI, String localName, String name, Attributes atts)
 129             throws org.xml.sax.SAXException
 130   {
 131     // time to fire off startElement event
 132     if (m_tracer != null) {
 133         super.fireStartElem(name);
 134         this.firePseudoAttributes();
 135     }
 136     return;
 137   }
 138 
 139   /**
 140    * Receive notification of the end of an element.
 141    *
 142    * <p>The SAX parser will invoke this method at the end of every
 143    * element in the XML document; there will be a corresponding
 144    * startElement() event for every endElement() event (even when the
 145    * element is empty).</p>
 146    *
 147    * <p>If the element name has a namespace prefix, the prefix will
 148    * still be attached to the name.</p>
 149    *
 150    *
 151    * @param namespaceURI The Namespace URI, or the empty string if the
 152    *        element has no Namespace URI or if Namespace
 153    *        processing is not being performed.
 154    * @param localName The local name (without prefix), or the
 155    *        empty string if Namespace processing is not being
 156    *        performed.
 157    * @param name The qualified name (with prefix), or the
 158    *        empty string if qualified names are not available.
 159    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 160    *            wrapping another exception.
 161    *
 162    * @throws org.xml.sax.SAXException
 163    */
 164   public void endElement(String namespaceURI, String localName, String name)
 165           throws org.xml.sax.SAXException
 166   {
 167         if (m_tracer != null)
 168             super.fireEndElem(name);
 169   }
 170 
 171   /**
 172    * Receive notification of character data.
 173    *
 174    * <p>The Parser will call this method to report each chunk of
 175    * character data.  SAX parsers may return all contiguous character
 176    * data in a single chunk, or they may split it into several
 177    * chunks; however, all of the characters in any single event
 178    * must come from the same external entity, so that the Locator
 179    * provides useful information.</p>
 180    *
 181    * <p>The application must not attempt to read from the array
 182    * outside of the specified range.</p>
 183    *
 184    * <p>Note that some parsers will report whitespace using the
 185    * ignorableWhitespace() method rather than this one (validating
 186    * parsers must do so).</p>
 187    *
 188    * @param ch The characters from the XML document.
 189    * @param start The start position in the array.
 190    * @param length The number of characters to read from the array.
 191    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 192    *            wrapping another exception.
 193    * @see #ignorableWhitespace
 194    * @see org.xml.sax.Locator
 195    */
 196   public void characters(char ch[], int start, int length)
 197           throws org.xml.sax.SAXException
 198   {
 199 
 200     flushPending();
 201 
 202     try
 203     {
 204         if (inTemporaryOutputState()) {
 205             /* leave characters un-processed as we are
 206              * creating temporary output, the output generated by
 207              * this serializer will be input to a final serializer
 208              * later on and it will do the processing in final
 209              * output state (not temporary output state).
 210              *
 211              * A "temporary" ToTextStream serializer is used to
 212              * evaluate attribute value templates (for example),
 213              * and the result of evaluating such a thing
 214              * is fed into a final serializer later on.
 215              */
 216             m_writer.write(ch, start, length);
 217         }
 218         else {
 219             // In final output state we do process the characters!
 220             writeNormalizedChars(ch, start, length, m_lineSepUse);
 221         }
 222 
 223         if (m_tracer != null)
 224             super.fireCharEvent(ch, start, length);
 225     }
 226     catch(IOException ioe)
 227     {
 228       throw new SAXException(ioe);
 229     }
 230   }
 231 
 232   /**
 233    * If available, when the disable-output-escaping attribute is used,
 234    * output raw text without escaping.
 235    *
 236    * @param ch The characters from the XML document.
 237    * @param start The start position in the array.
 238    * @param length The number of characters to read from the array.
 239    *
 240    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 241    *            wrapping another exception.
 242    */
 243   public void charactersRaw(char ch[], int start, int length)
 244           throws org.xml.sax.SAXException
 245   {
 246 
 247     try
 248     {
 249       writeNormalizedChars(ch, start, length, m_lineSepUse);
 250     }
 251     catch(IOException ioe)
 252     {
 253       throw new SAXException(ioe);
 254     }
 255   }
 256 
 257     /**
 258      * Normalize the characters, but don't escape.  Different from
 259      * SerializerToXML#writeNormalizedChars because it does not attempt to do
 260      * XML escaping at all.
 261      *
 262      * @param ch The characters from the XML document.
 263      * @param start The start position in the array.
 264      * @param length The number of characters to read from the array.
 265      * @param useLineSep true if the operating systems
 266      * end-of-line separator should be output rather than a new-line character.
 267      *
 268      * @throws IOException
 269      * @throws org.xml.sax.SAXException
 270      */
 271     void writeNormalizedChars(
 272         final char ch[],
 273             final int start,
 274             final int length,
 275             final boolean useLineSep)
 276             throws IOException, org.xml.sax.SAXException
 277     {
 278         final String encoding = getEncoding();
 279         final java.io.Writer writer = m_writer;
 280         final int end = start + length;
 281 
 282         /* copy a few "constants" before the loop for performance */
 283         final char S_LINEFEED = CharInfo.S_LINEFEED;
 284 
 285         // This for() loop always increments i by one at the end
 286         // of the loop.  Additional increments of i adjust for when
 287         // two input characters (a high/low UTF16 surrogate pair)
 288         // are processed.
 289         for (int i = start; i < end; i++) {
 290             final char c = ch[i];
 291 
 292             if (S_LINEFEED == c && useLineSep) {
 293                 writer.write(m_lineSep, 0, m_lineSepLen);
 294                 // one input char processed
 295             } else if (m_encodingInfo.isInEncoding(c)) {
 296                 writer.write(c);
 297                 // one input char processed
 298             } else if (Encodings.isHighUTF16Surrogate(c) ||
 299                        Encodings.isLowUTF16Surrogate(c)) {
 300                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
 301                 if (codePoint >= 0) {
 302                     // move the index if the low surrogate is consumed
 303                     // as writeUTF16Surrogate has written the pair
 304                     if (Encodings.isHighUTF16Surrogate(c)) {
 305                         i++;
 306                     }
 307 
 308                     // printing to the console is not appropriate, but will leave
 309                     // it as is for compatibility.
 310                     if (codePoint >0) {
 311                         // I think we can just emit the message,
 312                         // not crash and burn.
 313                         final String integralValue = Integer.toString(codePoint);
 314                         final String msg = Utils.messages.createMessage(
 315                             MsgKey.ER_ILLEGAL_CHARACTER,
 316                             new Object[] { integralValue, encoding });
 317 
 318                         //Older behavior was to throw the message,
 319                         //but newer gentler behavior is to write a message to System.err
 320                         //throw new SAXException(msg);
 321                         System.err.println(msg);
 322                     }
 323                 }
 324             } else {
 325                 // Don't know what to do with this char, it is
 326                 // not in the encoding and not a high char in
 327                 // a surrogate pair, so write out as an entity ref
 328                 if (encoding != null) {
 329                     /* The output encoding is known,
 330                      * so somthing is wrong.
 331                      */
 332 
 333                     // not in the encoding, so write out a character reference
 334                     writer.write('&');
 335                     writer.write('#');
 336                     writer.write(Integer.toString(c));
 337                     writer.write(';');
 338 
 339                     // I think we can just emit the message,
 340                     // not crash and burn.
 341                     final String integralValue = Integer.toString(c);
 342                     final String msg = Utils.messages.createMessage(
 343                         MsgKey.ER_ILLEGAL_CHARACTER,
 344                         new Object[] { integralValue, encoding });
 345 
 346                     //Older behavior was to throw the message,
 347                     //but newer gentler behavior is to write a message to System.err
 348                     //throw new SAXException(msg);
 349                     System.err.println(msg);
 350                 } else {
 351                     /* The output encoding is not known,
 352                      * so just write it out as-is.
 353                      */
 354                     writer.write(c);
 355                 }
 356 
 357                 // one input char was processed
 358             }
 359         }
 360     }
 361 
 362   /**
 363    * Receive notification of cdata.
 364    *
 365    * <p>The Parser will call this method to report each chunk of
 366    * character data.  SAX parsers may return all contiguous character
 367    * data in a single chunk, or they may split it into several
 368    * chunks; however, all of the characters in any single event
 369    * must come from the same external entity, so that the Locator
 370    * provides useful information.</p>
 371    *
 372    * <p>The application must not attempt to read from the array
 373    * outside of the specified range.</p>
 374    *
 375    * <p>Note that some parsers will report whitespace using the
 376    * ignorableWhitespace() method rather than this one (validating
 377    * parsers must do so).</p>
 378    *
 379    * @param ch The characters from the XML document.
 380    * @param start The start position in the array.
 381    * @param length The number of characters to read from the array.
 382    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 383    *            wrapping another exception.
 384    * @see #ignorableWhitespace
 385    * @see org.xml.sax.Locator
 386    */
 387   public void cdata(char ch[], int start, int length)
 388           throws org.xml.sax.SAXException
 389   {
 390     try
 391     {
 392         writeNormalizedChars(ch, start, length, m_lineSepUse);
 393         if (m_tracer != null)
 394             super.fireCDATAEvent(ch, start, length);
 395     }
 396     catch(IOException ioe)
 397     {
 398       throw new SAXException(ioe);
 399     }
 400   }
 401 
 402   /**
 403    * Receive notification of ignorable whitespace in element content.
 404    *
 405    * <p>Validating Parsers must use this method to report each chunk
 406    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
 407    * section 2.10): non-validating parsers may also use this method
 408    * if they are capable of parsing and using content models.</p>
 409    *
 410    * <p>SAX parsers may return all contiguous whitespace in a single
 411    * chunk, or they may split it into several chunks; however, all of
 412    * the characters in any single event must come from the same
 413    * external entity, so that the Locator provides useful
 414    * information.</p>
 415    *
 416    * <p>The application must not attempt to read from the array
 417    * outside of the specified range.</p>
 418    *
 419    * @param ch The characters from the XML document.
 420    * @param start The start position in the array.
 421    * @param length The number of characters to read from the array.
 422    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 423    *            wrapping another exception.
 424    * @see #characters
 425    *
 426    * @throws org.xml.sax.SAXException
 427    */
 428   public void ignorableWhitespace(char ch[], int start, int length)
 429           throws org.xml.sax.SAXException
 430   {
 431 
 432     try
 433     {
 434       writeNormalizedChars(ch, start, length, m_lineSepUse);
 435     }
 436     catch(IOException ioe)
 437     {
 438       throw new SAXException(ioe);
 439     }
 440   }
 441 
 442   /**
 443    * Receive notification of a processing instruction.
 444    *
 445    * <p>The Parser will invoke this method once for each processing
 446    * instruction found: note that processing instructions may occur
 447    * before or after the main document element.</p>
 448    *
 449    * <p>A SAX parser should never report an XML declaration (XML 1.0,
 450    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
 451    * using this method.</p>
 452    *
 453    * @param target The processing instruction target.
 454    * @param data The processing instruction data, or null if
 455    *        none was supplied.
 456    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 457    *            wrapping another exception.
 458    *
 459    * @throws org.xml.sax.SAXException
 460    */
 461   public void processingInstruction(String target, String data)
 462           throws org.xml.sax.SAXException
 463   {
 464     // flush anything pending first
 465     flushPending();
 466 
 467     if (m_tracer != null)
 468         super.fireEscapingEvent(target, data);
 469   }
 470 
 471   /**
 472    * Called when a Comment is to be constructed.
 473    * Note that Xalan will normally invoke the other version of this method.
 474    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
 475    *
 476    * @param   data  The comment data.
 477    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 478    *            wrapping another exception.
 479    */
 480   public void comment(String data) throws org.xml.sax.SAXException
 481   {
 482       final int length = data.length();
 483       if (length > m_charsBuff.length)
 484       {
 485           m_charsBuff = new char[length*2 + 1];
 486       }
 487       data.getChars(0, length, m_charsBuff, 0);
 488       comment(m_charsBuff, 0, length);
 489   }
 490 
 491   /**
 492    * Report an XML comment anywhere in the document.
 493    *
 494    * This callback will be used for comments inside or outside the
 495    * document element, including comments in the external DTD
 496    * subset (if read).
 497    *
 498    * @param ch An array holding the characters in the comment.
 499    * @param start The starting position in the array.
 500    * @param length The number of characters to use from the array.
 501    * @throws org.xml.sax.SAXException The application may raise an exception.
 502    */
 503   public void comment(char ch[], int start, int length)
 504           throws org.xml.sax.SAXException
 505   {
 506 
 507     flushPending();
 508     if (m_tracer != null)
 509         super.fireCommentEvent(ch, start, length);
 510   }
 511 
 512   /**
 513    * Receive notivication of a entityReference.
 514    *
 515    * @param name non-null reference to the name of the entity.
 516    *
 517    * @throws org.xml.sax.SAXException
 518    */
 519   public void entityReference(String name) throws org.xml.sax.SAXException
 520   {
 521         if (m_tracer != null)
 522             super.fireEntityReference(name);
 523   }
 524 
 525     /**
 526      * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
 527      */
 528     public void addAttribute(
 529         String uri,
 530         String localName,
 531         String rawName,
 532         String type,
 533         String value,
 534         boolean XSLAttribute)
 535     {
 536         // do nothing, just forget all about the attribute
 537     }
 538 
 539     /**
 540      * @see org.xml.sax.ext.LexicalHandler#endCDATA()
 541      */
 542     public void endCDATA() throws SAXException
 543     {
 544         // do nothing
 545     }
 546 
 547     /**
 548      * @see ExtendedContentHandler#endElement(String)
 549      */
 550     public void endElement(String elemName) throws SAXException
 551     {
 552         if (m_tracer != null)
 553             super.fireEndElem(elemName);
 554     }
 555 
 556     /**
 557      * From XSLTC
 558      */
 559     public void startElement(
 560     String elementNamespaceURI,
 561     String elementLocalName,
 562     String elementName)
 563     throws SAXException
 564     {
 565         if (m_needToCallStartDocument)
 566             startDocumentInternal();
 567         // time to fire off startlement event.
 568         if (m_tracer != null) {
 569             super.fireStartElem(elementName);
 570             this.firePseudoAttributes();
 571         }
 572 
 573         return;
 574     }
 575 
 576 
 577     /**
 578      * From XSLTC
 579      */
 580     public void characters(String characters)
 581     throws SAXException
 582     {
 583         final int length = characters.length();
 584         if (length > m_charsBuff.length)
 585         {
 586             m_charsBuff = new char[length*2 + 1];
 587         }
 588         characters.getChars(0, length, m_charsBuff, 0);
 589         characters(m_charsBuff, 0, length);
 590     }
 591 
 592 
 593     /**
 594      * From XSLTC
 595      */
 596     public void addAttribute(String name, String value)
 597     {
 598         // do nothing, forget about the attribute
 599     }
 600 
 601     /**
 602      * Add a unique attribute
 603      */
 604     public void addUniqueAttribute(String qName, String value, int flags)
 605         throws SAXException
 606     {
 607         // do nothing, forget about the attribute
 608     }
 609 
 610     public boolean startPrefixMapping(
 611         String prefix,
 612         String uri,
 613         boolean shouldFlush)
 614         throws SAXException
 615     {
 616         // no namespace support for HTML
 617         return false;
 618     }
 619 
 620 
 621     public void startPrefixMapping(String prefix, String uri)
 622         throws org.xml.sax.SAXException
 623     {
 624         // no namespace support for HTML
 625     }
 626 
 627 
 628     public void namespaceAfterStartElement(
 629         final String prefix,
 630         final String uri)
 631         throws SAXException
 632     {
 633         // no namespace support for HTML
 634     }
 635 
 636     public void flushPending() throws org.xml.sax.SAXException
 637     {
 638             if (m_needToCallStartDocument)
 639             {
 640                 startDocumentInternal();
 641                 m_needToCallStartDocument = false;
 642             }
 643     }
 644 }