1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Copyright 2001-2004 The Apache Software Foundation.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19 /*
  20  * $Id: ToTextStream.java,v 1.2.4.1 2005/09/21 10:35:34 pvedula Exp $
  21  */
  22 package com.sun.org.apache.xml.internal.serializer;
  23 
  24 import java.io.IOException;
  25 
  26 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  27 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  28 import org.xml.sax.Attributes;
  29 import org.xml.sax.SAXException;
  30 
  31 /**
  32  * This class is not a public API.
  33  * It is only public because it is used in other packages.
  34  * This class converts SAX or SAX-like calls to a
  35  * serialized document for xsl:output method of "text".
  36  * @xsl.usage internal
  37  * @LastModified: Sept 2018
  38  */
  39 public final class ToTextStream extends ToStream
  40 {
  41 
  42 
  43   /**
  44    * Default constructor.
  45    */
  46   public ToTextStream()
  47   {
  48     super();
  49   }
  50 
  51 
  52 
  53   /**
  54    * Receive notification of the beginning of a document.
  55    *
  56    * <p>The SAX parser will invoke this method only once, before any
  57    * other methods in this interface or in DTDHandler (except for
  58    * setDocumentLocator).</p>
  59    *
  60    * @throws org.xml.sax.SAXException Any SAX exception, possibly
  61    *            wrapping another exception.
  62    *
  63    * @throws org.xml.sax.SAXException
  64    */
  65   protected void startDocumentInternal() throws org.xml.sax.SAXException
  66   {
  67     super.startDocumentInternal();
  68 
  69     m_needToCallStartDocument = false;
  70 
  71     // No action for the moment.
  72   }
  73 
  74   /**
  75    * Receive notification of the end of a document.
  76    *
  77    * <p>The SAX parser will invoke this method only once, and it will
  78    * be the last method invoked during the parse.  The parser shall
  79    * not invoke this method until it has either abandoned parsing
  80    * (because of an unrecoverable error) or reached the end of
  81    * input.</p>
  82    *
  83    * @throws org.xml.sax.SAXException Any SAX exception, possibly
  84    *            wrapping another exception.
  85    *
  86    * @throws org.xml.sax.SAXException
  87    */
  88   public void endDocument() throws org.xml.sax.SAXException
  89   {
  90     flushPending();
  91     flushWriter();
  92     if (m_tracer != null)
  93         super.fireEndDoc();
  94   }
  95 
  96   /**
  97    * Receive notification of the beginning of an element.
  98    *
  99    * <p>The Parser will invoke this method at the beginning of every
 100    * element in the XML document; there will be a corresponding
 101    * endElement() event for every startElement() event (even when the
 102    * element is empty). All of the element's content will be
 103    * reported, in order, before the corresponding endElement()
 104    * event.</p>
 105    *
 106    * <p>If the element name has a namespace prefix, the prefix will
 107    * still be attached.  Note that the attribute list provided will
 108    * contain only attributes with explicit values (specified or
 109    * defaulted): #IMPLIED attributes will be omitted.</p>
 110    *
 111    *
 112    * @param namespaceURI The Namespace URI, or the empty string if the
 113    *        element has no Namespace URI or if Namespace
 114    *        processing is not being performed.
 115    * @param localName The local name (without prefix), or the
 116    *        empty string if Namespace processing is not being
 117    *        performed.
 118    * @param name The qualified name (with prefix), or the
 119    *        empty string if qualified names are not available.
 120    * @param atts The attributes attached to the element, if any.
 121    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 122    *            wrapping another exception.
 123    * @see #endElement
 124    * @see org.xml.sax.AttributeList
 125    *
 126    * @throws org.xml.sax.SAXException
 127    */
 128   public void startElement(
 129           String namespaceURI, String localName, String name, Attributes atts)
 130             throws org.xml.sax.SAXException
 131   {
 132     // time to fire off startElement event
 133     if (m_tracer != null) {
 134         super.fireStartElem(name);
 135         this.firePseudoAttributes();
 136     }
 137     return;
 138   }
 139 
 140   /**
 141    * Receive notification of the end of an element.
 142    *
 143    * <p>The SAX parser will invoke this method at the end of every
 144    * element in the XML document; there will be a corresponding
 145    * startElement() event for every endElement() event (even when the
 146    * element is empty).</p>
 147    *
 148    * <p>If the element name has a namespace prefix, the prefix will
 149    * still be attached to the name.</p>
 150    *
 151    *
 152    * @param namespaceURI The Namespace URI, or the empty string if the
 153    *        element has no Namespace URI or if Namespace
 154    *        processing is not being performed.
 155    * @param localName The local name (without prefix), or the
 156    *        empty string if Namespace processing is not being
 157    *        performed.
 158    * @param name The qualified name (with prefix), or the
 159    *        empty string if qualified names are not available.
 160    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 161    *            wrapping another exception.
 162    *
 163    * @throws org.xml.sax.SAXException
 164    */
 165   public void endElement(String namespaceURI, String localName, String name)
 166           throws org.xml.sax.SAXException
 167   {
 168         if (m_tracer != null)
 169             super.fireEndElem(name);
 170   }
 171 
 172   /**
 173    * Receive notification of character data.
 174    *
 175    * <p>The Parser will call this method to report each chunk of
 176    * character data.  SAX parsers may return all contiguous character
 177    * data in a single chunk, or they may split it into several
 178    * chunks; however, all of the characters in any single event
 179    * must come from the same external entity, so that the Locator
 180    * provides useful information.</p>
 181    *
 182    * <p>The application must not attempt to read from the array
 183    * outside of the specified range.</p>
 184    *
 185    * <p>Note that some parsers will report whitespace using the
 186    * ignorableWhitespace() method rather than this one (validating
 187    * parsers must do so).</p>
 188    *
 189    * @param ch The characters from the XML document.
 190    * @param start The start position in the array.
 191    * @param length The number of characters to read from the array.
 192    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 193    *            wrapping another exception.
 194    * @see #ignorableWhitespace
 195    * @see org.xml.sax.Locator
 196    */
 197   public void characters(char ch[], int start, int length)
 198           throws org.xml.sax.SAXException
 199   {
 200 
 201     flushPending();
 202 
 203     try
 204     {
 205         if (inTemporaryOutputState()) {
 206             /* leave characters un-processed as we are
 207              * creating temporary output, the output generated by
 208              * this serializer will be input to a final serializer
 209              * later on and it will do the processing in final
 210              * output state (not temporary output state).
 211              *
 212              * A "temporary" ToTextStream serializer is used to
 213              * evaluate attribute value templates (for example),
 214              * and the result of evaluating such a thing
 215              * is fed into a final serializer later on.
 216              */
 217             m_writer.write(ch, start, length);
 218         }
 219         else {
 220             // In final output state we do process the characters!
 221             writeNormalizedChars(ch, start, length, m_lineSepUse);
 222         }
 223 
 224         if (m_tracer != null)
 225             super.fireCharEvent(ch, start, length);
 226     }
 227     catch(IOException ioe)
 228     {
 229       throw new SAXException(ioe);
 230     }
 231   }
 232 
 233   /**
 234    * If available, when the disable-output-escaping attribute is used,
 235    * output raw text without escaping.
 236    *
 237    * @param ch The characters from the XML document.
 238    * @param start The start position in the array.
 239    * @param length The number of characters to read from the array.
 240    *
 241    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 242    *            wrapping another exception.
 243    */
 244   public void charactersRaw(char ch[], int start, int length)
 245           throws org.xml.sax.SAXException
 246   {
 247 
 248     try
 249     {
 250       writeNormalizedChars(ch, start, length, m_lineSepUse);
 251     }
 252     catch(IOException ioe)
 253     {
 254       throw new SAXException(ioe);
 255     }
 256   }
 257 
 258     /**
 259      * Normalize the characters, but don't escape.  Different from
 260      * SerializerToXML#writeNormalizedChars because it does not attempt to do
 261      * XML escaping at all.
 262      *
 263      * @param ch The characters from the XML document.
 264      * @param start The start position in the array.
 265      * @param length The number of characters to read from the array.
 266      * @param useLineSep true if the operating systems
 267      * end-of-line separator should be output rather than a new-line character.
 268      *
 269      * @throws IOException
 270      * @throws org.xml.sax.SAXException
 271      */
 272     void writeNormalizedChars(
 273         final char ch[],
 274             final int start,
 275             final int length,
 276             final boolean useLineSep)
 277             throws IOException, org.xml.sax.SAXException
 278     {
 279         final String encoding = getEncoding();
 280         final java.io.Writer writer = m_writer;
 281         final int end = start + length;
 282 
 283         /* copy a few "constants" before the loop for performance */
 284         final char S_LINEFEED = CharInfo.S_LINEFEED;
 285 
 286         // This for() loop always increments i by one at the end
 287         // of the loop.  Additional increments of i adjust for when
 288         // two input characters (a high/low UTF16 surrogate pair)
 289         // are processed.
 290         for (int i = start; i < end; i++) {
 291             final char c = ch[i];
 292 
 293             if (S_LINEFEED == c && useLineSep) {
 294                 writer.write(m_lineSep, 0, m_lineSepLen);
 295                 // one input char processed
 296             } else if (m_encodingInfo.isInEncoding(c)) {
 297                 writer.write(c);
 298                 // one input char processed
 299             } else if (Encodings.isHighUTF16Surrogate(c) ||
 300                        Encodings.isLowUTF16Surrogate(c)) {
 301                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
 302                 if (codePoint >= 0) {
 303                     // move the index if the low surrogate is consumed
 304                     // as writeUTF16Surrogate has written the pair
 305                     if (Encodings.isHighUTF16Surrogate(c)) {
 306                         i++;
 307                     }
 308 
 309                     // printing to the console is not appropriate, but will leave
 310                     // it as is for compatibility.
 311                     if (codePoint >0) {
 312                         // I think we can just emit the message,
 313                         // not crash and burn.
 314                         final String integralValue = Integer.toString(codePoint);
 315                         final String msg = Utils.messages.createMessage(
 316                             MsgKey.ER_ILLEGAL_CHARACTER,
 317                             new Object[] { integralValue, encoding });
 318  
 319                         //Older behavior was to throw the message,
 320                         //but newer gentler behavior is to write a message to System.err
 321                         //throw new SAXException(msg);
 322                         System.err.println(msg);
 323                     }
 324                 }
 325             } else {
 326                 // Don't know what to do with this char, it is
 327                 // not in the encoding and not a high char in
 328                 // a surrogate pair, so write out as an entity ref
 329                 if (encoding != null) {
 330                     /* The output encoding is known,
 331                      * so somthing is wrong.
 332                      */
 333 
 334                     // not in the encoding, so write out a character reference
 335                     writer.write('&');
 336                     writer.write('#');
 337                     writer.write(Integer.toString(c));
 338                     writer.write(';');
 339 
 340                     // I think we can just emit the message,
 341                     // not crash and burn.
 342                     final String integralValue = Integer.toString(c);
 343                     final String msg = Utils.messages.createMessage(
 344                         MsgKey.ER_ILLEGAL_CHARACTER,
 345                         new Object[] { integralValue, encoding });
 346 
 347                     //Older behavior was to throw the message,
 348                     //but newer gentler behavior is to write a message to System.err
 349                     //throw new SAXException(msg);
 350                     System.err.println(msg);
 351                 } else {
 352                     /* The output encoding is not known,
 353                      * so just write it out as-is.
 354                      */
 355                     writer.write(c);
 356                 }
 357 
 358                 // one input char was processed
 359             }
 360         }
 361     }
 362 
 363   /**
 364    * Receive notification of cdata.
 365    *
 366    * <p>The Parser will call this method to report each chunk of
 367    * character data.  SAX parsers may return all contiguous character
 368    * data in a single chunk, or they may split it into several
 369    * chunks; however, all of the characters in any single event
 370    * must come from the same external entity, so that the Locator
 371    * provides useful information.</p>
 372    *
 373    * <p>The application must not attempt to read from the array
 374    * outside of the specified range.</p>
 375    *
 376    * <p>Note that some parsers will report whitespace using the
 377    * ignorableWhitespace() method rather than this one (validating
 378    * parsers must do so).</p>
 379    *
 380    * @param ch The characters from the XML document.
 381    * @param start The start position in the array.
 382    * @param length The number of characters to read from the array.
 383    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 384    *            wrapping another exception.
 385    * @see #ignorableWhitespace
 386    * @see org.xml.sax.Locator
 387    */
 388   public void cdata(char ch[], int start, int length)
 389           throws org.xml.sax.SAXException
 390   {
 391     try
 392     {
 393         writeNormalizedChars(ch, start, length, m_lineSepUse);
 394         if (m_tracer != null)
 395             super.fireCDATAEvent(ch, start, length);
 396     }
 397     catch(IOException ioe)
 398     {
 399       throw new SAXException(ioe);
 400     }
 401   }
 402 
 403   /**
 404    * Receive notification of ignorable whitespace in element content.
 405    *
 406    * <p>Validating Parsers must use this method to report each chunk
 407    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
 408    * section 2.10): non-validating parsers may also use this method
 409    * if they are capable of parsing and using content models.</p>
 410    *
 411    * <p>SAX parsers may return all contiguous whitespace in a single
 412    * chunk, or they may split it into several chunks; however, all of
 413    * the characters in any single event must come from the same
 414    * external entity, so that the Locator provides useful
 415    * information.</p>
 416    *
 417    * <p>The application must not attempt to read from the array
 418    * outside of the specified range.</p>
 419    *
 420    * @param ch The characters from the XML document.
 421    * @param start The start position in the array.
 422    * @param length The number of characters to read from the array.
 423    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 424    *            wrapping another exception.
 425    * @see #characters
 426    *
 427    * @throws org.xml.sax.SAXException
 428    */
 429   public void ignorableWhitespace(char ch[], int start, int length)
 430           throws org.xml.sax.SAXException
 431   {
 432 
 433     try
 434     {
 435       writeNormalizedChars(ch, start, length, m_lineSepUse);
 436     }
 437     catch(IOException ioe)
 438     {
 439       throw new SAXException(ioe);
 440     }
 441   }
 442 
 443   /**
 444    * Receive notification of a processing instruction.
 445    *
 446    * <p>The Parser will invoke this method once for each processing
 447    * instruction found: note that processing instructions may occur
 448    * before or after the main document element.</p>
 449    *
 450    * <p>A SAX parser should never report an XML declaration (XML 1.0,
 451    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
 452    * using this method.</p>
 453    *
 454    * @param target The processing instruction target.
 455    * @param data The processing instruction data, or null if
 456    *        none was supplied.
 457    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 458    *            wrapping another exception.
 459    *
 460    * @throws org.xml.sax.SAXException
 461    */
 462   public void processingInstruction(String target, String data)
 463           throws org.xml.sax.SAXException
 464   {
 465     // flush anything pending first
 466     flushPending();
 467 
 468     if (m_tracer != null)
 469         super.fireEscapingEvent(target, data);
 470   }
 471 
 472   /**
 473    * Called when a Comment is to be constructed.
 474    * Note that Xalan will normally invoke the other version of this method.
 475    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
 476    *
 477    * @param   data  The comment data.
 478    * @throws org.xml.sax.SAXException Any SAX exception, possibly
 479    *            wrapping another exception.
 480    */
 481   public void comment(String data) throws org.xml.sax.SAXException
 482   {
 483       final int length = data.length();
 484       if (length > m_charsBuff.length)
 485       {
 486           m_charsBuff = new char[length*2 + 1];
 487       }
 488       data.getChars(0, length, m_charsBuff, 0);
 489       comment(m_charsBuff, 0, length);
 490   }
 491 
 492   /**
 493    * Report an XML comment anywhere in the document.
 494    *
 495    * This callback will be used for comments inside or outside the
 496    * document element, including comments in the external DTD
 497    * subset (if read).
 498    *
 499    * @param ch An array holding the characters in the comment.
 500    * @param start The starting position in the array.
 501    * @param length The number of characters to use from the array.
 502    * @throws org.xml.sax.SAXException The application may raise an exception.
 503    */
 504   public void comment(char ch[], int start, int length)
 505           throws org.xml.sax.SAXException
 506   {
 507 
 508     flushPending();
 509     if (m_tracer != null)
 510         super.fireCommentEvent(ch, start, length);
 511   }
 512 
 513   /**
 514    * Receive notivication of a entityReference.
 515    *
 516    * @param name non-null reference to the name of the entity.
 517    *
 518    * @throws org.xml.sax.SAXException
 519    */
 520   public void entityReference(String name) throws org.xml.sax.SAXException
 521   {
 522         if (m_tracer != null)
 523             super.fireEntityReference(name);
 524   }
 525 
 526     /**
 527      * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
 528      */
 529     public void addAttribute(
 530         String uri,
 531         String localName,
 532         String rawName,
 533         String type,
 534         String value,
 535         boolean XSLAttribute)
 536     {
 537         // do nothing, just forget all about the attribute
 538     }
 539 
 540     /**
 541      * @see org.xml.sax.ext.LexicalHandler#endCDATA()
 542      */
 543     public void endCDATA() throws SAXException
 544     {
 545         // do nothing
 546     }
 547 
 548     /**
 549      * @see ExtendedContentHandler#endElement(String)
 550      */
 551     public void endElement(String elemName) throws SAXException
 552     {
 553         if (m_tracer != null)
 554             super.fireEndElem(elemName);
 555     }
 556 
 557     /**
 558      * From XSLTC
 559      */
 560     public void startElement(
 561     String elementNamespaceURI,
 562     String elementLocalName,
 563     String elementName)
 564     throws SAXException
 565     {
 566         if (m_needToCallStartDocument)
 567             startDocumentInternal();
 568         // time to fire off startlement event.
 569         if (m_tracer != null) {
 570             super.fireStartElem(elementName);
 571             this.firePseudoAttributes();
 572         }
 573 
 574         return;
 575     }
 576 
 577 
 578     /**
 579      * From XSLTC
 580      */
 581     public void characters(String characters)
 582     throws SAXException
 583     {
 584         final int length = characters.length();
 585         if (length > m_charsBuff.length)
 586         {
 587             m_charsBuff = new char[length*2 + 1];
 588         }
 589         characters.getChars(0, length, m_charsBuff, 0);
 590         characters(m_charsBuff, 0, length);
 591     }
 592 
 593 
 594     /**
 595      * From XSLTC
 596      */
 597     public void addAttribute(String name, String value)
 598     {
 599         // do nothing, forget about the attribute
 600     }
 601 
 602     /**
 603      * Add a unique attribute
 604      */
 605     public void addUniqueAttribute(String qName, String value, int flags)
 606         throws SAXException
 607     {
 608         // do nothing, forget about the attribute
 609     }
 610 
 611     public boolean startPrefixMapping(
 612         String prefix,
 613         String uri,
 614         boolean shouldFlush)
 615         throws SAXException
 616     {
 617         // no namespace support for HTML
 618         return false;
 619     }
 620 
 621 
 622     public void startPrefixMapping(String prefix, String uri)
 623         throws org.xml.sax.SAXException
 624     {
 625         // no namespace support for HTML
 626     }
 627 
 628 
 629     public void namespaceAfterStartElement(
 630         final String prefix,
 631         final String uri)
 632         throws SAXException
 633     {
 634         // no namespace support for HTML
 635     }
 636 
 637     public void flushPending() throws org.xml.sax.SAXException
 638     {
 639             if (m_needToCallStartDocument)
 640             {
 641                 startDocumentInternal();
 642                 m_needToCallStartDocument = false;
 643             }
 644     }
 645 }