1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Copyright 2001-2004 The Apache Software Foundation. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 /* 20 * $Id: ToTextStream.java,v 1.2.4.1 2005/09/21 10:35:34 pvedula Exp $ 21 */ 22 package com.sun.org.apache.xml.internal.serializer; 23 24 import java.io.IOException; 25 26 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 27 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 28 import org.xml.sax.Attributes; 29 import org.xml.sax.SAXException; 30 31 /** 32 * This class is not a public API. 33 * It is only public because it is used in other packages. 34 * This class converts SAX or SAX-like calls to a 35 * serialized document for xsl:output method of "text". 36 * @xsl.usage internal 37 * @LastModified: Sept 2018 38 */ 39 public final class ToTextStream extends ToStream 40 { 41 42 43 /** 44 * Default constructor. 45 */ 46 public ToTextStream() 47 { 48 super(); 49 } 50 51 52 53 /** 54 * Receive notification of the beginning of a document. 55 * 56 * <p>The SAX parser will invoke this method only once, before any 57 * other methods in this interface or in DTDHandler (except for 58 * setDocumentLocator).</p> 59 * 60 * @throws org.xml.sax.SAXException Any SAX exception, possibly 61 * wrapping another exception. 62 * 63 * @throws org.xml.sax.SAXException 64 */ 65 protected void startDocumentInternal() throws org.xml.sax.SAXException 66 { 67 super.startDocumentInternal(); 68 69 m_needToCallStartDocument = false; 70 71 // No action for the moment. 72 } 73 74 /** 75 * Receive notification of the end of a document. 76 * 77 * <p>The SAX parser will invoke this method only once, and it will 78 * be the last method invoked during the parse. The parser shall 79 * not invoke this method until it has either abandoned parsing 80 * (because of an unrecoverable error) or reached the end of 81 * input.</p> 82 * 83 * @throws org.xml.sax.SAXException Any SAX exception, possibly 84 * wrapping another exception. 85 * 86 * @throws org.xml.sax.SAXException 87 */ 88 public void endDocument() throws org.xml.sax.SAXException 89 { 90 flushPending(); 91 flushWriter(); 92 if (m_tracer != null) 93 super.fireEndDoc(); 94 } 95 96 /** 97 * Receive notification of the beginning of an element. 98 * 99 * <p>The Parser will invoke this method at the beginning of every 100 * element in the XML document; there will be a corresponding 101 * endElement() event for every startElement() event (even when the 102 * element is empty). All of the element's content will be 103 * reported, in order, before the corresponding endElement() 104 * event.</p> 105 * 106 * <p>If the element name has a namespace prefix, the prefix will 107 * still be attached. Note that the attribute list provided will 108 * contain only attributes with explicit values (specified or 109 * defaulted): #IMPLIED attributes will be omitted.</p> 110 * 111 * 112 * @param namespaceURI The Namespace URI, or the empty string if the 113 * element has no Namespace URI or if Namespace 114 * processing is not being performed. 115 * @param localName The local name (without prefix), or the 116 * empty string if Namespace processing is not being 117 * performed. 118 * @param name The qualified name (with prefix), or the 119 * empty string if qualified names are not available. 120 * @param atts The attributes attached to the element, if any. 121 * @throws org.xml.sax.SAXException Any SAX exception, possibly 122 * wrapping another exception. 123 * @see #endElement 124 * @see org.xml.sax.AttributeList 125 * 126 * @throws org.xml.sax.SAXException 127 */ 128 public void startElement( 129 String namespaceURI, String localName, String name, Attributes atts) 130 throws org.xml.sax.SAXException 131 { 132 // time to fire off startElement event 133 if (m_tracer != null) { 134 super.fireStartElem(name); 135 this.firePseudoAttributes(); 136 } 137 return; 138 } 139 140 /** 141 * Receive notification of the end of an element. 142 * 143 * <p>The SAX parser will invoke this method at the end of every 144 * element in the XML document; there will be a corresponding 145 * startElement() event for every endElement() event (even when the 146 * element is empty).</p> 147 * 148 * <p>If the element name has a namespace prefix, the prefix will 149 * still be attached to the name.</p> 150 * 151 * 152 * @param namespaceURI The Namespace URI, or the empty string if the 153 * element has no Namespace URI or if Namespace 154 * processing is not being performed. 155 * @param localName The local name (without prefix), or the 156 * empty string if Namespace processing is not being 157 * performed. 158 * @param name The qualified name (with prefix), or the 159 * empty string if qualified names are not available. 160 * @throws org.xml.sax.SAXException Any SAX exception, possibly 161 * wrapping another exception. 162 * 163 * @throws org.xml.sax.SAXException 164 */ 165 public void endElement(String namespaceURI, String localName, String name) 166 throws org.xml.sax.SAXException 167 { 168 if (m_tracer != null) 169 super.fireEndElem(name); 170 } 171 172 /** 173 * Receive notification of character data. 174 * 175 * <p>The Parser will call this method to report each chunk of 176 * character data. SAX parsers may return all contiguous character 177 * data in a single chunk, or they may split it into several 178 * chunks; however, all of the characters in any single event 179 * must come from the same external entity, so that the Locator 180 * provides useful information.</p> 181 * 182 * <p>The application must not attempt to read from the array 183 * outside of the specified range.</p> 184 * 185 * <p>Note that some parsers will report whitespace using the 186 * ignorableWhitespace() method rather than this one (validating 187 * parsers must do so).</p> 188 * 189 * @param ch The characters from the XML document. 190 * @param start The start position in the array. 191 * @param length The number of characters to read from the array. 192 * @throws org.xml.sax.SAXException Any SAX exception, possibly 193 * wrapping another exception. 194 * @see #ignorableWhitespace 195 * @see org.xml.sax.Locator 196 */ 197 public void characters(char ch[], int start, int length) 198 throws org.xml.sax.SAXException 199 { 200 201 flushPending(); 202 203 try 204 { 205 if (inTemporaryOutputState()) { 206 /* leave characters un-processed as we are 207 * creating temporary output, the output generated by 208 * this serializer will be input to a final serializer 209 * later on and it will do the processing in final 210 * output state (not temporary output state). 211 * 212 * A "temporary" ToTextStream serializer is used to 213 * evaluate attribute value templates (for example), 214 * and the result of evaluating such a thing 215 * is fed into a final serializer later on. 216 */ 217 m_writer.write(ch, start, length); 218 } 219 else { 220 // In final output state we do process the characters! 221 writeNormalizedChars(ch, start, length, m_lineSepUse); 222 } 223 224 if (m_tracer != null) 225 super.fireCharEvent(ch, start, length); 226 } 227 catch(IOException ioe) 228 { 229 throw new SAXException(ioe); 230 } 231 } 232 233 /** 234 * If available, when the disable-output-escaping attribute is used, 235 * output raw text without escaping. 236 * 237 * @param ch The characters from the XML document. 238 * @param start The start position in the array. 239 * @param length The number of characters to read from the array. 240 * 241 * @throws org.xml.sax.SAXException Any SAX exception, possibly 242 * wrapping another exception. 243 */ 244 public void charactersRaw(char ch[], int start, int length) 245 throws org.xml.sax.SAXException 246 { 247 248 try 249 { 250 writeNormalizedChars(ch, start, length, m_lineSepUse); 251 } 252 catch(IOException ioe) 253 { 254 throw new SAXException(ioe); 255 } 256 } 257 258 /** 259 * Normalize the characters, but don't escape. Different from 260 * SerializerToXML#writeNormalizedChars because it does not attempt to do 261 * XML escaping at all. 262 * 263 * @param ch The characters from the XML document. 264 * @param start The start position in the array. 265 * @param length The number of characters to read from the array. 266 * @param useLineSep true if the operating systems 267 * end-of-line separator should be output rather than a new-line character. 268 * 269 * @throws IOException 270 * @throws org.xml.sax.SAXException 271 */ 272 void writeNormalizedChars( 273 final char ch[], 274 final int start, 275 final int length, 276 final boolean useLineSep) 277 throws IOException, org.xml.sax.SAXException 278 { 279 final String encoding = getEncoding(); 280 final java.io.Writer writer = m_writer; 281 final int end = start + length; 282 283 /* copy a few "constants" before the loop for performance */ 284 final char S_LINEFEED = CharInfo.S_LINEFEED; 285 286 // This for() loop always increments i by one at the end 287 // of the loop. Additional increments of i adjust for when 288 // two input characters (a high/low UTF16 surrogate pair) 289 // are processed. 290 for (int i = start; i < end; i++) { 291 final char c = ch[i]; 292 293 if (S_LINEFEED == c && useLineSep) { 294 writer.write(m_lineSep, 0, m_lineSepLen); 295 // one input char processed 296 } else if (m_encodingInfo.isInEncoding(c)) { 297 writer.write(c); 298 // one input char processed 299 } else if (Encodings.isHighUTF16Surrogate(c) || 300 Encodings.isLowUTF16Surrogate(c)) { 301 final int codePoint = writeUTF16Surrogate(c, ch, i, end); 302 if (codePoint >= 0) { 303 // move the index if the low surrogate is consumed 304 // as writeUTF16Surrogate has written the pair 305 if (Encodings.isHighUTF16Surrogate(c)) { 306 i++; 307 } 308 309 // printing to the console is not appropriate, but will leave 310 // it as is for compatibility. 311 if (codePoint >0) { 312 // I think we can just emit the message, 313 // not crash and burn. 314 final String integralValue = Integer.toString(codePoint); 315 final String msg = Utils.messages.createMessage( 316 MsgKey.ER_ILLEGAL_CHARACTER, 317 new Object[] { integralValue, encoding }); 318 319 //Older behavior was to throw the message, 320 //but newer gentler behavior is to write a message to System.err 321 //throw new SAXException(msg); 322 System.err.println(msg); 323 } 324 } 325 } else { 326 // Don't know what to do with this char, it is 327 // not in the encoding and not a high char in 328 // a surrogate pair, so write out as an entity ref 329 if (encoding != null) { 330 /* The output encoding is known, 331 * so somthing is wrong. 332 */ 333 334 // not in the encoding, so write out a character reference 335 writer.write('&'); 336 writer.write('#'); 337 writer.write(Integer.toString(c)); 338 writer.write(';'); 339 340 // I think we can just emit the message, 341 // not crash and burn. 342 final String integralValue = Integer.toString(c); 343 final String msg = Utils.messages.createMessage( 344 MsgKey.ER_ILLEGAL_CHARACTER, 345 new Object[] { integralValue, encoding }); 346 347 //Older behavior was to throw the message, 348 //but newer gentler behavior is to write a message to System.err 349 //throw new SAXException(msg); 350 System.err.println(msg); 351 } else { 352 /* The output encoding is not known, 353 * so just write it out as-is. 354 */ 355 writer.write(c); 356 } 357 358 // one input char was processed 359 } 360 } 361 } 362 363 /** 364 * Receive notification of cdata. 365 * 366 * <p>The Parser will call this method to report each chunk of 367 * character data. SAX parsers may return all contiguous character 368 * data in a single chunk, or they may split it into several 369 * chunks; however, all of the characters in any single event 370 * must come from the same external entity, so that the Locator 371 * provides useful information.</p> 372 * 373 * <p>The application must not attempt to read from the array 374 * outside of the specified range.</p> 375 * 376 * <p>Note that some parsers will report whitespace using the 377 * ignorableWhitespace() method rather than this one (validating 378 * parsers must do so).</p> 379 * 380 * @param ch The characters from the XML document. 381 * @param start The start position in the array. 382 * @param length The number of characters to read from the array. 383 * @throws org.xml.sax.SAXException Any SAX exception, possibly 384 * wrapping another exception. 385 * @see #ignorableWhitespace 386 * @see org.xml.sax.Locator 387 */ 388 public void cdata(char ch[], int start, int length) 389 throws org.xml.sax.SAXException 390 { 391 try 392 { 393 writeNormalizedChars(ch, start, length, m_lineSepUse); 394 if (m_tracer != null) 395 super.fireCDATAEvent(ch, start, length); 396 } 397 catch(IOException ioe) 398 { 399 throw new SAXException(ioe); 400 } 401 } 402 403 /** 404 * Receive notification of ignorable whitespace in element content. 405 * 406 * <p>Validating Parsers must use this method to report each chunk 407 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 408 * section 2.10): non-validating parsers may also use this method 409 * if they are capable of parsing and using content models.</p> 410 * 411 * <p>SAX parsers may return all contiguous whitespace in a single 412 * chunk, or they may split it into several chunks; however, all of 413 * the characters in any single event must come from the same 414 * external entity, so that the Locator provides useful 415 * information.</p> 416 * 417 * <p>The application must not attempt to read from the array 418 * outside of the specified range.</p> 419 * 420 * @param ch The characters from the XML document. 421 * @param start The start position in the array. 422 * @param length The number of characters to read from the array. 423 * @throws org.xml.sax.SAXException Any SAX exception, possibly 424 * wrapping another exception. 425 * @see #characters 426 * 427 * @throws org.xml.sax.SAXException 428 */ 429 public void ignorableWhitespace(char ch[], int start, int length) 430 throws org.xml.sax.SAXException 431 { 432 433 try 434 { 435 writeNormalizedChars(ch, start, length, m_lineSepUse); 436 } 437 catch(IOException ioe) 438 { 439 throw new SAXException(ioe); 440 } 441 } 442 443 /** 444 * Receive notification of a processing instruction. 445 * 446 * <p>The Parser will invoke this method once for each processing 447 * instruction found: note that processing instructions may occur 448 * before or after the main document element.</p> 449 * 450 * <p>A SAX parser should never report an XML declaration (XML 1.0, 451 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 452 * using this method.</p> 453 * 454 * @param target The processing instruction target. 455 * @param data The processing instruction data, or null if 456 * none was supplied. 457 * @throws org.xml.sax.SAXException Any SAX exception, possibly 458 * wrapping another exception. 459 * 460 * @throws org.xml.sax.SAXException 461 */ 462 public void processingInstruction(String target, String data) 463 throws org.xml.sax.SAXException 464 { 465 // flush anything pending first 466 flushPending(); 467 468 if (m_tracer != null) 469 super.fireEscapingEvent(target, data); 470 } 471 472 /** 473 * Called when a Comment is to be constructed. 474 * Note that Xalan will normally invoke the other version of this method. 475 * %REVIEW% In fact, is this one ever needed, or was it a mistake? 476 * 477 * @param data The comment data. 478 * @throws org.xml.sax.SAXException Any SAX exception, possibly 479 * wrapping another exception. 480 */ 481 public void comment(String data) throws org.xml.sax.SAXException 482 { 483 final int length = data.length(); 484 if (length > m_charsBuff.length) 485 { 486 m_charsBuff = new char[length*2 + 1]; 487 } 488 data.getChars(0, length, m_charsBuff, 0); 489 comment(m_charsBuff, 0, length); 490 } 491 492 /** 493 * Report an XML comment anywhere in the document. 494 * 495 * This callback will be used for comments inside or outside the 496 * document element, including comments in the external DTD 497 * subset (if read). 498 * 499 * @param ch An array holding the characters in the comment. 500 * @param start The starting position in the array. 501 * @param length The number of characters to use from the array. 502 * @throws org.xml.sax.SAXException The application may raise an exception. 503 */ 504 public void comment(char ch[], int start, int length) 505 throws org.xml.sax.SAXException 506 { 507 508 flushPending(); 509 if (m_tracer != null) 510 super.fireCommentEvent(ch, start, length); 511 } 512 513 /** 514 * Receive notivication of a entityReference. 515 * 516 * @param name non-null reference to the name of the entity. 517 * 518 * @throws org.xml.sax.SAXException 519 */ 520 public void entityReference(String name) throws org.xml.sax.SAXException 521 { 522 if (m_tracer != null) 523 super.fireEntityReference(name); 524 } 525 526 /** 527 * @see ExtendedContentHandler#addAttribute(String, String, String, String, String) 528 */ 529 public void addAttribute( 530 String uri, 531 String localName, 532 String rawName, 533 String type, 534 String value, 535 boolean XSLAttribute) 536 { 537 // do nothing, just forget all about the attribute 538 } 539 540 /** 541 * @see org.xml.sax.ext.LexicalHandler#endCDATA() 542 */ 543 public void endCDATA() throws SAXException 544 { 545 // do nothing 546 } 547 548 /** 549 * @see ExtendedContentHandler#endElement(String) 550 */ 551 public void endElement(String elemName) throws SAXException 552 { 553 if (m_tracer != null) 554 super.fireEndElem(elemName); 555 } 556 557 /** 558 * From XSLTC 559 */ 560 public void startElement( 561 String elementNamespaceURI, 562 String elementLocalName, 563 String elementName) 564 throws SAXException 565 { 566 if (m_needToCallStartDocument) 567 startDocumentInternal(); 568 // time to fire off startlement event. 569 if (m_tracer != null) { 570 super.fireStartElem(elementName); 571 this.firePseudoAttributes(); 572 } 573 574 return; 575 } 576 577 578 /** 579 * From XSLTC 580 */ 581 public void characters(String characters) 582 throws SAXException 583 { 584 final int length = characters.length(); 585 if (length > m_charsBuff.length) 586 { 587 m_charsBuff = new char[length*2 + 1]; 588 } 589 characters.getChars(0, length, m_charsBuff, 0); 590 characters(m_charsBuff, 0, length); 591 } 592 593 594 /** 595 * From XSLTC 596 */ 597 public void addAttribute(String name, String value) 598 { 599 // do nothing, forget about the attribute 600 } 601 602 /** 603 * Add a unique attribute 604 */ 605 public void addUniqueAttribute(String qName, String value, int flags) 606 throws SAXException 607 { 608 // do nothing, forget about the attribute 609 } 610 611 public boolean startPrefixMapping( 612 String prefix, 613 String uri, 614 boolean shouldFlush) 615 throws SAXException 616 { 617 // no namespace support for HTML 618 return false; 619 } 620 621 622 public void startPrefixMapping(String prefix, String uri) 623 throws org.xml.sax.SAXException 624 { 625 // no namespace support for HTML 626 } 627 628 629 public void namespaceAfterStartElement( 630 final String prefix, 631 final String uri) 632 throws SAXException 633 { 634 // no namespace support for HTML 635 } 636 637 public void flushPending() throws org.xml.sax.SAXException 638 { 639 if (m_needToCallStartDocument) 640 { 641 startDocumentInternal(); 642 m_needToCallStartDocument = false; 643 } 644 } 645 }