1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.serializer; 22 23 import java.io.IOException; 24 25 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 26 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 27 import org.xml.sax.Attributes; 28 import org.xml.sax.SAXException; 29 30 /** 31 * This class is not a public API. 32 * It is only public because it is used in other packages. 33 * This class converts SAX or SAX-like calls to a 34 * serialized document for xsl:output method of "text". 35 * @xsl.usage internal 36 * @LastModified: Sept 2018 37 */ 38 public final class ToTextStream extends ToStream 39 { 40 41 42 /** 43 * Default constructor. 44 */ 45 public ToTextStream() 46 { 47 super(); 48 } 49 50 51 52 /** 53 * Receive notification of the beginning of a document. 54 * 55 * <p>The SAX parser will invoke this method only once, before any 56 * other methods in this interface or in DTDHandler (except for 57 * setDocumentLocator).</p> 58 * 59 * @throws org.xml.sax.SAXException Any SAX exception, possibly 60 * wrapping another exception. 61 * 62 * @throws org.xml.sax.SAXException 63 */ 64 protected void startDocumentInternal() throws org.xml.sax.SAXException 65 { 66 super.startDocumentInternal(); 67 68 m_needToCallStartDocument = false; 69 70 // No action for the moment. 71 } 72 73 /** 74 * Receive notification of the end of a document. 75 * 76 * <p>The SAX parser will invoke this method only once, and it will 77 * be the last method invoked during the parse. The parser shall 78 * not invoke this method until it has either abandoned parsing 79 * (because of an unrecoverable error) or reached the end of 80 * input.</p> 81 * 82 * @throws org.xml.sax.SAXException Any SAX exception, possibly 83 * wrapping another exception. 84 * 85 * @throws org.xml.sax.SAXException 86 */ 87 public void endDocument() throws org.xml.sax.SAXException 88 { 89 flushPending(); 90 flushWriter(); 91 if (m_tracer != null) 92 super.fireEndDoc(); 93 } 94 95 /** 96 * Receive notification of the beginning of an element. 97 * 98 * <p>The Parser will invoke this method at the beginning of every 99 * element in the XML document; there will be a corresponding 100 * endElement() event for every startElement() event (even when the 101 * element is empty). All of the element's content will be 102 * reported, in order, before the corresponding endElement() 103 * event.</p> 104 * 105 * <p>If the element name has a namespace prefix, the prefix will 106 * still be attached. Note that the attribute list provided will 107 * contain only attributes with explicit values (specified or 108 * defaulted): #IMPLIED attributes will be omitted.</p> 109 * 110 * 111 * @param namespaceURI The Namespace URI, or the empty string if the 112 * element has no Namespace URI or if Namespace 113 * processing is not being performed. 114 * @param localName The local name (without prefix), or the 115 * empty string if Namespace processing is not being 116 * performed. 117 * @param name The qualified name (with prefix), or the 118 * empty string if qualified names are not available. 119 * @param atts The attributes attached to the element, if any. 120 * @throws org.xml.sax.SAXException Any SAX exception, possibly 121 * wrapping another exception. 122 * @see #endElement 123 * @see org.xml.sax.AttributeList 124 * 125 * @throws org.xml.sax.SAXException 126 */ 127 public void startElement( 128 String namespaceURI, String localName, String name, Attributes atts) 129 throws org.xml.sax.SAXException 130 { 131 // time to fire off startElement event 132 if (m_tracer != null) { 133 super.fireStartElem(name); 134 this.firePseudoAttributes(); 135 } 136 return; 137 } 138 139 /** 140 * Receive notification of the end of an element. 141 * 142 * <p>The SAX parser will invoke this method at the end of every 143 * element in the XML document; there will be a corresponding 144 * startElement() event for every endElement() event (even when the 145 * element is empty).</p> 146 * 147 * <p>If the element name has a namespace prefix, the prefix will 148 * still be attached to the name.</p> 149 * 150 * 151 * @param namespaceURI The Namespace URI, or the empty string if the 152 * element has no Namespace URI or if Namespace 153 * processing is not being performed. 154 * @param localName The local name (without prefix), or the 155 * empty string if Namespace processing is not being 156 * performed. 157 * @param name The qualified name (with prefix), or the 158 * empty string if qualified names are not available. 159 * @throws org.xml.sax.SAXException Any SAX exception, possibly 160 * wrapping another exception. 161 * 162 * @throws org.xml.sax.SAXException 163 */ 164 public void endElement(String namespaceURI, String localName, String name) 165 throws org.xml.sax.SAXException 166 { 167 if (m_tracer != null) 168 super.fireEndElem(name); 169 } 170 171 /** 172 * Receive notification of character data. 173 * 174 * <p>The Parser will call this method to report each chunk of 175 * character data. SAX parsers may return all contiguous character 176 * data in a single chunk, or they may split it into several 177 * chunks; however, all of the characters in any single event 178 * must come from the same external entity, so that the Locator 179 * provides useful information.</p> 180 * 181 * <p>The application must not attempt to read from the array 182 * outside of the specified range.</p> 183 * 184 * <p>Note that some parsers will report whitespace using the 185 * ignorableWhitespace() method rather than this one (validating 186 * parsers must do so).</p> 187 * 188 * @param ch The characters from the XML document. 189 * @param start The start position in the array. 190 * @param length The number of characters to read from the array. 191 * @throws org.xml.sax.SAXException Any SAX exception, possibly 192 * wrapping another exception. 193 * @see #ignorableWhitespace 194 * @see org.xml.sax.Locator 195 */ 196 public void characters(char ch[], int start, int length) 197 throws org.xml.sax.SAXException 198 { 199 200 flushPending(); 201 202 try 203 { 204 if (inTemporaryOutputState()) { 205 /* leave characters un-processed as we are 206 * creating temporary output, the output generated by 207 * this serializer will be input to a final serializer 208 * later on and it will do the processing in final 209 * output state (not temporary output state). 210 * 211 * A "temporary" ToTextStream serializer is used to 212 * evaluate attribute value templates (for example), 213 * and the result of evaluating such a thing 214 * is fed into a final serializer later on. 215 */ 216 m_writer.write(ch, start, length); 217 } 218 else { 219 // In final output state we do process the characters! 220 writeNormalizedChars(ch, start, length, m_lineSepUse); 221 } 222 223 if (m_tracer != null) 224 super.fireCharEvent(ch, start, length); 225 } 226 catch(IOException ioe) 227 { 228 throw new SAXException(ioe); 229 } 230 } 231 232 /** 233 * If available, when the disable-output-escaping attribute is used, 234 * output raw text without escaping. 235 * 236 * @param ch The characters from the XML document. 237 * @param start The start position in the array. 238 * @param length The number of characters to read from the array. 239 * 240 * @throws org.xml.sax.SAXException Any SAX exception, possibly 241 * wrapping another exception. 242 */ 243 public void charactersRaw(char ch[], int start, int length) 244 throws org.xml.sax.SAXException 245 { 246 247 try 248 { 249 writeNormalizedChars(ch, start, length, m_lineSepUse); 250 } 251 catch(IOException ioe) 252 { 253 throw new SAXException(ioe); 254 } 255 } 256 257 /** 258 * Normalize the characters, but don't escape. Different from 259 * SerializerToXML#writeNormalizedChars because it does not attempt to do 260 * XML escaping at all. 261 * 262 * @param ch The characters from the XML document. 263 * @param start The start position in the array. 264 * @param length The number of characters to read from the array. 265 * @param useLineSep true if the operating systems 266 * end-of-line separator should be output rather than a new-line character. 267 * 268 * @throws IOException 269 * @throws org.xml.sax.SAXException 270 */ 271 void writeNormalizedChars( 272 final char ch[], 273 final int start, 274 final int length, 275 final boolean useLineSep) 276 throws IOException, org.xml.sax.SAXException 277 { 278 final String encoding = getEncoding(); 279 final java.io.Writer writer = m_writer; 280 final int end = start + length; 281 282 /* copy a few "constants" before the loop for performance */ 283 final char S_LINEFEED = CharInfo.S_LINEFEED; 284 285 // This for() loop always increments i by one at the end 286 // of the loop. Additional increments of i adjust for when 287 // two input characters (a high/low UTF16 surrogate pair) 288 // are processed. 289 for (int i = start; i < end; i++) { 290 final char c = ch[i]; 291 292 if (S_LINEFEED == c && useLineSep) { 293 writer.write(m_lineSep, 0, m_lineSepLen); 294 // one input char processed 295 } else if (m_encodingInfo.isInEncoding(c)) { 296 writer.write(c); 297 // one input char processed 298 } else if (Encodings.isHighUTF16Surrogate(c) || 299 Encodings.isLowUTF16Surrogate(c)) { 300 final int codePoint = writeUTF16Surrogate(c, ch, i, end); 301 if (codePoint >= 0) { 302 // move the index if the low surrogate is consumed 303 // as writeUTF16Surrogate has written the pair 304 if (Encodings.isHighUTF16Surrogate(c)) { 305 i++; 306 } 307 308 // printing to the console is not appropriate, but will leave 309 // it as is for compatibility. 310 if (codePoint >0) { 311 // I think we can just emit the message, 312 // not crash and burn. 313 final String integralValue = Integer.toString(codePoint); 314 final String msg = Utils.messages.createMessage( 315 MsgKey.ER_ILLEGAL_CHARACTER, 316 new Object[] { integralValue, encoding }); 317 318 //Older behavior was to throw the message, 319 //but newer gentler behavior is to write a message to System.err 320 //throw new SAXException(msg); 321 System.err.println(msg); 322 } 323 } 324 } else { 325 // Don't know what to do with this char, it is 326 // not in the encoding and not a high char in 327 // a surrogate pair, so write out as an entity ref 328 if (encoding != null) { 329 /* The output encoding is known, 330 * so somthing is wrong. 331 */ 332 333 // not in the encoding, so write out a character reference 334 writer.write('&'); 335 writer.write('#'); 336 writer.write(Integer.toString(c)); 337 writer.write(';'); 338 339 // I think we can just emit the message, 340 // not crash and burn. 341 final String integralValue = Integer.toString(c); 342 final String msg = Utils.messages.createMessage( 343 MsgKey.ER_ILLEGAL_CHARACTER, 344 new Object[] { integralValue, encoding }); 345 346 //Older behavior was to throw the message, 347 //but newer gentler behavior is to write a message to System.err 348 //throw new SAXException(msg); 349 System.err.println(msg); 350 } else { 351 /* The output encoding is not known, 352 * so just write it out as-is. 353 */ 354 writer.write(c); 355 } 356 357 // one input char was processed 358 } 359 } 360 } 361 362 /** 363 * Receive notification of cdata. 364 * 365 * <p>The Parser will call this method to report each chunk of 366 * character data. SAX parsers may return all contiguous character 367 * data in a single chunk, or they may split it into several 368 * chunks; however, all of the characters in any single event 369 * must come from the same external entity, so that the Locator 370 * provides useful information.</p> 371 * 372 * <p>The application must not attempt to read from the array 373 * outside of the specified range.</p> 374 * 375 * <p>Note that some parsers will report whitespace using the 376 * ignorableWhitespace() method rather than this one (validating 377 * parsers must do so).</p> 378 * 379 * @param ch The characters from the XML document. 380 * @param start The start position in the array. 381 * @param length The number of characters to read from the array. 382 * @throws org.xml.sax.SAXException Any SAX exception, possibly 383 * wrapping another exception. 384 * @see #ignorableWhitespace 385 * @see org.xml.sax.Locator 386 */ 387 public void cdata(char ch[], int start, int length) 388 throws org.xml.sax.SAXException 389 { 390 try 391 { 392 writeNormalizedChars(ch, start, length, m_lineSepUse); 393 if (m_tracer != null) 394 super.fireCDATAEvent(ch, start, length); 395 } 396 catch(IOException ioe) 397 { 398 throw new SAXException(ioe); 399 } 400 } 401 402 /** 403 * Receive notification of ignorable whitespace in element content. 404 * 405 * <p>Validating Parsers must use this method to report each chunk 406 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 407 * section 2.10): non-validating parsers may also use this method 408 * if they are capable of parsing and using content models.</p> 409 * 410 * <p>SAX parsers may return all contiguous whitespace in a single 411 * chunk, or they may split it into several chunks; however, all of 412 * the characters in any single event must come from the same 413 * external entity, so that the Locator provides useful 414 * information.</p> 415 * 416 * <p>The application must not attempt to read from the array 417 * outside of the specified range.</p> 418 * 419 * @param ch The characters from the XML document. 420 * @param start The start position in the array. 421 * @param length The number of characters to read from the array. 422 * @throws org.xml.sax.SAXException Any SAX exception, possibly 423 * wrapping another exception. 424 * @see #characters 425 * 426 * @throws org.xml.sax.SAXException 427 */ 428 public void ignorableWhitespace(char ch[], int start, int length) 429 throws org.xml.sax.SAXException 430 { 431 432 try 433 { 434 writeNormalizedChars(ch, start, length, m_lineSepUse); 435 } 436 catch(IOException ioe) 437 { 438 throw new SAXException(ioe); 439 } 440 } 441 442 /** 443 * Receive notification of a processing instruction. 444 * 445 * <p>The Parser will invoke this method once for each processing 446 * instruction found: note that processing instructions may occur 447 * before or after the main document element.</p> 448 * 449 * <p>A SAX parser should never report an XML declaration (XML 1.0, 450 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 451 * using this method.</p> 452 * 453 * @param target The processing instruction target. 454 * @param data The processing instruction data, or null if 455 * none was supplied. 456 * @throws org.xml.sax.SAXException Any SAX exception, possibly 457 * wrapping another exception. 458 * 459 * @throws org.xml.sax.SAXException 460 */ 461 public void processingInstruction(String target, String data) 462 throws org.xml.sax.SAXException 463 { 464 // flush anything pending first 465 flushPending(); 466 467 if (m_tracer != null) 468 super.fireEscapingEvent(target, data); 469 } 470 471 /** 472 * Called when a Comment is to be constructed. 473 * Note that Xalan will normally invoke the other version of this method. 474 * %REVIEW% In fact, is this one ever needed, or was it a mistake? 475 * 476 * @param data The comment data. 477 * @throws org.xml.sax.SAXException Any SAX exception, possibly 478 * wrapping another exception. 479 */ 480 public void comment(String data) throws org.xml.sax.SAXException 481 { 482 final int length = data.length(); 483 if (length > m_charsBuff.length) 484 { 485 m_charsBuff = new char[length*2 + 1]; 486 } 487 data.getChars(0, length, m_charsBuff, 0); 488 comment(m_charsBuff, 0, length); 489 } 490 491 /** 492 * Report an XML comment anywhere in the document. 493 * 494 * This callback will be used for comments inside or outside the 495 * document element, including comments in the external DTD 496 * subset (if read). 497 * 498 * @param ch An array holding the characters in the comment. 499 * @param start The starting position in the array. 500 * @param length The number of characters to use from the array. 501 * @throws org.xml.sax.SAXException The application may raise an exception. 502 */ 503 public void comment(char ch[], int start, int length) 504 throws org.xml.sax.SAXException 505 { 506 507 flushPending(); 508 if (m_tracer != null) 509 super.fireCommentEvent(ch, start, length); 510 } 511 512 /** 513 * Receive notivication of a entityReference. 514 * 515 * @param name non-null reference to the name of the entity. 516 * 517 * @throws org.xml.sax.SAXException 518 */ 519 public void entityReference(String name) throws org.xml.sax.SAXException 520 { 521 if (m_tracer != null) 522 super.fireEntityReference(name); 523 } 524 525 /** 526 * @see ExtendedContentHandler#addAttribute(String, String, String, String, String) 527 */ 528 public void addAttribute( 529 String uri, 530 String localName, 531 String rawName, 532 String type, 533 String value, 534 boolean XSLAttribute) 535 { 536 // do nothing, just forget all about the attribute 537 } 538 539 /** 540 * @see org.xml.sax.ext.LexicalHandler#endCDATA() 541 */ 542 public void endCDATA() throws SAXException 543 { 544 // do nothing 545 } 546 547 /** 548 * @see ExtendedContentHandler#endElement(String) 549 */ 550 public void endElement(String elemName) throws SAXException 551 { 552 if (m_tracer != null) 553 super.fireEndElem(elemName); 554 } 555 556 /** 557 * From XSLTC 558 */ 559 public void startElement( 560 String elementNamespaceURI, 561 String elementLocalName, 562 String elementName) 563 throws SAXException 564 { 565 if (m_needToCallStartDocument) 566 startDocumentInternal(); 567 // time to fire off startlement event. 568 if (m_tracer != null) { 569 super.fireStartElem(elementName); 570 this.firePseudoAttributes(); 571 } 572 573 return; 574 } 575 576 577 /** 578 * From XSLTC 579 */ 580 public void characters(String characters) 581 throws SAXException 582 { 583 final int length = characters.length(); 584 if (length > m_charsBuff.length) 585 { 586 m_charsBuff = new char[length*2 + 1]; 587 } 588 characters.getChars(0, length, m_charsBuff, 0); 589 characters(m_charsBuff, 0, length); 590 } 591 592 593 /** 594 * From XSLTC 595 */ 596 public void addAttribute(String name, String value) 597 { 598 // do nothing, forget about the attribute 599 } 600 601 /** 602 * Add a unique attribute 603 */ 604 public void addUniqueAttribute(String qName, String value, int flags) 605 throws SAXException 606 { 607 // do nothing, forget about the attribute 608 } 609 610 public boolean startPrefixMapping( 611 String prefix, 612 String uri, 613 boolean shouldFlush) 614 throws SAXException 615 { 616 // no namespace support for HTML 617 return false; 618 } 619 620 621 public void startPrefixMapping(String prefix, String uri) 622 throws org.xml.sax.SAXException 623 { 624 // no namespace support for HTML 625 } 626 627 628 public void namespaceAfterStartElement( 629 final String prefix, 630 final String uri) 631 throws SAXException 632 { 633 // no namespace support for HTML 634 } 635 636 public void flushPending() throws org.xml.sax.SAXException 637 { 638 if (m_needToCallStartDocument) 639 { 640 startDocumentInternal(); 641 m_needToCallStartDocument = false; 642 } 643 } 644 }