1 /* 2 * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.dtdparser; 27 28 import org.xml.sax.InputSource; 29 import org.xml.sax.SAXException; 30 import org.xml.sax.SAXParseException; 31 32 import java.io.CharConversionException; 33 import java.io.IOException; 34 import java.io.InputStream; 35 import java.io.InputStreamReader; 36 import java.io.Reader; 37 import java.io.UnsupportedEncodingException; 38 import java.net.URL; 39 import java.util.Arrays; 40 import java.util.Locale; 41 42 /** 43 * This is how the parser talks to its input entities, of all kinds. 44 * The entities are in a stack. 45 * <p> 46 * <P> For internal entities, the character arrays are referenced here, 47 * and read from as needed (they're read-only). External entities have 48 * mutable buffers, that are read into as needed. 49 * <p> 50 * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for 51 * whether it's in an external (parsed) entity or not. The XML 1.0 spec 52 * is inconsistent in explaining EOL handling; this is the sensible way. 53 * 54 * @author David Brownell 55 * @author Janet Koenig 56 * @version 1.4 00/08/05 57 */ 58 public class InputEntity { 59 private int start, finish; 60 private char buf []; 61 private int lineNumber = 1; 62 private boolean returnedFirstHalf = false; 63 private boolean maybeInCRLF = false; 64 65 // name of entity (never main document or unnamed DTD PE) 66 private String name; 67 68 private InputEntity next; 69 70 // for system and public IDs in diagnostics 71 private InputSource input; 72 73 // this is a buffer; some buffers can be replenished. 74 private Reader reader; 75 private boolean isClosed; 76 77 private DTDEventListener errHandler; 78 private Locale locale; 79 80 private StringBuffer rememberedText; 81 private int startRemember; 82 83 // record if this is a PE, so endParsedEntity won't be called 84 private boolean isPE; 85 86 // InputStreamReader throws an internal per-read exception, so 87 // we minimize reads. We also add a byte to compensate for the 88 // "ungetc" byte we keep, so that our downstream reads are as 89 // nicely sized as we can make them. 90 final private static int BUFSIZ = 8 * 1024 + 1; 91 92 final private static char newline [] = {'\n'}; 93 94 public static InputEntity getInputEntity(DTDEventListener h, Locale l) { 95 InputEntity retval = new InputEntity(); 96 retval.errHandler = h; 97 retval.locale = l; 98 return retval; 99 } 100 101 private InputEntity() { 102 } 103 104 // 105 // predicate: return true iff this is an internal entity reader, 106 // and so may safely be "popped" as needed. external entities have 107 // syntax to uphold; internal parameter entities have at most validity 108 // constraints to monitor. also, only external entities get decent 109 // location diagnostics. 110 // 111 public boolean isInternal() { 112 return reader == null; 113 } 114 115 // 116 // predicate: return true iff this is the toplevel document 117 // 118 public boolean isDocument() { 119 return next == null; 120 } 121 122 // 123 // predicate: return true iff this is a PE expansion (so that 124 // LexicalEventListner.endParsedEntity won't be called) 125 // 126 public boolean isParameterEntity() { 127 return isPE; 128 } 129 130 // 131 // return name of current entity 132 // 133 public String getName() { 134 return name; 135 } 136 137 // 138 // use this for an external parsed entity 139 // 140 public void init(InputSource in, String name, InputEntity stack, 141 boolean isPE) 142 throws IOException, SAXException { 143 144 input = in; 145 this.isPE = isPE; 146 reader = in.getCharacterStream(); 147 148 if (reader == null) { 149 InputStream bytes = in.getByteStream(); 150 151 if (bytes == null) 152 if (Boolean.valueOf(System.getProperty("enableExternalEntityProcessing"))) 153 reader = XmlReader.createReader(new URL(in.getSystemId()).openStream()); 154 else 155 fatal("P-082", new Object[] {in.getSystemId()}); 156 else if (in.getEncoding() != null) 157 reader = XmlReader.createReader(in.getByteStream(), in.getEncoding()); 158 else 159 reader = XmlReader.createReader(in.getByteStream()); 160 } 161 next = stack; 162 buf = new char[BUFSIZ]; 163 this.name = name; 164 checkRecursion(stack); 165 } 166 167 // 168 // use this for an internal parsed entity; buffer is readonly 169 // 170 public void init(char b [], String name, InputEntity stack, boolean isPE) 171 throws SAXException { 172 173 next = stack; 174 buf = Arrays.copyOf(b, b.length); 175 finish = b.length; 176 this.name = name; 177 this.isPE = isPE; 178 checkRecursion(stack); 179 } 180 181 private void checkRecursion(InputEntity stack) 182 throws SAXException { 183 184 if (stack == null) 185 return; 186 for (stack = stack.next; stack != null; stack = stack.next) { 187 if (stack.name != null && stack.name.equals(name)) 188 fatal("P-069", new Object[]{name}); 189 } 190 } 191 192 public InputEntity pop() throws IOException { 193 194 // caller has ensured there's nothing left to read 195 close(); 196 return next; 197 } 198 199 /** 200 * returns true iff there's no more data to consume ... 201 */ 202 public boolean isEOF() throws IOException, SAXException { 203 204 // called to ensure WF-ness of included entities and to pop 205 // input entities appropriately ... EOF is not always legal. 206 if (start >= finish) { 207 fillbuf(); 208 return start >= finish; 209 } else 210 return false; 211 } 212 213 /** 214 * Returns the name of the encoding in use, else null; the name 215 * returned is in as standard a form as we can get. 216 */ 217 public String getEncoding() { 218 219 if (reader == null) 220 return null; 221 if (reader instanceof XmlReader) 222 return ((XmlReader) reader).getEncoding(); 223 224 // XXX prefer a java2std() call to normalize names... 225 226 if (reader instanceof InputStreamReader) 227 return ((InputStreamReader) reader).getEncoding(); 228 return null; 229 } 230 231 232 /** 233 * returns the next name char, or NUL ... faster than getc(), 234 * and the common "name or nmtoken must be next" case won't 235 * need ungetc(). 236 */ 237 public char getNameChar() throws IOException, SAXException { 238 239 if (finish <= start) 240 fillbuf(); 241 if (finish > start) { 242 char c = buf[start++]; 243 if (XmlChars.isNameChar(c)) 244 return c; 245 start--; 246 } 247 return 0; 248 } 249 250 /** 251 * gets the next Java character -- might be part of an XML 252 * text character represented by a surrogate pair, or be 253 * the end of the entity. 254 */ 255 public char getc() throws IOException, SAXException { 256 257 if (finish <= start) 258 fillbuf(); 259 if (finish > start) { 260 char c = buf[start++]; 261 262 // [2] Char ::= #x0009 | #x000A | #x000D 263 // | [#x0020-#xD7FF] 264 // | [#xE000-#xFFFD] 265 // plus surrogate _pairs_ representing [#x10000-#x10ffff] 266 if (returnedFirstHalf) { 267 if (c >= 0xdc00 && c <= 0xdfff) { 268 returnedFirstHalf = false; 269 return c; 270 } else 271 fatal("P-070", new Object[]{Integer.toHexString(c)}); 272 } 273 if ((c >= 0x0020 && c <= 0xD7FF) 274 || c == 0x0009 275 // no surrogates! 276 || (c >= 0xE000 && c <= 0xFFFD)) 277 return c; 278 279 // 280 // CRLF and CR are both line ends; map both to LF, and 281 // keep line count correct. 282 // 283 else if (c == '\r' && !isInternal()) { 284 maybeInCRLF = true; 285 c = getc(); 286 if (c != '\n') 287 ungetc(); 288 maybeInCRLF = false; 289 290 lineNumber++; 291 return '\n'; 292 293 } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF 294 if (!isInternal() && !maybeInCRLF) 295 lineNumber++; 296 return c; 297 } 298 299 // surrogates... 300 if (c >= 0xd800 && c < 0xdc00) { 301 returnedFirstHalf = true; 302 return c; 303 } 304 305 fatal("P-071", new Object[]{Integer.toHexString(c)}); 306 } 307 throw new EndOfInputException(); 308 } 309 310 311 /** 312 * lookahead one character 313 */ 314 public boolean peekc(char c) throws IOException, SAXException { 315 316 if (finish <= start) 317 fillbuf(); 318 if (finish > start) { 319 if (buf[start] == c) { 320 start++; 321 return true; 322 } else 323 return false; 324 } 325 return false; 326 } 327 328 329 /** 330 * two character pushback is guaranteed 331 */ 332 public void ungetc() { 333 334 if (start == 0) 335 throw new InternalError("ungetc"); 336 start--; 337 338 if (buf[start] == '\n' || buf[start] == '\r') { 339 if (!isInternal()) 340 lineNumber--; 341 } else if (returnedFirstHalf) 342 returnedFirstHalf = false; 343 } 344 345 346 /** 347 * optional grammatical whitespace (discarded) 348 */ 349 public boolean maybeWhitespace() 350 throws IOException, SAXException { 351 352 char c; 353 boolean isSpace = false; 354 boolean sawCR = false; 355 356 // [3] S ::= #20 | #09 | #0D | #0A 357 for (; ;) { 358 if (finish <= start) 359 fillbuf(); 360 if (finish <= start) 361 return isSpace; 362 363 c = buf[start++]; 364 if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { 365 isSpace = true; 366 367 // 368 // CR, LF are line endings ... CLRF is one, not two! 369 // 370 if ((c == '\n' || c == '\r') && !isInternal()) { 371 if (!(c == '\n' && sawCR)) { 372 lineNumber++; 373 sawCR = false; 374 } 375 if (c == '\r') 376 sawCR = true; 377 } 378 } else { 379 start--; 380 return isSpace; 381 } 382 } 383 } 384 385 386 /** 387 * normal content; whitespace in markup may be handled 388 * specially if the parser uses the content model. 389 * <p> 390 * <P> content terminates with markup delimiter characters, 391 * namely ampersand (&amp;) and left angle bracket (&lt;). 392 * <p> 393 * <P> the document handler's characters() method is called 394 * on all the content found 395 */ 396 public boolean parsedContent(DTDEventListener docHandler 397 /*ElementValidator validator*/) 398 throws IOException, SAXException { 399 400 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 401 402 int first; // first char to return 403 int last; // last char to return 404 boolean sawContent; // sent any chars? 405 char c; 406 407 // deliver right out of the buffer, until delimiter, EOF, 408 // or error, refilling as we go 409 for (first = last = start, sawContent = false; ; last++) { 410 411 // buffer empty? 412 if (last >= finish) { 413 if (last > first) { 414 // validator.text (); 415 docHandler.characters(buf, first, last - first); 416 sawContent = true; 417 start = last; 418 } 419 if (isEOF()) // calls fillbuf 420 return sawContent; 421 first = start; 422 last = first - 1; // incremented in loop 423 continue; 424 } 425 426 c = buf[last]; 427 428 // 429 // pass most chars through ASAP; this inlines the code of 430 // [2] !XmlChars.isChar(c) leaving only characters needing 431 // special treatment ... line ends, surrogates, and: 432 // 0x0026 == '&' 433 // 0x003C == '<' 434 // 0x005D == ']' 435 // Comparisons ordered for speed on 'typical' text 436 // 437 if ((c > 0x005D && c <= 0xD7FF) // a-z and more 438 || (c < 0x0026 && c >= 0x0020) // space & punct 439 || (c > 0x003C && c < 0x005D) // A-Z & punct 440 || (c > 0x0026 && c < 0x003C) // 0-9 & punct 441 || c == 0x0009 442 || (c >= 0xE000 && c <= 0xFFFD) 443 ) 444 continue; 445 446 // terminate on markup delimiters 447 if (c == '<' || c == '&') 448 break; 449 450 // count lines 451 if (c == '\n') { 452 if (!isInternal()) 453 lineNumber++; 454 continue; 455 } 456 457 // External entities get CR, CRLF --> LF mapping 458 // Internal ones got it already, and we can't repeat 459 // else we break char ref handling!! 460 if (c == '\r') { 461 if (isInternal()) 462 continue; 463 464 docHandler.characters(buf, first, last - first); 465 docHandler.characters(newline, 0, 1); 466 sawContent = true; 467 lineNumber++; 468 if (finish > (last + 1)) { 469 if (buf[last + 1] == '\n') 470 last++; 471 } else { // CR at end of buffer 472 // XXX case not yet handled: CRLF here will look like two lines 473 } 474 first = start = last + 1; 475 continue; 476 } 477 478 // ']]>' is a WF error -- must fail if we see it 479 if (c == ']') { 480 switch (finish - last) { 481 // for suspicious end-of-buffer cases, get more data 482 // into the buffer to rule out this sequence. 483 case 2: 484 if (buf[last + 1] != ']') 485 continue; 486 // FALLTHROUGH 487 488 case 1: 489 if (reader == null || isClosed) 490 continue; 491 if (last == first) 492 throw new InternalError("fillbuf"); 493 last--; 494 if (last > first) { 495 // validator.text (); 496 docHandler.characters(buf, first, last - first); 497 sawContent = true; 498 start = last; 499 } 500 fillbuf(); 501 first = last = start; 502 continue; 503 504 // otherwise any "]]>" would be buffered, and we can 505 // see right away if that's what we have 506 default: 507 if (buf[last + 1] == ']' && buf[last + 2] == '>') 508 fatal("P-072", null); 509 continue; 510 } 511 } 512 513 // correctly paired surrogates are OK 514 if (c >= 0xd800 && c <= 0xdfff) { 515 if ((last + 1) >= finish) { 516 if (last > first) { 517 // validator.text (); 518 docHandler.characters(buf, first, last - first); 519 sawContent = true; 520 start = last + 1; 521 } 522 if (isEOF()) { // calls fillbuf 523 fatal("P-081", 524 new Object[]{Integer.toHexString(c)}); 525 } 526 first = start; 527 last = first; 528 continue; 529 } 530 if (checkSurrogatePair(last)) 531 last++; 532 else { 533 last--; 534 // also terminate on surrogate pair oddities 535 break; 536 } 537 continue; 538 } 539 540 fatal("P-071", new Object[]{Integer.toHexString(c)}); 541 } 542 if (last == first) 543 return sawContent; 544 // validator.text (); 545 docHandler.characters(buf, first, last - first); 546 start = last; 547 return true; 548 } 549 550 551 /** 552 * CDATA -- character data, terminated by {@code "]]>"} and optionally 553 * including unescaped markup delimiters (ampersand and left angle 554 * bracket). This should otherwise be exactly like character data, 555 * modulo differences in error report details. 556 * <p> 557 * <P> The document handler's characters() or ignorableWhitespace() 558 * methods are invoked on all the character data found 559 * 560 * @param docHandler gets callbacks for character data 561 * @param ignorableWhitespace if true, whitespace characters will 562 * be reported using docHandler.ignorableWhitespace(); implicitly, 563 * non-whitespace characters will cause validation errors 564 * @param whitespaceInvalidMessage if true, ignorable whitespace 565 * causes a validity error report as well as a callback 566 */ 567 public boolean unparsedContent(DTDEventListener docHandler, 568 /*ElementValidator validator,*/ 569 boolean ignorableWhitespace, 570 String whitespaceInvalidMessage) 571 throws IOException, SAXException { 572 573 // [18] CDSect ::= CDStart CData CDEnd 574 // [19] CDStart ::= '<![CDATA[' 575 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 576 // [21] CDEnd ::= ']]>' 577 578 // caller peeked the leading '<' ... 579 if (!peek("![CDATA[", null)) 580 return false; 581 docHandler.startCDATA(); 582 583 // only a literal ']]>' stops this ... 584 int last; 585 586 for (; ;) { // until ']]>' seen 587 boolean done = false; 588 char c; 589 590 // don't report ignorable whitespace as "text" for 591 // validation purposes. 592 boolean white = ignorableWhitespace; 593 594 for (last = start; last < finish; last++) { 595 c = buf[last]; 596 597 // 598 // Reject illegal characters. 599 // 600 if (!XmlChars.isChar(c)) { 601 white = false; 602 if (c >= 0xd800 && c <= 0xdfff) { 603 if (checkSurrogatePair(last)) { 604 last++; 605 continue; 606 } else { 607 last--; 608 break; 609 } 610 } 611 fatal("P-071", new Object[] 612 {Integer.toHexString(buf[last])}); 613 } 614 if (c == '\n') { 615 if (!isInternal()) 616 lineNumber++; 617 continue; 618 } 619 if (c == '\r') { 620 // As above, we can't repeat CR/CRLF --> LF mapping 621 if (isInternal()) 622 continue; 623 624 if (white) { 625 if (whitespaceInvalidMessage != null && errHandler != null) 626 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 627 whitespaceInvalidMessage), null)); 628 docHandler.ignorableWhitespace(buf, start, 629 last - start); 630 docHandler.ignorableWhitespace(newline, 0, 1); 631 } else { 632 // validator.text (); 633 docHandler.characters(buf, start, last - start); 634 docHandler.characters(newline, 0, 1); 635 } 636 lineNumber++; 637 if (finish > (last + 1)) { 638 if (buf[last + 1] == '\n') 639 last++; 640 } else { // CR at end of buffer 641 // XXX case not yet handled ... as above 642 } 643 start = last + 1; 644 continue; 645 } 646 if (c != ']') { 647 if (c != ' ' && c != '\t') 648 white = false; 649 continue; 650 } 651 if ((last + 2) < finish) { 652 if (buf[last + 1] == ']' && buf[last + 2] == '>') { 653 done = true; 654 break; 655 } 656 white = false; 657 continue; 658 } else { 659 //last--; 660 break; 661 } 662 } 663 if (white) { 664 if (whitespaceInvalidMessage != null && errHandler != null) 665 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 666 whitespaceInvalidMessage), null)); 667 docHandler.ignorableWhitespace(buf, start, last - start); 668 } else { 669 // validator.text (); 670 docHandler.characters(buf, start, last - start); 671 } 672 if (done) { 673 start = last + 3; 674 break; 675 } 676 start = last; 677 if (isEOF()) 678 fatal("P-073", null); 679 } 680 docHandler.endCDATA(); 681 return true; 682 } 683 684 // return false to backstep at end of buffer) 685 private boolean checkSurrogatePair(int offset) 686 throws SAXException { 687 688 if ((offset + 1) >= finish) 689 return false; 690 691 char c1 = buf[offset++]; 692 char c2 = buf[offset]; 693 694 if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) 695 return true; 696 fatal("P-074", new Object[]{ 697 Integer.toHexString(c1 & 0x0ffff), 698 Integer.toHexString(c2 & 0x0ffff) 699 }); 700 return false; 701 } 702 703 704 /** 705 * whitespace in markup (flagged to app, discardable) 706 * <p> 707 * <P> the document handler's ignorableWhitespace() method 708 * is called on all the whitespace found 709 */ 710 public boolean ignorableWhitespace(DTDEventListener handler) 711 throws IOException, SAXException { 712 713 char c; 714 boolean isSpace = false; 715 int first; 716 717 // [3] S ::= #20 | #09 | #0D | #0A 718 for (first = start; ;) { 719 if (finish <= start) { 720 if (isSpace) 721 handler.ignorableWhitespace(buf, first, start - first); 722 fillbuf(); 723 first = start; 724 } 725 if (finish <= start) 726 return isSpace; 727 728 c = buf[start++]; 729 switch (c) { 730 case '\n': 731 if (!isInternal()) 732 lineNumber++; 733 // XXX handles Macintosh line endings wrong 734 // fallthrough 735 case 0x09: 736 case 0x20: 737 isSpace = true; 738 continue; 739 740 case '\r': 741 isSpace = true; 742 if (!isInternal()) 743 lineNumber++; 744 handler.ignorableWhitespace(buf, first, 745 (start - 1) - first); 746 handler.ignorableWhitespace(newline, 0, 1); 747 if (start < finish && buf[start] == '\n') 748 ++start; 749 first = start; 750 continue; 751 752 default: 753 ungetc(); 754 if (isSpace) 755 handler.ignorableWhitespace(buf, first, start - first); 756 return isSpace; 757 } 758 } 759 } 760 761 /** 762 * returns false iff 'next' string isn't as provided, 763 * else skips that text and returns true. 764 * <p> 765 * <P> NOTE: two alternative string representations are 766 * both passed in, since one is faster. 767 */ 768 public boolean peek(String next, char chars []) 769 throws IOException, SAXException { 770 771 int len; 772 int i; 773 774 if (chars != null) 775 len = chars.length; 776 else 777 len = next.length(); 778 779 // buffer should hold the whole thing ... give it a 780 // chance for the end-of-buffer case and cope with EOF 781 // by letting fillbuf compact and fill 782 if (finish <= start || (finish - start) < len) 783 fillbuf(); 784 785 // can't peek past EOF 786 if (finish <= start) 787 return false; 788 789 // compare the string; consume iff it matches 790 if (chars != null) { 791 for (i = 0; i < len && (start + i) < finish; i++) { 792 if (buf[start + i] != chars[i]) 793 return false; 794 } 795 } else { 796 for (i = 0; i < len && (start + i) < finish; i++) { 797 if (buf[start + i] != next.charAt(i)) 798 return false; 799 } 800 } 801 802 // if the first fillbuf didn't get enough data, give 803 // fillbuf another chance to read 804 if (i < len) { 805 if (reader == null || isClosed) 806 return false; 807 808 // 809 // This diagnostic "knows" that the only way big strings would 810 // fail to be peeked is where it's a symbol ... e.g. for an 811 // </EndTag> construct. That knowledge could also be applied 812 // to get rid of the symbol length constraint, since having 813 // the wrong symbol is a fatal error anyway ... 814 // 815 if (len > buf.length) { 816 fatal("P-077", new Object[]{Integer.valueOf(buf.length)}); 817 } 818 819 fillbuf(); 820 return peek(next, chars); 821 } 822 823 start += len; 824 return true; 825 } 826 827 828 // 829 // Support for reporting the internal DTD subset, so <!DOCTYPE...> 830 // declarations can be recreated. This is collected as a single 831 // string; such subsets are normally small, and many applications 832 // don't even care about this. 833 // 834 public void startRemembering() { 835 836 if (startRemember != 0) 837 throw new InternalError(); 838 startRemember = start; 839 } 840 841 public String rememberText() { 842 843 String retval; 844 845 // If the internal subset crossed a buffer boundary, we 846 // created a temporary buffer. 847 if (rememberedText != null) { 848 rememberedText.append(buf, startRemember, 849 start - startRemember); 850 retval = rememberedText.toString(); 851 } else 852 retval = new String(buf, startRemember, 853 start - startRemember); 854 855 startRemember = 0; 856 rememberedText = null; 857 return retval; 858 } 859 860 private InputEntity getTopEntity() { 861 862 InputEntity current = this; 863 864 // don't report locations within internal entities! 865 866 while (current != null && current.input == null) 867 current = current.next; 868 return current == null ? this : current; 869 } 870 871 /** 872 * Returns the public ID of this input source, if known 873 */ 874 public String getPublicId() { 875 876 InputEntity where = getTopEntity(); 877 if (where == this) 878 return input.getPublicId(); 879 return where.getPublicId(); 880 } 881 882 /** 883 * Returns the system ID of this input source, if known 884 */ 885 public String getSystemId() { 886 887 InputEntity where = getTopEntity(); 888 if (where == this) 889 return input.getSystemId(); 890 return where.getSystemId(); 891 } 892 893 /** 894 * Returns the current line number in this input source 895 */ 896 public int getLineNumber() { 897 898 InputEntity where = getTopEntity(); 899 if (where == this) 900 return lineNumber; 901 return where.getLineNumber(); 902 } 903 904 /** 905 * returns -1; maintaining column numbers hurts performance 906 */ 907 public int getColumnNumber() { 908 909 return -1; // not maintained (speed) 910 } 911 912 913 // 914 // n.b. for non-EOF end-of-buffer cases, reader should return 915 // at least a handful of bytes so various lookaheads behave. 916 // 917 // two character pushback exists except at first; characters 918 // represented by surrogate pairs can't be pushed back (they'd 919 // only be in character data anyway). 920 // 921 // DTD exception thrown on char conversion problems; line number 922 // will be low, as a rule. 923 // 924 private void fillbuf() throws IOException, SAXException { 925 926 // don't touched fixed buffers, that'll usually 927 // change entity values (and isn't needed anyway) 928 // likewise, ignore closed streams 929 if (reader == null || isClosed) 930 return; 931 932 // if remembering DTD text, copy! 933 if (startRemember != 0) { 934 if (rememberedText == null) 935 rememberedText = new StringBuffer(buf.length); 936 rememberedText.append(buf, startRemember, 937 start - startRemember); 938 } 939 940 boolean extra = (finish > 0) && (start > 0); 941 int len; 942 943 if (extra) // extra pushback 944 start--; 945 len = finish - start; 946 947 System.arraycopy(buf, start, buf, 0, len); 948 start = 0; 949 finish = len; 950 951 try { 952 len = buf.length - len; 953 len = reader.read(buf, finish, len); 954 } catch (UnsupportedEncodingException e) { 955 fatal("P-075", new Object[]{e.getMessage()}); 956 } catch (CharConversionException e) { 957 fatal("P-076", new Object[]{e.getMessage()}); 958 } 959 if (len >= 0) 960 finish += len; 961 else 962 close(); 963 if (extra) // extra pushback 964 start++; 965 966 if (startRemember != 0) 967 // assert extra == true 968 startRemember = 1; 969 } 970 971 public void close() { 972 973 try { 974 if (reader != null && !isClosed) 975 reader.close(); 976 isClosed = true; 977 } catch (IOException e) { 978 /* NOTHING */ 979 } 980 } 981 982 983 private void fatal(String messageId, Object params []) 984 throws SAXException { 985 986 SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); 987 988 // not continuable ... e.g. WF errors 989 close(); 990 if (errHandler != null) { 991 errHandler.fatalError(x); 992 } 993 throw x; 994 } 995 }