1 /* 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.dtdparser; 27 28 import org.xml.sax.InputSource; 29 import org.xml.sax.SAXException; 30 import org.xml.sax.SAXParseException; 31 32 import java.io.CharConversionException; 33 import java.io.IOException; 34 import java.io.InputStream; 35 import java.io.InputStreamReader; 36 import java.io.Reader; 37 import java.io.UnsupportedEncodingException; 38 import java.net.URL; 39 import java.util.Locale; 40 41 /** 42 * This is how the parser talks to its input entities, of all kinds. 43 * The entities are in a stack. 44 * <p/> 45 * <P> For internal entities, the character arrays are referenced here, 46 * and read from as needed (they're read-only). External entities have 47 * mutable buffers, that are read into as needed. 48 * <p/> 49 * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for 50 * whether it's in an external (parsed) entity or not. The XML 1.0 spec 51 * is inconsistent in explaining EOL handling; this is the sensible way. 52 * 53 * @author David Brownell 54 * @author Janet Koenig 55 * @version 1.4 00/08/05 56 */ 57 public class InputEntity { 58 private int start, finish; 59 private char buf []; 60 private int lineNumber = 1; 61 private boolean returnedFirstHalf = false; 62 private boolean maybeInCRLF = false; 63 64 // name of entity (never main document or unnamed DTD PE) 65 private String name; 66 67 private InputEntity next; 68 69 // for system and public IDs in diagnostics 70 private InputSource input; 71 72 // this is a buffer; some buffers can be replenished. 73 private Reader reader; 74 private boolean isClosed; 75 76 private DTDEventListener errHandler; 77 private Locale locale; 78 79 private StringBuffer rememberedText; 80 private int startRemember; 81 82 // record if this is a PE, so endParsedEntity won't be called 83 private boolean isPE; 84 85 // InputStreamReader throws an internal per-read exception, so 86 // we minimize reads. We also add a byte to compensate for the 87 // "ungetc" byte we keep, so that our downstream reads are as 88 // nicely sized as we can make them. 89 final private static int BUFSIZ = 8 * 1024 + 1; 90 91 final private static char newline [] = {'\n'}; 92 93 public static InputEntity getInputEntity(DTDEventListener h, Locale l) { 94 InputEntity retval = new InputEntity(); 95 retval.errHandler = h; 96 retval.locale = l; 97 return retval; 98 } 99 100 private InputEntity() { 101 } 102 103 // 104 // predicate: return true iff this is an internal entity reader, 105 // and so may safely be "popped" as needed. external entities have 106 // syntax to uphold; internal parameter entities have at most validity 107 // constraints to monitor. also, only external entities get decent 108 // location diagnostics. 109 // 110 public boolean isInternal() { 111 return reader == null; 112 } 113 114 // 115 // predicate: return true iff this is the toplevel document 116 // 117 public boolean isDocument() { 118 return next == null; 119 } 120 121 // 122 // predicate: return true iff this is a PE expansion (so that 123 // LexicalEventListner.endParsedEntity won't be called) 124 // 125 public boolean isParameterEntity() { 126 return isPE; 127 } 128 129 // 130 // return name of current entity 131 // 132 public String getName() { 133 return name; 134 } 135 136 // 137 // use this for an external parsed entity 138 // 139 public void init(InputSource in, String name, InputEntity stack, 140 boolean isPE) 141 throws IOException, SAXException { 142 143 input = in; 144 this.isPE = isPE; 145 reader = in.getCharacterStream(); 146 147 if (reader == null) { 148 InputStream bytes = in.getByteStream(); 149 150 if (bytes == null) 151 reader = XmlReader.createReader(new URL(in.getSystemId()) 152 .openStream()); 153 else if (in.getEncoding() != null) 154 reader = XmlReader.createReader(in.getByteStream(), 155 in.getEncoding()); 156 else 157 reader = XmlReader.createReader(in.getByteStream()); 158 } 159 next = stack; 160 buf = new char[BUFSIZ]; 161 this.name = name; 162 checkRecursion(stack); 163 } 164 165 // 166 // use this for an internal parsed entity; buffer is readonly 167 // 168 public void init(char b [], String name, InputEntity stack, boolean isPE) 169 throws SAXException { 170 171 next = stack; 172 buf = b; 173 finish = b.length; 174 this.name = name; 175 this.isPE = isPE; 176 checkRecursion(stack); 177 } 178 179 private void checkRecursion(InputEntity stack) 180 throws SAXException { 181 182 if (stack == null) 183 return; 184 for (stack = stack.next; stack != null; stack = stack.next) { 185 if (stack.name != null && stack.name.equals(name)) 186 fatal("P-069", new Object[]{name}); 187 } 188 } 189 190 public InputEntity pop() throws IOException { 191 192 // caller has ensured there's nothing left to read 193 close(); 194 return next; 195 } 196 197 /** 198 * returns true iff there's no more data to consume ... 199 */ 200 public boolean isEOF() throws IOException, SAXException { 201 202 // called to ensure WF-ness of included entities and to pop 203 // input entities appropriately ... EOF is not always legal. 204 if (start >= finish) { 205 fillbuf(); 206 return start >= finish; 207 } else 208 return false; 209 } 210 211 /** 212 * Returns the name of the encoding in use, else null; the name 213 * returned is in as standard a form as we can get. 214 */ 215 public String getEncoding() { 216 217 if (reader == null) 218 return null; 219 if (reader instanceof XmlReader) 220 return ((XmlReader) reader).getEncoding(); 221 222 // XXX prefer a java2std() call to normalize names... 223 224 if (reader instanceof InputStreamReader) 225 return ((InputStreamReader) reader).getEncoding(); 226 return null; 227 } 228 229 230 /** 231 * returns the next name char, or NUL ... faster than getc(), 232 * and the common "name or nmtoken must be next" case won't 233 * need ungetc(). 234 */ 235 public char getNameChar() throws IOException, SAXException { 236 237 if (finish <= start) 238 fillbuf(); 239 if (finish > start) { 240 char c = buf[start++]; 241 if (XmlChars.isNameChar(c)) 242 return c; 243 start--; 244 } 245 return 0; 246 } 247 248 /** 249 * gets the next Java character -- might be part of an XML 250 * text character represented by a surrogate pair, or be 251 * the end of the entity. 252 */ 253 public char getc() throws IOException, SAXException { 254 255 if (finish <= start) 256 fillbuf(); 257 if (finish > start) { 258 char c = buf[start++]; 259 260 // [2] Char ::= #x0009 | #x000A | #x000D 261 // | [#x0020-#xD7FF] 262 // | [#xE000-#xFFFD] 263 // plus surrogate _pairs_ representing [#x10000-#x10ffff] 264 if (returnedFirstHalf) { 265 if (c >= 0xdc00 && c <= 0xdfff) { 266 returnedFirstHalf = false; 267 return c; 268 } else 269 fatal("P-070", new Object[]{Integer.toHexString(c)}); 270 } 271 if ((c >= 0x0020 && c <= 0xD7FF) 272 || c == 0x0009 273 // no surrogates! 274 || (c >= 0xE000 && c <= 0xFFFD)) 275 return c; 276 277 // 278 // CRLF and CR are both line ends; map both to LF, and 279 // keep line count correct. 280 // 281 else if (c == '\r' && !isInternal()) { 282 maybeInCRLF = true; 283 c = getc(); 284 if (c != '\n') 285 ungetc(); 286 maybeInCRLF = false; 287 288 lineNumber++; 289 return '\n'; 290 291 } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF 292 if (!isInternal() && !maybeInCRLF) 293 lineNumber++; 294 return c; 295 } 296 297 // surrogates... 298 if (c >= 0xd800 && c < 0xdc00) { 299 returnedFirstHalf = true; 300 return c; 301 } 302 303 fatal("P-071", new Object[]{Integer.toHexString(c)}); 304 } 305 throw new EndOfInputException(); 306 } 307 308 309 /** 310 * lookahead one character 311 */ 312 public boolean peekc(char c) throws IOException, SAXException { 313 314 if (finish <= start) 315 fillbuf(); 316 if (finish > start) { 317 if (buf[start] == c) { 318 start++; 319 return true; 320 } else 321 return false; 322 } 323 return false; 324 } 325 326 327 /** 328 * two character pushback is guaranteed 329 */ 330 public void ungetc() { 331 332 if (start == 0) 333 throw new InternalError("ungetc"); 334 start--; 335 336 if (buf[start] == '\n' || buf[start] == '\r') { 337 if (!isInternal()) 338 lineNumber--; 339 } else if (returnedFirstHalf) 340 returnedFirstHalf = false; 341 } 342 343 344 /** 345 * optional grammatical whitespace (discarded) 346 */ 347 public boolean maybeWhitespace() 348 throws IOException, SAXException { 349 350 char c; 351 boolean isSpace = false; 352 boolean sawCR = false; 353 354 // [3] S ::= #20 | #09 | #0D | #0A 355 for (; ;) { 356 if (finish <= start) 357 fillbuf(); 358 if (finish <= start) 359 return isSpace; 360 361 c = buf[start++]; 362 if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { 363 isSpace = true; 364 365 // 366 // CR, LF are line endings ... CLRF is one, not two! 367 // 368 if ((c == '\n' || c == '\r') && !isInternal()) { 369 if (!(c == '\n' && sawCR)) { 370 lineNumber++; 371 sawCR = false; 372 } 373 if (c == '\r') 374 sawCR = true; 375 } 376 } else { 377 start--; 378 return isSpace; 379 } 380 } 381 } 382 383 384 /** 385 * normal content; whitespace in markup may be handled 386 * specially if the parser uses the content model. 387 * <p/> 388 * <P> content terminates with markup delimiter characters, 389 * namely ampersand (&amp;) and left angle bracket (&lt;). 390 * <p/> 391 * <P> the document handler's characters() method is called 392 * on all the content found 393 */ 394 public boolean parsedContent(DTDEventListener docHandler 395 /*ElementValidator validator*/) 396 throws IOException, SAXException { 397 398 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 399 400 int first; // first char to return 401 int last; // last char to return 402 boolean sawContent; // sent any chars? 403 char c; 404 405 // deliver right out of the buffer, until delimiter, EOF, 406 // or error, refilling as we go 407 for (first = last = start, sawContent = false; ; last++) { 408 409 // buffer empty? 410 if (last >= finish) { 411 if (last > first) { 412 // validator.text (); 413 docHandler.characters(buf, first, last - first); 414 sawContent = true; 415 start = last; 416 } 417 if (isEOF()) // calls fillbuf 418 return sawContent; 419 first = start; 420 last = first - 1; // incremented in loop 421 continue; 422 } 423 424 c = buf[last]; 425 426 // 427 // pass most chars through ASAP; this inlines the code of 428 // [2] !XmlChars.isChar(c) leaving only characters needing 429 // special treatment ... line ends, surrogates, and: 430 // 0x0026 == '&' 431 // 0x003C == '<' 432 // 0x005D == ']' 433 // Comparisons ordered for speed on 'typical' text 434 // 435 if ((c > 0x005D && c <= 0xD7FF) // a-z and more 436 || (c < 0x0026 && c >= 0x0020) // space & punct 437 || (c > 0x003C && c < 0x005D) // A-Z & punct 438 || (c > 0x0026 && c < 0x003C) // 0-9 & punct 439 || c == 0x0009 440 || (c >= 0xE000 && c <= 0xFFFD) 441 ) 442 continue; 443 444 // terminate on markup delimiters 445 if (c == '<' || c == '&') 446 break; 447 448 // count lines 449 if (c == '\n') { 450 if (!isInternal()) 451 lineNumber++; 452 continue; 453 } 454 455 // External entities get CR, CRLF --> LF mapping 456 // Internal ones got it already, and we can't repeat 457 // else we break char ref handling!! 458 if (c == '\r') { 459 if (isInternal()) 460 continue; 461 462 docHandler.characters(buf, first, last - first); 463 docHandler.characters(newline, 0, 1); 464 sawContent = true; 465 lineNumber++; 466 if (finish > (last + 1)) { 467 if (buf[last + 1] == '\n') 468 last++; 469 } else { // CR at end of buffer 470 // XXX case not yet handled: CRLF here will look like two lines 471 } 472 first = start = last + 1; 473 continue; 474 } 475 476 // ']]>' is a WF error -- must fail if we see it 477 if (c == ']') { 478 switch (finish - last) { 479 // for suspicious end-of-buffer cases, get more data 480 // into the buffer to rule out this sequence. 481 case 2: 482 if (buf[last + 1] != ']') 483 continue; 484 // FALLTHROUGH 485 486 case 1: 487 if (reader == null || isClosed) 488 continue; 489 if (last == first) 490 throw new InternalError("fillbuf"); 491 last--; 492 if (last > first) { 493 // validator.text (); 494 docHandler.characters(buf, first, last - first); 495 sawContent = true; 496 start = last; 497 } 498 fillbuf(); 499 first = last = start; 500 continue; 501 502 // otherwise any "]]>" would be buffered, and we can 503 // see right away if that's what we have 504 default: 505 if (buf[last + 1] == ']' && buf[last + 2] == '>') 506 fatal("P-072", null); 507 continue; 508 } 509 } 510 511 // correctly paired surrogates are OK 512 if (c >= 0xd800 && c <= 0xdfff) { 513 if ((last + 1) >= finish) { 514 if (last > first) { 515 // validator.text (); 516 docHandler.characters(buf, first, last - first); 517 sawContent = true; 518 start = last + 1; 519 } 520 if (isEOF()) { // calls fillbuf 521 fatal("P-081", 522 new Object[]{Integer.toHexString(c)}); 523 } 524 first = start; 525 last = first; 526 continue; 527 } 528 if (checkSurrogatePair(last)) 529 last++; 530 else { 531 last--; 532 // also terminate on surrogate pair oddities 533 break; 534 } 535 continue; 536 } 537 538 fatal("P-071", new Object[]{Integer.toHexString(c)}); 539 } 540 if (last == first) 541 return sawContent; 542 // validator.text (); 543 docHandler.characters(buf, first, last - first); 544 start = last; 545 return true; 546 } 547 548 549 /** 550 * CDATA -- character data, terminated by "]]>" and optionally 551 * including unescaped markup delimiters (ampersand and left angle 552 * bracket). This should otherwise be exactly like character data, 553 * modulo differences in error report details. 554 * <p/> 555 * <P> The document handler's characters() or ignorableWhitespace() 556 * methods are invoked on all the character data found 557 * 558 * @param docHandler gets callbacks for character data 559 * @param ignorableWhitespace if true, whitespace characters will 560 * be reported using docHandler.ignorableWhitespace(); implicitly, 561 * non-whitespace characters will cause validation errors 562 * @param whitespaceInvalidMessage if true, ignorable whitespace 563 * causes a validity error report as well as a callback 564 */ 565 public boolean unparsedContent(DTDEventListener docHandler, 566 /*ElementValidator validator,*/ 567 boolean ignorableWhitespace, 568 String whitespaceInvalidMessage) 569 throws IOException, SAXException { 570 571 // [18] CDSect ::= CDStart CData CDEnd 572 // [19] CDStart ::= '<![CDATA[' 573 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 574 // [21] CDEnd ::= ']]>' 575 576 // caller peeked the leading '<' ... 577 if (!peek("![CDATA[", null)) 578 return false; 579 docHandler.startCDATA(); 580 581 // only a literal ']]>' stops this ... 582 int last; 583 584 for (; ;) { // until ']]>' seen 585 boolean done = false; 586 char c; 587 588 // don't report ignorable whitespace as "text" for 589 // validation purposes. 590 boolean white = ignorableWhitespace; 591 592 for (last = start; last < finish; last++) { 593 c = buf[last]; 594 595 // 596 // Reject illegal characters. 597 // 598 if (!XmlChars.isChar(c)) { 599 white = false; 600 if (c >= 0xd800 && c <= 0xdfff) { 601 if (checkSurrogatePair(last)) { 602 last++; 603 continue; 604 } else { 605 last--; 606 break; 607 } 608 } 609 fatal("P-071", new Object[] 610 {Integer.toHexString(buf[last])}); 611 } 612 if (c == '\n') { 613 if (!isInternal()) 614 lineNumber++; 615 continue; 616 } 617 if (c == '\r') { 618 // As above, we can't repeat CR/CRLF --> LF mapping 619 if (isInternal()) 620 continue; 621 622 if (white) { 623 if (whitespaceInvalidMessage != null) 624 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 625 whitespaceInvalidMessage), null)); 626 docHandler.ignorableWhitespace(buf, start, 627 last - start); 628 docHandler.ignorableWhitespace(newline, 0, 1); 629 } else { 630 // validator.text (); 631 docHandler.characters(buf, start, last - start); 632 docHandler.characters(newline, 0, 1); 633 } 634 lineNumber++; 635 if (finish > (last + 1)) { 636 if (buf[last + 1] == '\n') 637 last++; 638 } else { // CR at end of buffer 639 // XXX case not yet handled ... as above 640 } 641 start = last + 1; 642 continue; 643 } 644 if (c != ']') { 645 if (c != ' ' && c != '\t') 646 white = false; 647 continue; 648 } 649 if ((last + 2) < finish) { 650 if (buf[last + 1] == ']' && buf[last + 2] == '>') { 651 done = true; 652 break; 653 } 654 white = false; 655 continue; 656 } else { 657 //last--; 658 break; 659 } 660 } 661 if (white) { 662 if (whitespaceInvalidMessage != null) 663 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, 664 whitespaceInvalidMessage), null)); 665 docHandler.ignorableWhitespace(buf, start, last - start); 666 } else { 667 // validator.text (); 668 docHandler.characters(buf, start, last - start); 669 } 670 if (done) { 671 start = last + 3; 672 break; 673 } 674 start = last; 675 if (isEOF()) 676 fatal("P-073", null); 677 } 678 docHandler.endCDATA(); 679 return true; 680 } 681 682 // return false to backstep at end of buffer) 683 private boolean checkSurrogatePair(int offset) 684 throws SAXException { 685 686 if ((offset + 1) >= finish) 687 return false; 688 689 char c1 = buf[offset++]; 690 char c2 = buf[offset]; 691 692 if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) 693 return true; 694 fatal("P-074", new Object[]{ 695 Integer.toHexString(c1 & 0x0ffff), 696 Integer.toHexString(c2 & 0x0ffff) 697 }); 698 return false; 699 } 700 701 702 /** 703 * whitespace in markup (flagged to app, discardable) 704 * <p/> 705 * <P> the document handler's ignorableWhitespace() method 706 * is called on all the whitespace found 707 */ 708 public boolean ignorableWhitespace(DTDEventListener handler) 709 throws IOException, SAXException { 710 711 char c; 712 boolean isSpace = false; 713 int first; 714 715 // [3] S ::= #20 | #09 | #0D | #0A 716 for (first = start; ;) { 717 if (finish <= start) { 718 if (isSpace) 719 handler.ignorableWhitespace(buf, first, start - first); 720 fillbuf(); 721 first = start; 722 } 723 if (finish <= start) 724 return isSpace; 725 726 c = buf[start++]; 727 switch (c) { 728 case '\n': 729 if (!isInternal()) 730 lineNumber++; 731 // XXX handles Macintosh line endings wrong 732 // fallthrough 733 case 0x09: 734 case 0x20: 735 isSpace = true; 736 continue; 737 738 case '\r': 739 isSpace = true; 740 if (!isInternal()) 741 lineNumber++; 742 handler.ignorableWhitespace(buf, first, 743 (start - 1) - first); 744 handler.ignorableWhitespace(newline, 0, 1); 745 if (start < finish && buf[start] == '\n') 746 ++start; 747 first = start; 748 continue; 749 750 default: 751 ungetc(); 752 if (isSpace) 753 handler.ignorableWhitespace(buf, first, start - first); 754 return isSpace; 755 } 756 } 757 } 758 759 /** 760 * returns false iff 'next' string isn't as provided, 761 * else skips that text and returns true. 762 * <p/> 763 * <P> NOTE: two alternative string representations are 764 * both passed in, since one is faster. 765 */ 766 public boolean peek(String next, char chars []) 767 throws IOException, SAXException { 768 769 int len; 770 int i; 771 772 if (chars != null) 773 len = chars.length; 774 else 775 len = next.length(); 776 777 // buffer should hold the whole thing ... give it a 778 // chance for the end-of-buffer case and cope with EOF 779 // by letting fillbuf compact and fill 780 if (finish <= start || (finish - start) < len) 781 fillbuf(); 782 783 // can't peek past EOF 784 if (finish <= start) 785 return false; 786 787 // compare the string; consume iff it matches 788 if (chars != null) { 789 for (i = 0; i < len && (start + i) < finish; i++) { 790 if (buf[start + i] != chars[i]) 791 return false; 792 } 793 } else { 794 for (i = 0; i < len && (start + i) < finish; i++) { 795 if (buf[start + i] != next.charAt(i)) 796 return false; 797 } 798 } 799 800 // if the first fillbuf didn't get enough data, give 801 // fillbuf another chance to read 802 if (i < len) { 803 if (reader == null || isClosed) 804 return false; 805 806 // 807 // This diagnostic "knows" that the only way big strings would 808 // fail to be peeked is where it's a symbol ... e.g. for an 809 // </EndTag> construct. That knowledge could also be applied 810 // to get rid of the symbol length constraint, since having 811 // the wrong symbol is a fatal error anyway ... 812 // 813 if (len > buf.length) 814 fatal("P-077", new Object[]{new Integer(buf.length)}); 815 816 fillbuf(); 817 return peek(next, chars); 818 } 819 820 start += len; 821 return true; 822 } 823 824 825 // 826 // Support for reporting the internal DTD subset, so <!DOCTYPE...> 827 // declarations can be recreated. This is collected as a single 828 // string; such subsets are normally small, and many applications 829 // don't even care about this. 830 // 831 public void startRemembering() { 832 833 if (startRemember != 0) 834 throw new InternalError(); 835 startRemember = start; 836 } 837 838 public String rememberText() { 839 840 String retval; 841 842 // If the internal subset crossed a buffer boundary, we 843 // created a temporary buffer. 844 if (rememberedText != null) { 845 rememberedText.append(buf, startRemember, 846 start - startRemember); 847 retval = rememberedText.toString(); 848 } else 849 retval = new String(buf, startRemember, 850 start - startRemember); 851 852 startRemember = 0; 853 rememberedText = null; 854 return retval; 855 } 856 857 private InputEntity getTopEntity() { 858 859 InputEntity current = this; 860 861 // don't report locations within internal entities! 862 863 while (current != null && current.input == null) 864 current = current.next; 865 return current == null ? this : current; 866 } 867 868 /** 869 * Returns the public ID of this input source, if known 870 */ 871 public String getPublicId() { 872 873 InputEntity where = getTopEntity(); 874 if (where == this) 875 return input.getPublicId(); 876 return where.getPublicId(); 877 } 878 879 /** 880 * Returns the system ID of this input source, if known 881 */ 882 public String getSystemId() { 883 884 InputEntity where = getTopEntity(); 885 if (where == this) 886 return input.getSystemId(); 887 return where.getSystemId(); 888 } 889 890 /** 891 * Returns the current line number in this input source 892 */ 893 public int getLineNumber() { 894 895 InputEntity where = getTopEntity(); 896 if (where == this) 897 return lineNumber; 898 return where.getLineNumber(); 899 } 900 901 /** 902 * returns -1; maintaining column numbers hurts performance 903 */ 904 public int getColumnNumber() { 905 906 return -1; // not maintained (speed) 907 } 908 909 910 // 911 // n.b. for non-EOF end-of-buffer cases, reader should return 912 // at least a handful of bytes so various lookaheads behave. 913 // 914 // two character pushback exists except at first; characters 915 // represented by surrogate pairs can't be pushed back (they'd 916 // only be in character data anyway). 917 // 918 // DTD exception thrown on char conversion problems; line number 919 // will be low, as a rule. 920 // 921 private void fillbuf() throws IOException, SAXException { 922 923 // don't touched fixed buffers, that'll usually 924 // change entity values (and isn't needed anyway) 925 // likewise, ignore closed streams 926 if (reader == null || isClosed) 927 return; 928 929 // if remembering DTD text, copy! 930 if (startRemember != 0) { 931 if (rememberedText == null) 932 rememberedText = new StringBuffer(buf.length); 933 rememberedText.append(buf, startRemember, 934 start - startRemember); 935 } 936 937 boolean extra = (finish > 0) && (start > 0); 938 int len; 939 940 if (extra) // extra pushback 941 start--; 942 len = finish - start; 943 944 System.arraycopy(buf, start, buf, 0, len); 945 start = 0; 946 finish = len; 947 948 try { 949 len = buf.length - len; 950 len = reader.read(buf, finish, len); 951 } catch (UnsupportedEncodingException e) { 952 fatal("P-075", new Object[]{e.getMessage()}); 953 } catch (CharConversionException e) { 954 fatal("P-076", new Object[]{e.getMessage()}); 955 } 956 if (len >= 0) 957 finish += len; 958 else 959 close(); 960 if (extra) // extra pushback 961 start++; 962 963 if (startRemember != 0) 964 // assert extra == true 965 startRemember = 1; 966 } 967 968 public void close() { 969 970 try { 971 if (reader != null && !isClosed) 972 reader.close(); 973 isClosed = true; 974 } catch (IOException e) { 975 /* NOTHING */ 976 } 977 } 978 979 980 private void fatal(String messageId, Object params []) 981 throws SAXException { 982 983 SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); 984 985 // not continuable ... e.g. WF errors 986 close(); 987 errHandler.fatalError(x); 988 throw x; 989 } 990 }