1 /*
   2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.InputSource;
  29 import org.xml.sax.SAXException;
  30 import org.xml.sax.SAXParseException;
  31 
  32 import java.io.CharConversionException;
  33 import java.io.IOException;
  34 import java.io.InputStream;
  35 import java.io.InputStreamReader;
  36 import java.io.Reader;
  37 import java.io.UnsupportedEncodingException;
  38 import java.net.URL;
  39 import java.util.Locale;
  40 
/**
 * This is how the parser talks to its input entities, of all kinds.
 * The entities are in a stack.
 *
 * <p> For internal entities, the character arrays are referenced here,
 * and read from as needed (they're read-only).  External entities have
 * mutable buffers, that are read into as needed.
 *
 * <p> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
 * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
 * is inconsistent in explaining EOL handling; this is the sensible way.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.4 00/08/05
 */
  57 public class InputEntity {
  58     private int start, finish;
  59     private char buf [];
  60     private int lineNumber = 1;
  61     private boolean returnedFirstHalf = false;
  62     private boolean maybeInCRLF = false;
  63 
  64     // name of entity (never main document or unnamed DTD PE)
  65     private String name;
  66 
  67     private InputEntity next;
  68 
  69     // for system and public IDs in diagnostics
  70     private InputSource input;
  71 
  72     // this is a buffer; some buffers can be replenished.
  73     private Reader reader;
  74     private boolean isClosed;
  75 
  76     private DTDEventListener errHandler;
  77     private Locale locale;
  78 
  79     private StringBuffer rememberedText;
  80     private int startRemember;
  81 
  82     // record if this is a PE, so endParsedEntity won't be called
  83     private boolean isPE;
  84 
  85     // InputStreamReader throws an internal per-read exception, so
  86     // we minimize reads.  We also add a byte to compensate for the
  87     // "ungetc" byte we keep, so that our downstream reads are as
  88     // nicely sized as we can make them.
  89     final private static int BUFSIZ = 8 * 1024 + 1;
  90 
  91     final private static char newline [] = {'\n'};
  92 
  93     public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
  94         InputEntity retval = new InputEntity();
  95         retval.errHandler = h;
  96         retval.locale = l;
  97         return retval;
  98     }
  99 
 100     private InputEntity() {
 101     }
 102 
 103     //
 104     // predicate:  return true iff this is an internal entity reader,
 105     // and so may safely be "popped" as needed.  external entities have
 106     // syntax to uphold; internal parameter entities have at most validity
 107     // constraints to monitor.  also, only external entities get decent
 108     // location diagnostics.
 109     //
 110     public boolean isInternal() {
 111         return reader == null;
 112     }
 113 
 114     //
 115     // predicate:  return true iff this is the toplevel document
 116     //
 117     public boolean isDocument() {
 118         return next == null;
 119     }
 120 
 121     //
 122     // predicate:  return true iff this is a PE expansion (so that
 123     // LexicalEventListner.endParsedEntity won't be called)
 124     //
 125     public boolean isParameterEntity() {
 126         return isPE;
 127     }
 128 
 129     //
 130     // return name of current entity
 131     //
 132     public String getName() {
 133         return name;
 134     }
 135 
 136     //
 137     // use this for an external parsed entity
 138     //
 139     public void init(InputSource in, String name, InputEntity stack,
 140                      boolean isPE)
 141             throws IOException, SAXException {
 142 
 143         input = in;
 144         this.isPE = isPE;
 145         reader = in.getCharacterStream();
 146 
 147         if (reader == null) {
 148             InputStream bytes = in.getByteStream();
 149 
 150             if (bytes == null)
 151                 reader = XmlReader.createReader(new URL(in.getSystemId())
 152                         .openStream());
 153             else if (in.getEncoding() != null)
 154                 reader = XmlReader.createReader(in.getByteStream(),
 155                         in.getEncoding());
 156             else
 157                 reader = XmlReader.createReader(in.getByteStream());
 158         }
 159         next = stack;
 160         buf = new char[BUFSIZ];
 161         this.name = name;
 162         checkRecursion(stack);
 163     }
 164 
 165     //
 166     // use this for an internal parsed entity; buffer is readonly
 167     //
 168     public void init(char b [], String name, InputEntity stack, boolean isPE)
 169             throws SAXException {
 170 
 171         next = stack;
 172         buf = b;
 173         finish = b.length;
 174         this.name = name;
 175         this.isPE = isPE;
 176         checkRecursion(stack);
 177     }
 178 
 179     private void checkRecursion(InputEntity stack)
 180             throws SAXException {
 181 
 182         if (stack == null)
 183             return;
 184         for (stack = stack.next; stack != null; stack = stack.next) {
 185             if (stack.name != null && stack.name.equals(name))
 186                 fatal("P-069", new Object[]{name});
 187         }
 188     }
 189 
 190     public InputEntity pop() throws IOException {
 191 
 192         // caller has ensured there's nothing left to read
 193         close();
 194         return next;
 195     }
 196 
 197     /**
 198      * returns true iff there's no more data to consume ...
 199      */
 200     public boolean isEOF() throws IOException, SAXException {
 201 
 202         // called to ensure WF-ness of included entities and to pop
 203         // input entities appropriately ... EOF is not always legal.
 204         if (start >= finish) {
 205             fillbuf();
 206             return start >= finish;
 207         } else
 208             return false;
 209     }
 210 
 211     /**
 212      * Returns the name of the encoding in use, else null; the name
 213      * returned is in as standard a form as we can get.
 214      */
 215     public String getEncoding() {
 216 
 217         if (reader == null)
 218             return null;
 219         if (reader instanceof XmlReader)
 220             return ((XmlReader) reader).getEncoding();
 221 
 222         // XXX prefer a java2std() call to normalize names...
 223 
 224         if (reader instanceof InputStreamReader)
 225             return ((InputStreamReader) reader).getEncoding();
 226         return null;
 227     }
 228 
 229 
 230     /**
 231      * returns the next name char, or NUL ... faster than getc(),
 232      * and the common "name or nmtoken must be next" case won't
 233      * need ungetc().
 234      */
 235     public char getNameChar() throws IOException, SAXException {
 236 
 237         if (finish <= start)
 238             fillbuf();
 239         if (finish > start) {
 240             char c = buf[start++];
 241             if (XmlChars.isNameChar(c))
 242                 return c;
 243             start--;
 244         }
 245         return 0;
 246     }
 247 
 248     /**
 249      * gets the next Java character -- might be part of an XML
 250      * text character represented by a surrogate pair, or be
 251      * the end of the entity.
 252      */
 253     public char getc() throws IOException, SAXException {
 254 
 255         if (finish <= start)
 256             fillbuf();
 257         if (finish > start) {
 258             char c = buf[start++];
 259 
 260             // [2] Char ::= #x0009 | #x000A | #x000D
 261             //            | [#x0020-#xD7FF]
 262             //            | [#xE000-#xFFFD]
 263             // plus surrogate _pairs_ representing [#x10000-#x10ffff]
 264             if (returnedFirstHalf) {
 265                 if (c >= 0xdc00 && c <= 0xdfff) {
 266                     returnedFirstHalf = false;
 267                     return c;
 268                 } else
 269                     fatal("P-070", new Object[]{Integer.toHexString(c)});
 270             }
 271             if ((c >= 0x0020 && c <= 0xD7FF)
 272                     || c == 0x0009
 273                     // no surrogates!
 274                     || (c >= 0xE000 && c <= 0xFFFD))
 275                 return c;
 276 
 277             //
 278             // CRLF and CR are both line ends; map both to LF, and
 279             // keep line count correct.
 280             //
 281             else if (c == '\r' && !isInternal()) {
 282                 maybeInCRLF = true;
 283                 c = getc();
 284                 if (c != '\n')
 285                     ungetc();
 286                 maybeInCRLF = false;
 287 
 288                 lineNumber++;
 289                 return '\n';
 290 
 291             } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
 292                 if (!isInternal() && !maybeInCRLF)
 293                     lineNumber++;
 294                 return c;
 295             }
 296 
 297             // surrogates...
 298             if (c >= 0xd800 && c < 0xdc00) {
 299                 returnedFirstHalf = true;
 300                 return c;
 301             }
 302 
 303             fatal("P-071", new Object[]{Integer.toHexString(c)});
 304         }
 305         throw new EndOfInputException();
 306     }
 307 
 308 
 309     /**
 310      * lookahead one character
 311      */
 312     public boolean peekc(char c) throws IOException, SAXException {
 313 
 314         if (finish <= start)
 315             fillbuf();
 316         if (finish > start) {
 317             if (buf[start] == c) {
 318                 start++;
 319                 return true;
 320             } else
 321                 return false;
 322         }
 323         return false;
 324     }
 325 
 326 
 327     /**
 328      * two character pushback is guaranteed
 329      */
 330     public void ungetc() {
 331 
 332         if (start == 0)
 333             throw new InternalError("ungetc");
 334         start--;
 335 
 336         if (buf[start] == '\n' || buf[start] == '\r') {
 337             if (!isInternal())
 338                 lineNumber--;
 339         } else if (returnedFirstHalf)
 340             returnedFirstHalf = false;
 341     }
 342 
 343 
 344     /**
 345      * optional grammatical whitespace (discarded)
 346      */
 347     public boolean maybeWhitespace()
 348             throws IOException, SAXException {
 349 
 350         char c;
 351         boolean isSpace = false;
 352         boolean sawCR = false;
 353 
 354         // [3] S ::= #20 | #09 | #0D | #0A
 355         for (; ;) {
 356             if (finish <= start)
 357                 fillbuf();
 358             if (finish <= start)
 359                 return isSpace;
 360 
 361             c = buf[start++];
 362             if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
 363                 isSpace = true;
 364 
 365                 //
 366                 // CR, LF are line endings ... CLRF is one, not two!
 367                 //
 368                 if ((c == '\n' || c == '\r') && !isInternal()) {
 369                     if (!(c == '\n' && sawCR)) {
 370                         lineNumber++;
 371                         sawCR = false;
 372                     }
 373                     if (c == '\r')
 374                         sawCR = true;
 375                 }
 376             } else {
 377                 start--;
 378                 return isSpace;
 379             }
 380         }
 381     }
 382 
 383 
 384     /**
 385      * normal content; whitespace in markup may be handled
 386      * specially if the parser uses the content model.
 387      * <p/>
 388      * <P> content terminates with markup delimiter characters,
 389      * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
 390      * <p/>
 391      * <P> the document handler's characters() method is called
 392      * on all the content found
 393      */
 394     public boolean parsedContent(DTDEventListener docHandler
 395                                  /*ElementValidator validator*/)
 396             throws IOException, SAXException {
 397 
 398         // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 399 
 400         int first;        // first char to return
 401         int last;        // last char to return
 402         boolean sawContent;    // sent any chars?
 403         char c;
 404 
 405         // deliver right out of the buffer, until delimiter, EOF,
 406         // or error, refilling as we go
 407         for (first = last = start, sawContent = false; ; last++) {
 408 
 409             // buffer empty?
 410             if (last >= finish) {
 411                 if (last > first) {
 412 //            validator.text ();
 413                     docHandler.characters(buf, first, last - first);
 414                     sawContent = true;
 415                     start = last;
 416                 }
 417                 if (isEOF())    // calls fillbuf
 418                     return sawContent;
 419                 first = start;
 420                 last = first - 1;    // incremented in loop
 421                 continue;
 422             }
 423 
 424             c = buf[last];
 425 
 426             //
 427             // pass most chars through ASAP; this inlines the code of
 428             // [2] !XmlChars.isChar(c) leaving only characters needing
 429             // special treatment ... line ends, surrogates, and:
 430             //    0x0026 == '&'
 431             //    0x003C == '<'
 432             //    0x005D == ']'
 433             // Comparisons ordered for speed on 'typical' text
 434             //
 435             if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
 436                     || (c < 0x0026 && c >= 0x0020)    // space & punct
 437                     || (c > 0x003C && c < 0x005D)    // A-Z & punct
 438                     || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
 439                     || c == 0x0009
 440                     || (c >= 0xE000 && c <= 0xFFFD)
 441             )
 442                 continue;
 443 
 444             // terminate on markup delimiters
 445             if (c == '<' || c == '&')
 446                 break;
 447 
 448             // count lines
 449             if (c == '\n') {
 450                 if (!isInternal())
 451                     lineNumber++;
 452                 continue;
 453             }
 454 
 455             // External entities get CR, CRLF --> LF mapping
 456             // Internal ones got it already, and we can't repeat
 457             // else we break char ref handling!!
 458             if (c == '\r') {
 459                 if (isInternal())
 460                     continue;
 461 
 462                 docHandler.characters(buf, first, last - first);
 463                 docHandler.characters(newline, 0, 1);
 464                 sawContent = true;
 465                 lineNumber++;
 466                 if (finish > (last + 1)) {
 467                     if (buf[last + 1] == '\n')
 468                         last++;
 469                 } else {    // CR at end of buffer
 470 // XXX case not yet handled:  CRLF here will look like two lines
 471                 }
 472                 first = start = last + 1;
 473                 continue;
 474             }
 475 
 476             // ']]>' is a WF error -- must fail if we see it
 477             if (c == ']') {
 478                 switch (finish - last) {
 479                 // for suspicious end-of-buffer cases, get more data
 480                 // into the buffer to rule out this sequence.
 481                 case 2:
 482                     if (buf[last + 1] != ']')
 483                         continue;
 484                     // FALLTHROUGH
 485 
 486                 case 1:
 487                     if (reader == null || isClosed)
 488                         continue;
 489                     if (last == first)
 490                         throw new InternalError("fillbuf");
 491                     last--;
 492                     if (last > first) {
 493 //            validator.text ();
 494                         docHandler.characters(buf, first, last - first);
 495                         sawContent = true;
 496                         start = last;
 497                     }
 498                     fillbuf();
 499                     first = last = start;
 500                     continue;
 501 
 502                     // otherwise any "]]>" would be buffered, and we can
 503                     // see right away if that's what we have
 504                 default:
 505                     if (buf[last + 1] == ']' && buf[last + 2] == '>')
 506                         fatal("P-072", null);
 507                     continue;
 508                 }
 509             }
 510 
 511             // correctly paired surrogates are OK
 512             if (c >= 0xd800 && c <= 0xdfff) {
 513                 if ((last + 1) >= finish) {
 514                     if (last > first) {
 515 //            validator.text ();
 516                         docHandler.characters(buf, first, last - first);
 517                         sawContent = true;
 518                         start = last + 1;
 519                     }
 520                     if (isEOF()) {    // calls fillbuf
 521                         fatal("P-081",
 522                                 new Object[]{Integer.toHexString(c)});
 523                     }
 524                     first = start;
 525                     last = first;
 526                     continue;
 527                 }
 528                 if (checkSurrogatePair(last))
 529                     last++;
 530                 else {
 531                     last--;
 532                     // also terminate on surrogate pair oddities
 533                     break;
 534                 }
 535                 continue;
 536             }
 537 
 538             fatal("P-071", new Object[]{Integer.toHexString(c)});
 539         }
 540         if (last == first)
 541             return sawContent;
 542 //    validator.text ();
 543         docHandler.characters(buf, first, last - first);
 544         start = last;
 545         return true;
 546     }
 547 
 548 
 549     /**
 550      * CDATA -- character data, terminated by "]]>" and optionally
 551      * including unescaped markup delimiters (ampersand and left angle
 552      * bracket).  This should otherwise be exactly like character data,
 553      * modulo differences in error report details.
 554      * <p/>
 555      * <P> The document handler's characters() or ignorableWhitespace()
 556      * methods are invoked on all the character data found
 557      *
 558      * @param docHandler               gets callbacks for character data
 559      * @param ignorableWhitespace      if true, whitespace characters will
 560      *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
 561      *                                 non-whitespace characters will cause validation errors
 562      * @param whitespaceInvalidMessage if true, ignorable whitespace
 563      *                                 causes a validity error report as well as a callback
 564      */
 565     public boolean unparsedContent(DTDEventListener docHandler,
 566                                    /*ElementValidator validator,*/
 567                                    boolean ignorableWhitespace,
 568                                    String whitespaceInvalidMessage)
 569             throws IOException, SAXException {
 570 
 571         // [18] CDSect ::= CDStart CData CDEnd
 572         // [19] CDStart ::= '<![CDATA['
 573         // [20] CData ::= (Char* - (Char* ']]>' Char*))
 574         // [21] CDEnd ::= ']]>'
 575 
 576         // caller peeked the leading '<' ...
 577         if (!peek("![CDATA[", null))
 578             return false;
 579         docHandler.startCDATA();
 580 
 581         // only a literal ']]>' stops this ...
 582         int last;
 583 
 584         for (; ;) {        // until ']]>' seen
 585             boolean done = false;
 586             char c;
 587 
 588             // don't report ignorable whitespace as "text" for
 589             // validation purposes.
 590             boolean white = ignorableWhitespace;
 591 
 592             for (last = start; last < finish; last++) {
 593                 c = buf[last];
 594 
 595                 //
 596                 // Reject illegal characters.
 597                 //
 598                 if (!XmlChars.isChar(c)) {
 599                     white = false;
 600                     if (c >= 0xd800 && c <= 0xdfff) {
 601                         if (checkSurrogatePair(last)) {
 602                             last++;
 603                             continue;
 604                         } else {
 605                             last--;
 606                             break;
 607                         }
 608                     }
 609                     fatal("P-071", new Object[]
 610                     {Integer.toHexString(buf[last])});
 611                 }
 612                 if (c == '\n') {
 613                     if (!isInternal())
 614                         lineNumber++;
 615                     continue;
 616                 }
 617                 if (c == '\r') {
 618                     // As above, we can't repeat CR/CRLF --> LF mapping
 619                     if (isInternal())
 620                         continue;
 621 
 622                     if (white) {
 623                         if (whitespaceInvalidMessage != null)
 624                             errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
 625                                     whitespaceInvalidMessage), null));
 626                         docHandler.ignorableWhitespace(buf, start,
 627                                 last - start);
 628                         docHandler.ignorableWhitespace(newline, 0, 1);
 629                     } else {
 630 //            validator.text ();
 631                         docHandler.characters(buf, start, last - start);
 632                         docHandler.characters(newline, 0, 1);
 633                     }
 634                     lineNumber++;
 635                     if (finish > (last + 1)) {
 636                         if (buf[last + 1] == '\n')
 637                             last++;
 638                     } else {    // CR at end of buffer
 639 // XXX case not yet handled ... as above
 640                     }
 641                     start = last + 1;
 642                     continue;
 643                 }
 644                 if (c != ']') {
 645                     if (c != ' ' && c != '\t')
 646                         white = false;
 647                     continue;
 648                 }
 649                 if ((last + 2) < finish) {
 650                     if (buf[last + 1] == ']' && buf[last + 2] == '>') {
 651                         done = true;
 652                         break;
 653                     }
 654                     white = false;
 655                     continue;
 656                 } else {
 657                     //last--;
 658                     break;
 659                 }
 660             }
 661             if (white) {
 662                 if (whitespaceInvalidMessage != null)
 663                     errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
 664                             whitespaceInvalidMessage), null));
 665                 docHandler.ignorableWhitespace(buf, start, last - start);
 666             } else {
 667 //        validator.text ();
 668                 docHandler.characters(buf, start, last - start);
 669             }
 670             if (done) {
 671                 start = last + 3;
 672                 break;
 673             }
 674             start = last;
 675             if (isEOF())
 676                 fatal("P-073", null);
 677         }
 678         docHandler.endCDATA();
 679         return true;
 680     }
 681 
 682     // return false to backstep at end of buffer)
 683     private boolean checkSurrogatePair(int offset)
 684             throws SAXException {
 685 
 686         if ((offset + 1) >= finish)
 687             return false;
 688 
 689         char c1 = buf[offset++];
 690         char c2 = buf[offset];
 691 
 692         if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
 693             return true;
 694         fatal("P-074", new Object[]{
 695             Integer.toHexString(c1 & 0x0ffff),
 696             Integer.toHexString(c2 & 0x0ffff)
 697         });
 698         return false;
 699     }
 700 
 701 
 702     /**
 703      * whitespace in markup (flagged to app, discardable)
 704      * <p/>
 705      * <P> the document handler's ignorableWhitespace() method
 706      * is called on all the whitespace found
 707      */
 708     public boolean ignorableWhitespace(DTDEventListener handler)
 709             throws IOException, SAXException {
 710 
 711         char c;
 712         boolean isSpace = false;
 713         int first;
 714 
 715         // [3] S ::= #20 | #09 | #0D | #0A
 716         for (first = start; ;) {
 717             if (finish <= start) {
 718                 if (isSpace)
 719                     handler.ignorableWhitespace(buf, first, start - first);
 720                 fillbuf();
 721                 first = start;
 722             }
 723             if (finish <= start)
 724                 return isSpace;
 725 
 726             c = buf[start++];
 727             switch (c) {
 728             case '\n':
 729                 if (!isInternal())
 730                     lineNumber++;
 731 // XXX handles Macintosh line endings wrong
 732                 // fallthrough
 733             case 0x09:
 734             case 0x20:
 735                 isSpace = true;
 736                 continue;
 737 
 738             case '\r':
 739                 isSpace = true;
 740                 if (!isInternal())
 741                     lineNumber++;
 742                 handler.ignorableWhitespace(buf, first,
 743                         (start - 1) - first);
 744                 handler.ignorableWhitespace(newline, 0, 1);
 745                 if (start < finish && buf[start] == '\n')
 746                     ++start;
 747                 first = start;
 748                 continue;
 749 
 750             default:
 751                 ungetc();
 752                 if (isSpace)
 753                     handler.ignorableWhitespace(buf, first, start - first);
 754                 return isSpace;
 755             }
 756         }
 757     }
 758 
 759     /**
 760      * returns false iff 'next' string isn't as provided,
 761      * else skips that text and returns true.
 762      * <p/>
 763      * <P> NOTE:  two alternative string representations are
 764      * both passed in, since one is faster.
 765      */
 766     public boolean peek(String next, char chars [])
 767             throws IOException, SAXException {
 768 
 769         int len;
 770         int i;
 771 
 772         if (chars != null)
 773             len = chars.length;
 774         else
 775             len = next.length();
 776 
 777         // buffer should hold the whole thing ... give it a
 778         // chance for the end-of-buffer case and cope with EOF
 779         // by letting fillbuf compact and fill
 780         if (finish <= start || (finish - start) < len)
 781             fillbuf();
 782 
 783         // can't peek past EOF
 784         if (finish <= start)
 785             return false;
 786 
 787         // compare the string; consume iff it matches
 788         if (chars != null) {
 789             for (i = 0; i < len && (start + i) < finish; i++) {
 790                 if (buf[start + i] != chars[i])
 791                     return false;
 792             }
 793         } else {
 794             for (i = 0; i < len && (start + i) < finish; i++) {
 795                 if (buf[start + i] != next.charAt(i))
 796                     return false;
 797             }
 798         }
 799 
 800         // if the first fillbuf didn't get enough data, give
 801         // fillbuf another chance to read
 802         if (i < len) {
 803             if (reader == null || isClosed)
 804                 return false;
 805 
 806             //
 807             // This diagnostic "knows" that the only way big strings would
 808             // fail to be peeked is where it's a symbol ... e.g. for an
 809             // </EndTag> construct.  That knowledge could also be applied
 810             // to get rid of the symbol length constraint, since having
 811             // the wrong symbol is a fatal error anyway ...
 812             //
 813             if (len > buf.length)
 814                 fatal("P-077", new Object[]{new Integer(buf.length)});
 815 
 816             fillbuf();
 817             return peek(next, chars);
 818         }
 819 
 820         start += len;
 821         return true;
 822     }
 823 
 824 
 825     //
 826     // Support for reporting the internal DTD subset, so <!DOCTYPE...>
 827     // declarations can be recreated.  This is collected as a single
 828     // string; such subsets are normally small, and many applications
 829     // don't even care about this.
 830     //
 831     public void startRemembering() {
 832 
 833         if (startRemember != 0)
 834             throw new InternalError();
 835         startRemember = start;
 836     }
 837 
 838     public String rememberText() {
 839 
 840         String retval;
 841 
 842         // If the internal subset crossed a buffer boundary, we
 843         // created a temporary buffer.
 844         if (rememberedText != null) {
 845             rememberedText.append(buf, startRemember,
 846                     start - startRemember);
 847             retval = rememberedText.toString();
 848         } else
 849             retval = new String(buf, startRemember,
 850                     start - startRemember);
 851 
 852         startRemember = 0;
 853         rememberedText = null;
 854         return retval;
 855     }
 856 
 857     private InputEntity getTopEntity() {
 858 
 859         InputEntity current = this;
 860 
 861         // don't report locations within internal entities!
 862 
 863         while (current != null && current.input == null)
 864             current = current.next;
 865         return current == null ? this : current;
 866     }
 867 
 868     /**
 869      * Returns the public ID of this input source, if known
 870      */
 871     public String getPublicId() {
 872 
 873         InputEntity where = getTopEntity();
 874         if (where == this)
 875             return input.getPublicId();
 876         return where.getPublicId();
 877     }
 878 
 879     /**
 880      * Returns the system ID of this input source, if known
 881      */
 882     public String getSystemId() {
 883 
 884         InputEntity where = getTopEntity();
 885         if (where == this)
 886             return input.getSystemId();
 887         return where.getSystemId();
 888     }
 889 
 890     /**
 891      * Returns the current line number in this input source
 892      */
 893     public int getLineNumber() {
 894 
 895         InputEntity where = getTopEntity();
 896         if (where == this)
 897             return lineNumber;
 898         return where.getLineNumber();
 899     }
 900 
 901     /**
 902      * returns -1; maintaining column numbers hurts performance
 903      */
 904     public int getColumnNumber() {
 905 
 906         return -1;        // not maintained (speed)
 907     }
 908 
 909 
 910     //
 911     // n.b. for non-EOF end-of-buffer cases, reader should return
 912     // at least a handful of bytes so various lookaheads behave.
 913     //
 914     // two character pushback exists except at first; characters
 915     // represented by surrogate pairs can't be pushed back (they'd
 916     // only be in character data anyway).
 917     //
 918     // DTD exception thrown on char conversion problems; line number
 919     // will be low, as a rule.
 920     //
 921     private void fillbuf() throws IOException, SAXException {
 922 
 923         // don't touched fixed buffers, that'll usually
 924         // change entity values (and isn't needed anyway)
 925         // likewise, ignore closed streams
 926         if (reader == null || isClosed)
 927             return;
 928 
 929         // if remembering DTD text, copy!
 930         if (startRemember != 0) {
 931             if (rememberedText == null)
 932                 rememberedText = new StringBuffer(buf.length);
 933             rememberedText.append(buf, startRemember,
 934                     start - startRemember);
 935         }
 936 
 937         boolean extra = (finish > 0) && (start > 0);
 938         int len;
 939 
 940         if (extra)        // extra pushback
 941             start--;
 942         len = finish - start;
 943 
 944         System.arraycopy(buf, start, buf, 0, len);
 945         start = 0;
 946         finish = len;
 947 
 948         try {
 949             len = buf.length - len;
 950             len = reader.read(buf, finish, len);
 951         } catch (UnsupportedEncodingException e) {
 952             fatal("P-075", new Object[]{e.getMessage()});
 953         } catch (CharConversionException e) {
 954             fatal("P-076", new Object[]{e.getMessage()});
 955         }
 956         if (len >= 0)
 957             finish += len;
 958         else
 959             close();
 960         if (extra)        // extra pushback
 961             start++;
 962 
 963         if (startRemember != 0)
 964         // assert extra == true
 965             startRemember = 1;
 966     }
 967 
 968     public void close() {
 969 
 970         try {
 971             if (reader != null && !isClosed)
 972                 reader.close();
 973             isClosed = true;
 974         } catch (IOException e) {
 975             /* NOTHING */
 976         }
 977     }
 978 
 979 
 980     private void fatal(String messageId, Object params [])
 981             throws SAXException {
 982 
 983         SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
 984 
 985         // not continuable ... e.g. WF errors
 986         close();
 987         errHandler.fatalError(x);
 988         throw x;
 989     }
 990 }