1 /*
   2  * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.dtdparser;
  27 
  28 import org.xml.sax.InputSource;
  29 import org.xml.sax.SAXException;
  30 import org.xml.sax.SAXParseException;
  31 
  32 import java.io.CharConversionException;
  33 import java.io.IOException;
  34 import java.io.InputStream;
  35 import java.io.InputStreamReader;
  36 import java.io.Reader;
  37 import java.io.UnsupportedEncodingException;
  38 import java.net.URL;
  39 import java.util.Arrays;
  40 import java.util.Locale;
  41 
/**
 * This is how the parser talks to its input entities, of all kinds.
 * The entities are in a stack.
 *
 * <p>For internal entities, the character arrays are referenced here,
 * and read from as needed (they're read-only).  External entities have
 * mutable buffers that are read into as needed.
 *
 * <p><em>Note:</em> This maps CRLF (and CR) to LF without regard for
 * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
 * is inconsistent in explaining EOL handling; this is the sensible way.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.4 00/08/05
 */
  58 public class InputEntity {
  59     private int start, finish;
  60     private char buf [];
  61     private int lineNumber = 1;
  62     private boolean returnedFirstHalf = false;
  63     private boolean maybeInCRLF = false;
  64 
  65     // name of entity (never main document or unnamed DTD PE)
  66     private String name;
  67 
  68     private InputEntity next;
  69 
  70     // for system and public IDs in diagnostics
  71     private InputSource input;
  72 
  73     // this is a buffer; some buffers can be replenished.
  74     private Reader reader;
  75     private boolean isClosed;
  76 
  77     private DTDEventListener errHandler;
  78     private Locale locale;
  79 
  80     private StringBuffer rememberedText;
  81     private int startRemember;
  82 
  83     // record if this is a PE, so endParsedEntity won't be called
  84     private boolean isPE;
  85 
  86     // InputStreamReader throws an internal per-read exception, so
  87     // we minimize reads.  We also add a byte to compensate for the
  88     // "ungetc" byte we keep, so that our downstream reads are as
  89     // nicely sized as we can make them.
  90     final private static int BUFSIZ = 8 * 1024 + 1;
  91 
  92     final private static char newline [] = {'\n'};
  93 
  94     public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
  95         InputEntity retval = new InputEntity();
  96         retval.errHandler = h;
  97         retval.locale = l;
  98         return retval;
  99     }
 100 
 101     private InputEntity() {
 102     }
 103 
 104     //
 105     // predicate:  return true iff this is an internal entity reader,
 106     // and so may safely be "popped" as needed.  external entities have
 107     // syntax to uphold; internal parameter entities have at most validity
 108     // constraints to monitor.  also, only external entities get decent
 109     // location diagnostics.
 110     //
 111     public boolean isInternal() {
 112         return reader == null;
 113     }
 114 
 115     //
 116     // predicate:  return true iff this is the toplevel document
 117     //
 118     public boolean isDocument() {
 119         return next == null;
 120     }
 121 
 122     //
 123     // predicate:  return true iff this is a PE expansion (so that
 124     // LexicalEventListner.endParsedEntity won't be called)
 125     //
 126     public boolean isParameterEntity() {
 127         return isPE;
 128     }
 129 
 130     //
 131     // return name of current entity
 132     //
 133     public String getName() {
 134         return name;
 135     }
 136 
 137     //
 138     // use this for an external parsed entity
 139     //
 140     public void init(InputSource in, String name, InputEntity stack,
 141                      boolean isPE)
 142             throws IOException, SAXException {
 143 
 144         input = in;
 145         this.isPE = isPE;
 146         reader = in.getCharacterStream();
 147 
 148         if (reader == null) {
 149             InputStream bytes = in.getByteStream();
 150 
 151             if (bytes == null)
 152                 if (Boolean.valueOf(System.getProperty("enableExternalEntityProcessing")))
 153                     reader = XmlReader.createReader(new URL(in.getSystemId()).openStream());
 154                 else
 155                     fatal("P-082", new Object[] {in.getSystemId()});
 156             else if (in.getEncoding() != null)
 157                 reader = XmlReader.createReader(in.getByteStream(), in.getEncoding());
 158             else
 159                 reader = XmlReader.createReader(in.getByteStream());
 160         }
 161         next = stack;
 162         buf = new char[BUFSIZ];
 163         this.name = name;
 164         checkRecursion(stack);
 165     }
 166 
 167     //
 168     // use this for an internal parsed entity; buffer is readonly
 169     //
 170     public void init(char b [], String name, InputEntity stack, boolean isPE)
 171             throws SAXException {
 172 
 173         next = stack;
 174         buf = Arrays.copyOf(b, b.length);
 175         finish = b.length;
 176         this.name = name;
 177         this.isPE = isPE;
 178         checkRecursion(stack);
 179     }
 180 
 181     private void checkRecursion(InputEntity stack)
 182             throws SAXException {
 183 
 184         if (stack == null)
 185             return;
 186         for (stack = stack.next; stack != null; stack = stack.next) {
 187             if (stack.name != null && stack.name.equals(name))
 188                 fatal("P-069", new Object[]{name});
 189         }
 190     }
 191 
 192     public InputEntity pop() throws IOException {
 193 
 194         // caller has ensured there's nothing left to read
 195         close();
 196         return next;
 197     }
 198 
 199     /**
 200      * returns true iff there's no more data to consume ...
 201      */
 202     public boolean isEOF() throws IOException, SAXException {
 203 
 204         // called to ensure WF-ness of included entities and to pop
 205         // input entities appropriately ... EOF is not always legal.
 206         if (start >= finish) {
 207             fillbuf();
 208             return start >= finish;
 209         } else
 210             return false;
 211     }
 212 
 213     /**
 214      * Returns the name of the encoding in use, else null; the name
 215      * returned is in as standard a form as we can get.
 216      */
 217     public String getEncoding() {
 218 
 219         if (reader == null)
 220             return null;
 221         if (reader instanceof XmlReader)
 222             return ((XmlReader) reader).getEncoding();
 223 
 224         // XXX prefer a java2std() call to normalize names...
 225 
 226         if (reader instanceof InputStreamReader)
 227             return ((InputStreamReader) reader).getEncoding();
 228         return null;
 229     }
 230 
 231 
 232     /**
 233      * returns the next name char, or NUL ... faster than getc(),
 234      * and the common "name or nmtoken must be next" case won't
 235      * need ungetc().
 236      */
 237     public char getNameChar() throws IOException, SAXException {
 238 
 239         if (finish <= start)
 240             fillbuf();
 241         if (finish > start) {
 242             char c = buf[start++];
 243             if (XmlChars.isNameChar(c))
 244                 return c;
 245             start--;
 246         }
 247         return 0;
 248     }
 249 
 250     /**
 251      * gets the next Java character -- might be part of an XML
 252      * text character represented by a surrogate pair, or be
 253      * the end of the entity.
 254      */
 255     public char getc() throws IOException, SAXException {
 256 
 257         if (finish <= start)
 258             fillbuf();
 259         if (finish > start) {
 260             char c = buf[start++];
 261 
 262             // [2] Char ::= #x0009 | #x000A | #x000D
 263             //            | [#x0020-#xD7FF]
 264             //            | [#xE000-#xFFFD]
 265             // plus surrogate _pairs_ representing [#x10000-#x10ffff]
 266             if (returnedFirstHalf) {
 267                 if (c >= 0xdc00 && c <= 0xdfff) {
 268                     returnedFirstHalf = false;
 269                     return c;
 270                 } else
 271                     fatal("P-070", new Object[]{Integer.toHexString(c)});
 272             }
 273             if ((c >= 0x0020 && c <= 0xD7FF)
 274                     || c == 0x0009
 275                     // no surrogates!
 276                     || (c >= 0xE000 && c <= 0xFFFD))
 277                 return c;
 278 
 279             //
 280             // CRLF and CR are both line ends; map both to LF, and
 281             // keep line count correct.
 282             //
 283             else if (c == '\r' && !isInternal()) {
 284                 maybeInCRLF = true;
 285                 c = getc();
 286                 if (c != '\n')
 287                     ungetc();
 288                 maybeInCRLF = false;
 289 
 290                 lineNumber++;
 291                 return '\n';
 292 
 293             } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
 294                 if (!isInternal() && !maybeInCRLF)
 295                     lineNumber++;
 296                 return c;
 297             }
 298 
 299             // surrogates...
 300             if (c >= 0xd800 && c < 0xdc00) {
 301                 returnedFirstHalf = true;
 302                 return c;
 303             }
 304 
 305             fatal("P-071", new Object[]{Integer.toHexString(c)});
 306         }
 307         throw new EndOfInputException();
 308     }
 309 
 310 
 311     /**
 312      * lookahead one character
 313      */
 314     public boolean peekc(char c) throws IOException, SAXException {
 315 
 316         if (finish <= start)
 317             fillbuf();
 318         if (finish > start) {
 319             if (buf[start] == c) {
 320                 start++;
 321                 return true;
 322             } else
 323                 return false;
 324         }
 325         return false;
 326     }
 327 
 328 
 329     /**
 330      * two character pushback is guaranteed
 331      */
 332     public void ungetc() {
 333 
 334         if (start == 0)
 335             throw new InternalError("ungetc");
 336         start--;
 337 
 338         if (buf[start] == '\n' || buf[start] == '\r') {
 339             if (!isInternal())
 340                 lineNumber--;
 341         } else if (returnedFirstHalf)
 342             returnedFirstHalf = false;
 343     }
 344 
 345 
 346     /**
 347      * optional grammatical whitespace (discarded)
 348      */
 349     public boolean maybeWhitespace()
 350             throws IOException, SAXException {
 351 
 352         char c;
 353         boolean isSpace = false;
 354         boolean sawCR = false;
 355 
 356         // [3] S ::= #20 | #09 | #0D | #0A
 357         for (; ;) {
 358             if (finish <= start)
 359                 fillbuf();
 360             if (finish <= start)
 361                 return isSpace;
 362 
 363             c = buf[start++];
 364             if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
 365                 isSpace = true;
 366 
 367                 //
 368                 // CR, LF are line endings ... CLRF is one, not two!
 369                 //
 370                 if ((c == '\n' || c == '\r') && !isInternal()) {
 371                     if (!(c == '\n' && sawCR)) {
 372                         lineNumber++;
 373                         sawCR = false;
 374                     }
 375                     if (c == '\r')
 376                         sawCR = true;
 377                 }
 378             } else {
 379                 start--;
 380                 return isSpace;
 381             }
 382         }
 383     }
 384 
 385 
    /**
     * Normal content; whitespace in markup may be handled
     * specially if the parser uses the content model.
     *
     * <p>Content terminates with markup delimiter characters,
     * namely ampersand (&amp;) and left angle bracket (&lt;).
     *
     * <p>The document handler's characters() method is called
     * on all the content found.  Text is delivered straight out of the
     * internal buffer, in as many calls as buffer refills and CR
     * translation require.
     *
     * @param docHandler receives the character data
     * @return true if any characters were delivered to the handler
     * @throws IOException  if refilling the buffer fails
     * @throws SAXException from the handler, or when a fatal error is
     *                      reported: P-071 (illegal character), P-072
     *                      ("]]&gt;" in content), P-081 (surrogate at end
     *                      of input)
     */
    public boolean parsedContent(DTDEventListener docHandler
                                 /*ElementValidator validator*/)
            throws IOException, SAXException {

        // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

        int first;        // first char to return
        int last;        // last char to return
        boolean sawContent;    // sent any chars?
        char c;

        // deliver right out of the buffer, until delimiter, EOF,
        // or error, refilling as we go
        for (first = last = start, sawContent = false; ; last++) {

            // buffer empty?
            if (last >= finish) {
                // flush what was scanned so far before the buffer is
                // refilled
                if (last > first) {
                    // validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last;
                }
                if (isEOF())    // calls fillbuf
                    return sawContent;
                // restart scanning at the new read position
                first = start;
                last = first - 1;    // incremented in loop
                continue;
            }

            c = buf[last];

            //
            // pass most chars through ASAP; this inlines the code of
            // [2] !XmlChars.isChar(c) leaving only characters needing
            // special treatment ... line ends, surrogates, and:
            //    0x0026 == '&'
            //    0x003C == '<'
            //    0x005D == ']'
            // Comparisons ordered for speed on 'typical' text
            //
            if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                    || (c < 0x0026 && c >= 0x0020)    // space & punct
                    || (c > 0x003C && c < 0x005D)    // A-Z & punct
                    || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                    || c == 0x0009
                    || (c >= 0xE000 && c <= 0xFFFD)
            )
                continue;

            // terminate on markup delimiters
            if (c == '<' || c == '&')
                break;

            // count lines
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }

            // External entities get CR, CRLF --> LF mapping
            // Internal ones got it already, and we can't repeat
            // else we break char ref handling!!
            if (c == '\r') {
                if (isInternal())
                    continue;

                // deliver the text before the CR, then a single LF in
                // place of the CR (or CRLF)
                docHandler.characters(buf, first, last - first);
                docHandler.characters(newline, 0, 1);
                sawContent = true;
                lineNumber++;
                if (finish > (last + 1)) {
                    // swallow the LF of a CRLF pair
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
                    // XXX case not yet handled:  CRLF here will look like two lines
                }
                // everything up to and including the line end is consumed
                first = start = last + 1;
                continue;
            }

            // ']]>' is a WF error -- must fail if we see it
            if (c == ']') {
                // finish - last is the number of buffered chars from
                // this ']' onwards
                switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // nothing more can arrive, so no "]]>" can form
                    if (reader == null || isClosed)
                        continue;
                    if (last == first)
                        throw new InternalError("fillbuf");
                    // step back one char, flush what precedes it, and
                    // refill so the ']' is rescanned with more lookahead
                    last--;
                    if (last > first) {
                        // validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
                }
            }

            // correctly paired surrogates are OK
            if (c >= 0xd800 && c <= 0xdfff) {
                // surrogate is the last buffered char: its partner, if
                // any, has not been read yet
                if ((last + 1) >= finish) {
                    if (last > first) {
                        // validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last + 1;
                    }
                    if (isEOF()) {    // calls fillbuf
                        fatal("P-081",
                                new Object[]{Integer.toHexString(c)});
                    }
                    first = start;
                    last = first;
                    continue;
                }
                if (checkSurrogatePair(last))
                    last++;        // skip the low surrogate as well
                else {
                    last--;
                    // also terminate on surrogate pair oddities
                    break;
                }
                continue;
            }

            // anything left is not a legal XML character
            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // reached via "break": deliver whatever precedes the delimiter
        if (last == first)
            return sawContent;
        // validator.text ();
        docHandler.characters(buf, first, last - first);
        start = last;
        return true;
    }
 549 
 550 
    /**
     * CDATA -- character data, terminated by {@code "]]>"} and optionally
     * including unescaped markup delimiters (ampersand and left angle
     * bracket).  This should otherwise be exactly like character data,
     * modulo differences in error report details.
     *
     * <p>The document handler's characters() or ignorableWhitespace()
     * methods are invoked on all the character data found, bracketed by
     * startCDATA() and endCDATA().
     *
     * @param docHandler               gets callbacks for character data
     * @param ignorableWhitespace      if true, whitespace characters will
     *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
     *                                 non-whitespace characters will cause validation errors
     * @param whitespaceInvalidMessage if non-null (and an error handler is
     *                                 set), ignorable whitespace causes a validity error report
     *                                 with this message ID as well as a callback
     * @return false if the input does not continue with
     *         {@code "![CDATA["} (nothing is consumed by this method in
     *         that case); true once the whole section has been reported
     * @throws IOException  if refilling the buffer fails
     * @throws SAXException from the handlers, or when a fatal error is
     *                      reported: P-071 (illegal character), P-073
     *                      (end of input before {@code "]]>"})
     */
    public boolean unparsedContent(DTDEventListener docHandler,
                                   /*ElementValidator validator,*/
                                   boolean ignorableWhitespace,
                                   String whitespaceInvalidMessage)
            throws IOException, SAXException {

        // [18] CDSect ::= CDStart CData CDEnd
        // [19] CDStart ::= '<![CDATA['
        // [20] CData ::= (Char* - (Char* ']]>' Char*))
        // [21] CDEnd ::= ']]>'

        // caller peeked the leading '<' ...
        if (!peek("![CDATA[", null))
            return false;
        docHandler.startCDATA();

        // only a literal ']]>' stops this ...
        int last;

        // each pass scans what is buffered, reports it, then refills
        for (; ;) {        // until ']]>' seen
            boolean done = false;
            char c;

            // don't report ignorable whitespace as "text" for
            // validation purposes.
            boolean white = ignorableWhitespace;

            for (last = start; last < finish; last++) {
                c = buf[last];

                //
                // Reject illegal characters.
                //
                if (!XmlChars.isChar(c)) {
                    white = false;
                    if (c >= 0xd800 && c <= 0xdfff) {
                        if (checkSurrogatePair(last)) {
                            last++;    // skip the low surrogate as well
                            continue;
                        } else {
                            // NOTE(review): checkSurrogatePair returns
                            // false without an error when the surrogate is
                            // the last buffered char, and isEOF() below
                            // refills only when start >= finish -- confirm
                            // that this path makes progress.
                            last--;
                            break;
                        }
                    }
                    fatal("P-071", new Object[]
                    {Integer.toHexString(buf[last])});
                }
                if (c == '\n') {
                    if (!isInternal())
                        lineNumber++;
                    continue;
                }
                if (c == '\r') {
                    // As above, we can't repeat CR/CRLF --> LF mapping
                    if (isInternal())
                        continue;

                    // report the text before the CR, then a single LF in
                    // place of the CR (or CRLF)
                    if (white) {
                        if (whitespaceInvalidMessage != null && errHandler != null)
                            errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                                    whitespaceInvalidMessage), null));
                        docHandler.ignorableWhitespace(buf, start,
                                last - start);
                        docHandler.ignorableWhitespace(newline, 0, 1);
                    } else {
                        // validator.text ();
                        docHandler.characters(buf, start, last - start);
                        docHandler.characters(newline, 0, 1);
                    }
                    lineNumber++;
                    if (finish > (last + 1)) {
                        // swallow the LF of a CRLF pair
                        if (buf[last + 1] == '\n')
                            last++;
                    } else {    // CR at end of buffer
                        // XXX case not yet handled ... as above
                    }
                    start = last + 1;
                    continue;
                }
                if (c != ']') {
                    // any char other than space or tab ends the
                    // all-whitespace run
                    if (c != ' ' && c != '\t')
                        white = false;
                    continue;
                }
                // c is ']': check for the terminator when the next two
                // chars are buffered
                if ((last + 2) < finish) {
                    if (buf[last + 1] == ']' && buf[last + 2] == '>') {
                        done = true;
                        break;
                    }
                    white = false;
                    continue;
                } else {
                    // too close to the end of the buffer to decide; stop
                    // here so the ']' is rescanned on the next pass
                    //last--;
                    break;
                }
            }
            // report buf[start .. last)
            if (white) {
                if (whitespaceInvalidMessage != null && errHandler != null)
                    errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                            whitespaceInvalidMessage), null));
                docHandler.ignorableWhitespace(buf, start, last - start);
            } else {
                // validator.text ();
                docHandler.characters(buf, start, last - start);
            }
            if (done) {
                // skip over the "]]>" itself
                start = last + 3;
                break;
            }
            start = last;
            if (isEOF())
                fatal("P-073", null);
        }
        docHandler.endCDATA();
        return true;
    }
 683 
 684     // return false to backstep at end of buffer)
 685     private boolean checkSurrogatePair(int offset)
 686             throws SAXException {
 687 
 688         if ((offset + 1) >= finish)
 689             return false;
 690 
 691         char c1 = buf[offset++];
 692         char c2 = buf[offset];
 693 
 694         if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
 695             return true;
 696         fatal("P-074", new Object[]{
 697             Integer.toHexString(c1 & 0x0ffff),
 698             Integer.toHexString(c2 & 0x0ffff)
 699         });
 700         return false;
 701     }
 702 
 703 
 704     /**
 705      * whitespace in markup (flagged to app, discardable)
 706      * <p>
 707      * <P> the document handler's ignorableWhitespace() method
 708      * is called on all the whitespace found
 709      */
 710     public boolean ignorableWhitespace(DTDEventListener handler)
 711             throws IOException, SAXException {
 712 
 713         char c;
 714         boolean isSpace = false;
 715         int first;
 716 
 717         // [3] S ::= #20 | #09 | #0D | #0A
 718         for (first = start; ;) {
 719             if (finish <= start) {
 720                 if (isSpace)
 721                     handler.ignorableWhitespace(buf, first, start - first);
 722                 fillbuf();
 723                 first = start;
 724             }
 725             if (finish <= start)
 726                 return isSpace;
 727 
 728             c = buf[start++];
 729             switch (c) {
 730             case '\n':
 731                 if (!isInternal())
 732                     lineNumber++;
 733 // XXX handles Macintosh line endings wrong
 734                 // fallthrough
 735             case 0x09:
 736             case 0x20:
 737                 isSpace = true;
 738                 continue;
 739 
 740             case '\r':
 741                 isSpace = true;
 742                 if (!isInternal())
 743                     lineNumber++;
 744                 handler.ignorableWhitespace(buf, first,
 745                         (start - 1) - first);
 746                 handler.ignorableWhitespace(newline, 0, 1);
 747                 if (start < finish && buf[start] == '\n')
 748                     ++start;
 749                 first = start;
 750                 continue;
 751 
 752             default:
 753                 ungetc();
 754                 if (isSpace)
 755                     handler.ignorableWhitespace(buf, first, start - first);
 756                 return isSpace;
 757             }
 758         }
 759     }
 760 
 761     /**
 762      * returns false iff 'next' string isn't as provided,
 763      * else skips that text and returns true.
 764      * <p>
 765      * <P> NOTE:  two alternative string representations are
 766      * both passed in, since one is faster.
 767      */
 768     public boolean peek(String next, char chars [])
 769             throws IOException, SAXException {
 770 
 771         int len;
 772         int i;
 773 
 774         if (chars != null)
 775             len = chars.length;
 776         else
 777             len = next.length();
 778 
 779         // buffer should hold the whole thing ... give it a
 780         // chance for the end-of-buffer case and cope with EOF
 781         // by letting fillbuf compact and fill
 782         if (finish <= start || (finish - start) < len)
 783             fillbuf();
 784 
 785         // can't peek past EOF
 786         if (finish <= start)
 787             return false;
 788 
 789         // compare the string; consume iff it matches
 790         if (chars != null) {
 791             for (i = 0; i < len && (start + i) < finish; i++) {
 792                 if (buf[start + i] != chars[i])
 793                     return false;
 794             }
 795         } else {
 796             for (i = 0; i < len && (start + i) < finish; i++) {
 797                 if (buf[start + i] != next.charAt(i))
 798                     return false;
 799             }
 800         }
 801 
 802         // if the first fillbuf didn't get enough data, give
 803         // fillbuf another chance to read
 804         if (i < len) {
 805             if (reader == null || isClosed)
 806                 return false;
 807 
 808             //
 809             // This diagnostic "knows" that the only way big strings would
 810             // fail to be peeked is where it's a symbol ... e.g. for an
 811             // </EndTag> construct.  That knowledge could also be applied
 812             // to get rid of the symbol length constraint, since having
 813             // the wrong symbol is a fatal error anyway ...
 814             //
 815             if (len > buf.length) {
 816                 fatal("P-077", new Object[]{Integer.valueOf(buf.length)});
 817             }
 818 
 819             fillbuf();
 820             return peek(next, chars);
 821         }
 822 
 823         start += len;
 824         return true;
 825     }
 826 
 827 
 828     //
 829     // Support for reporting the internal DTD subset, so <!DOCTYPE...>
 830     // declarations can be recreated.  This is collected as a single
 831     // string; such subsets are normally small, and many applications
 832     // don't even care about this.
 833     //
 834     public void startRemembering() {
 835 
 836         if (startRemember != 0)
 837             throw new InternalError();
 838         startRemember = start;
 839     }
 840 
 841     public String rememberText() {
 842 
 843         String retval;
 844 
 845         // If the internal subset crossed a buffer boundary, we
 846         // created a temporary buffer.
 847         if (rememberedText != null) {
 848             rememberedText.append(buf, startRemember,
 849                     start - startRemember);
 850             retval = rememberedText.toString();
 851         } else
 852             retval = new String(buf, startRemember,
 853                     start - startRemember);
 854 
 855         startRemember = 0;
 856         rememberedText = null;
 857         return retval;
 858     }
 859 
 860     private InputEntity getTopEntity() {
 861 
 862         InputEntity current = this;
 863 
 864         // don't report locations within internal entities!
 865 
 866         while (current != null && current.input == null)
 867             current = current.next;
 868         return current == null ? this : current;
 869     }
 870 
 871     /**
 872      * Returns the public ID of this input source, if known
 873      */
 874     public String getPublicId() {
 875 
 876         InputEntity where = getTopEntity();
 877         if (where == this)
 878             return input.getPublicId();
 879         return where.getPublicId();
 880     }
 881 
 882     /**
 883      * Returns the system ID of this input source, if known
 884      */
 885     public String getSystemId() {
 886 
 887         InputEntity where = getTopEntity();
 888         if (where == this)
 889             return input.getSystemId();
 890         return where.getSystemId();
 891     }
 892 
 893     /**
 894      * Returns the current line number in this input source
 895      */
 896     public int getLineNumber() {
 897 
 898         InputEntity where = getTopEntity();
 899         if (where == this)
 900             return lineNumber;
 901         return where.getLineNumber();
 902     }
 903 
 904     /**
 905      * returns -1; maintaining column numbers hurts performance
 906      */
 907     public int getColumnNumber() {
 908 
 909         return -1;        // not maintained (speed)
 910     }
 911 
 912 
 913     //
 914     // n.b. for non-EOF end-of-buffer cases, reader should return
 915     // at least a handful of bytes so various lookaheads behave.
 916     //
 917     // two character pushback exists except at first; characters
 918     // represented by surrogate pairs can't be pushed back (they'd
 919     // only be in character data anyway).
 920     //
 921     // DTD exception thrown on char conversion problems; line number
 922     // will be low, as a rule.
 923     //
 924     private void fillbuf() throws IOException, SAXException {
 925 
 926         // don't touched fixed buffers, that'll usually
 927         // change entity values (and isn't needed anyway)
 928         // likewise, ignore closed streams
 929         if (reader == null || isClosed)
 930             return;
 931 
 932         // if remembering DTD text, copy!
 933         if (startRemember != 0) {
 934             if (rememberedText == null)
 935                 rememberedText = new StringBuffer(buf.length);
 936             rememberedText.append(buf, startRemember,
 937                     start - startRemember);
 938         }
 939 
 940         boolean extra = (finish > 0) && (start > 0);
 941         int len;
 942 
 943         if (extra)        // extra pushback
 944             start--;
 945         len = finish - start;
 946 
 947         System.arraycopy(buf, start, buf, 0, len);
 948         start = 0;
 949         finish = len;
 950 
 951         try {
 952             len = buf.length - len;
 953             len = reader.read(buf, finish, len);
 954         } catch (UnsupportedEncodingException e) {
 955             fatal("P-075", new Object[]{e.getMessage()});
 956         } catch (CharConversionException e) {
 957             fatal("P-076", new Object[]{e.getMessage()});
 958         }
 959         if (len >= 0)
 960             finish += len;
 961         else
 962             close();
 963         if (extra)        // extra pushback
 964             start++;
 965 
 966         if (startRemember != 0)
 967         // assert extra == true
 968             startRemember = 1;
 969     }
 970 
 971     public void close() {
 972 
 973         try {
 974             if (reader != null && !isClosed)
 975                 reader.close();
 976             isClosed = true;
 977         } catch (IOException e) {
 978             /* NOTHING */
 979         }
 980     }
 981 
 982 
 983     private void fatal(String messageId, Object params [])
 984             throws SAXException {
 985 
 986         SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
 987 
 988         // not continuable ... e.g. WF errors
 989         close();
 990         if (errHandler != null) {
 991             errHandler.fatalError(x);
 992         }
 993         throw x;
 994     }
 995 }