1 /*
   2  * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 import java.io.EOFException;
  24 import java.io.IOException;
  25 import java.util.Locale;
  26 import java.util.Vector;
  27 
  28 import com.sun.xml.internal.stream.Entity;
  29 import com.sun.xml.internal.stream.XMLBufferListener;
  30 import java.io.InputStream;
  31 import java.io.InputStreamReader;
  32 import java.io.Reader;
  33 
  34 
  35 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  36 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  37 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  38 
  39 
  40 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  41 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  42 
  43 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  44 import com.sun.org.apache.xerces.internal.util.XMLChar;
  45 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  46 import com.sun.org.apache.xerces.internal.xni.QName;
  47 import com.sun.org.apache.xerces.internal.xni.XMLString;
  48 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  49 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  50 import com.sun.org.apache.xerces.internal.xni.*;
  51 
  52 /**
  53  * Implements the entity scanner methods.
  54  *
  55  * @author Neeraj Bajaj, Sun Microsystems
  56  * @author Andy Clark, IBM
  57  * @author Arnaud  Le Hors, IBM
  58  * @author K.Venugopal Sun Microsystems
  59  *
  60  */
  61 public class XMLEntityScanner implements XMLLocator  {
  62 
  63 
    /** Entity currently being scanned; {@code null} until one is supplied through {@code setCurrentEntity}. */
    protected Entity.ScannedEntity fCurrentEntity = null ;
    /** Buffer size; starts at the entity manager's default and is overwritten by {@code setBufferSize}. */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager handed to this scanner by its constructor or by {@code reset(SymbolTable, XMLEntityManager, XMLErrorReporter)}. */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;
    /** Listeners which should know when load is being called */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0-126). The static initializer
     * marks the ASCII letters, digits, '-', '.', ':' and '_' as {@code true};
     * every other slot keeps the default {@code false}.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];
  75 
    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    /** Debug flag; presumably enables tracing in the string-skipping code (not visible in this part of the file). */
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * It is a single shared instance whose {@code fillInStackTrace()} is
     * overridden to return {@code this}, so it carries no stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        // Skip stack-trace capture for this preallocated, reusable exception.
        public Throwable fillInStackTrace() {
            return this;
        }
    };
  92 
    /** Symbol table; assigned by the {@code reset} methods and used to intern scanned names. */
    protected SymbolTable fSymbolTable = null;
    /** Error reporter; assigned by the {@code reset} methods. */
    protected XMLErrorReporter fErrorReporter = null;
    // Whitespace bookkeeping; whiteSpaceLen and whiteSpaceInfoNeeded are restored to
    // these initial values by reset(PropertyManager) and reset(XMLComponentManager).
    // NOTE(review): presumably records where whitespace was seen while scanning -
    // confirm against the code that fills these fields.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Property manager reference; not assigned anywhere in the part of the file visible here. */
    protected PropertyManager fPropertyManager = null ;

    /** Cached result of {@code fCurrentEntity.isExternal()}, refreshed by {@code setCurrentEntity} whenever a non-null entity is set. */
    boolean isExternal = false;
 123     static {
 124 
 125         for(int i=0x0041;i<=0x005A ; i++){
 126             VALID_NAMES[i]=true;
 127         }
 128         for(int i=0x0061;i<=0x007A; i++){
 129             VALID_NAMES[i]=true;
 130         }
 131         for(int i=0x0030;i<=0x0039; i++){
 132             VALID_NAMES[i]=true;
 133         }
 134         VALID_NAMES[45]=true;
 135         VALID_NAMES[46]=true;
 136         VALID_NAMES[58]=true;
 137         VALID_NAMES[95]=true;
 138     }
    // SAPJVM: Remember that the XML version has explicitly been set,
    // so that XMLStreamReader.getVersion() can find that out.
    // Set to true by setXMLVersion(String).
    boolean xmlVersionSetExplicitly = false;
 142     //
 143     // Constructors
 144     //
 145 
 146     /** Default constructor. */
 147     public XMLEntityScanner() {
 148     } // <init>()
 149 
 150 
    /**
     * Creates a scanner bound to the given entity manager and initialises it
     * from the given property manager by calling
     * {@code reset(PropertyManager)}.
     *
     * @param propertyManager source of the symbol table and error reporter
     *                        properties; must not be {@code null}
     * @param entityManager   the entity manager this scanner belongs to
     */
    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
        fEntityManager = entityManager ;
        reset(propertyManager);
    } // <init>()
 160 
 161 
 162     // set buffer size:
 163     public final void setBufferSize(int size) {
 164         // REVISIT: Buffer size passed to entity scanner
 165         // was not being kept in synch with the actual size
 166         // of the buffers in each scanned entity. If any
 167         // of the buffers were actually resized, it was possible
 168         // that the parser would throw an ArrayIndexOutOfBoundsException
 169         // for documents which contained names which are longer than
 170         // the current buffer size. Conceivably the buffer size passed
 171         // to entity scanner could be used to determine a minimum size
 172         // for resizing, if doubling its size is smaller than this
 173         // minimum. -- mrglavas
 174         fBufferSize = size;
 175     }
 176 
 177     /**
 178      * Resets the components.
 179      */
 180     public void reset(PropertyManager propertyManager){
 181         fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
 182         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
 183         fCurrentEntity = null;
 184         whiteSpaceLen = 0;
 185         whiteSpaceInfoNeeded = true;
 186         listeners.clear();
 187     }
 188 
 189     /**
 190      * Resets the component. The component can query the component manager
 191      * about any features and properties that affect the operation of the
 192      * component.
 193      *
 194      * @param componentManager The component manager.
 195      *
 196      * @throws SAXException Thrown by component on initialization error.
 197      *                      For example, if a feature or property is
 198      *                      required for the operation of the component, the
 199      *                      component manager may throw a
 200      *                      SAXNotRecognizedException or a
 201      *                      SAXNotSupportedException.
 202      */
 203     public void reset(XMLComponentManager componentManager)
 204     throws XMLConfigurationException {
 205 
 206         //System.out.println(" this is being called");
 207         // xerces features
 208         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
 209 
 210         //xerces properties
 211         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 212         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 213         fCurrentEntity = null;
 214         whiteSpaceLen = 0;
 215         whiteSpaceInfoNeeded = true;
 216         listeners.clear();
 217     } // reset(XMLComponentManager)
 218 
 219 
 220     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 221             XMLErrorReporter reporter) {
 222         fCurrentEntity = null;
 223         fSymbolTable = symbolTable;
 224         fEntityManager = entityManager;
 225         fErrorReporter = reporter;
 226     }
 227 
 228     /**
 229      * Returns the XML version of the current entity. This will normally be the
 230      * value from the XML or text declaration or defaulted by the parser. Note that
 231      * that this value may be different than the version of the processing rules
 232      * applied to the current entity. For instance, an XML 1.1 document may refer to
 233      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 234      * document. Also note that, for a given entity, this value can only be considered
 235      * final once the XML or text declaration has been read or once it has been
 236      * determined that there is no such declaration.
 237      */
 238     public final String getXMLVersion() {
 239         if (fCurrentEntity != null) {
 240             return fCurrentEntity.xmlVersion;
 241         }
 242         return null;
 243     } // getXMLVersion():String
 244 
 245     /**
 246      * Sets the XML version. This method is used by the
 247      * scanners to report the value of the version pseudo-attribute
 248      * in an XML or text declaration.
 249      *
 250      * @param xmlVersion the XML version of the current entity
 251      */
 252     public final void setXMLVersion(String xmlVersion) {
 253         xmlVersionSetExplicitly = true; // SAPJVM
 254         fCurrentEntity.xmlVersion = xmlVersion;
 255     } // setXMLVersion(String)
 256 
 257 
 258     /** set the instance of current scanned entity.
 259      *   @param ScannedEntity
 260      */
 261 
 262     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 263         fCurrentEntity = scannedEntity ;
 264         if(fCurrentEntity != null){
 265             isExternal = fCurrentEntity.isExternal();
 266             if(DEBUG_BUFFER)
 267                 System.out.println("Current Entity is "+scannedEntity.name);
 268         }
 269     }
 270 
 271     public  Entity.ScannedEntity getCurrentEntity(){
 272         return fCurrentEntity ;
 273     }
 274     //
 275     // XMLEntityReader methods
 276     //
 277 
 278     /**
 279      * Returns the base system identifier of the currently scanned
 280      * entity, or null if none is available.
 281      */
 282     public final String getBaseSystemId() {
 283         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 284     } // getBaseSystemId():String
 285 
 286     /**
 287      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
 288      */
 289     public void setBaseSystemId(String systemId) {
 290         //no-op
 291     }
 292 
 293     ///////////// Locator methods start.
 294     public final int getLineNumber(){
 295         //if the entity is closed, we should return -1
 296         //xxx at first place why such call should be there...
 297         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 298     }
 299 
 300     /**
 301      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
 302      */
 303     public void setLineNumber(int line) {
 304         //no-op
 305     }
 306 
 307 
 308     public final int getColumnNumber(){
 309         //if the entity is closed, we should return -1
 310         //xxx at first place why such call should be there...
 311         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 312     }
 313 
 314     /**
 315      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
 316      */
 317     public void setColumnNumber(int col) {
 318         // no-op
 319     }
 320 
 321 
 322     public final int getCharacterOffset(){
 323         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 324     }
 325 
 326     /** Returns the expanded system identifier.  */
 327     public final String getExpandedSystemId() {
 328         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 329     }
 330 
 331     /**
 332      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
 333      */
 334     public void setExpandedSystemId(String systemId) {
 335         //no-op
 336     }
 337 
 338     /** Returns the literal system identifier.  */
 339     public final String getLiteralSystemId() {
 340         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 341     }
 342 
 343     /**
 344      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
 345      */
 346     public void setLiteralSystemId(String systemId) {
 347         //no-op
 348     }
 349 
 350     /** Returns the public identifier.  */
 351     public final String getPublicId() {
 352         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 353     }
 354 
 355     /**
 356      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
 357      */
 358     public void setPublicId(String publicId) {
 359         //no-op
 360     }
 361 
 362     ///////////////// Locator methods finished.
 363 
 364     /** the version of the current entity being scanned */
 365     public void setVersion(String version){
 366         fCurrentEntity.version = version;
 367     }
 368 
 369     public String getVersion(){
 370         if (fCurrentEntity != null)
 371             return fCurrentEntity.version ;
 372         return null;
 373     }
 374 
 375     /**
 376      * Returns the encoding of the current entity.
 377      * Note that, for a given entity, this value can only be
 378      * considered final once the encoding declaration has been read (or once it
 379      * has been determined that there is no such declaration) since, no encoding
 380      * having been specified on the XMLInputSource, the parser
 381      * will make an initial "guess" which could be in error.
 382      */
 383     public final String getEncoding() {
 384         if (fCurrentEntity != null) {
 385             return fCurrentEntity.encoding;
 386         }
 387         return null;
 388     } // getEncoding():String
 389 
 390     /**
 391      * Sets the encoding of the scanner. This method is used by the
 392      * scanners if the XMLDecl or TextDecl line contains an encoding
 393      * pseudo-attribute.
 394      * <p>
 395      * <strong>Note:</strong> The underlying character reader on the
 396      * current entity will be changed to accomodate the new encoding.
 397      * However, the new encoding is ignored if the current reader was
 398      * not constructed from an input stream (e.g. an external entity
 399      * that is resolved directly to the appropriate java.io.Reader
 400      * object).
 401      *
 402      * @param encoding The IANA encoding name of the new encoding.
 403      *
 404      * @throws IOException Thrown if the new encoding is not supported.
 405      *
 406      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 407      */
 408     public final void setEncoding(String encoding) throws IOException {
 409 
 410         if (DEBUG_ENCODINGS) {
 411             System.out.println("$$$ setEncoding: "+encoding);
 412         }
 413 
 414         if (fCurrentEntity.stream != null) {
 415             // if the encoding is the same, don't change the reader and
 416             // re-use the original reader used by the OneCharReader
 417             // NOTE: Besides saving an object, this overcomes deficiencies
 418             //       in the UTF-16 reader supplied with the standard Java
 419             //       distribution (up to and including 1.3). The UTF-16
 420             //       decoder buffers 8K blocks even when only asked to read
 421             //       a single char! -Ac
 422             if (fCurrentEntity.encoding == null ||
 423                     !fCurrentEntity.encoding.equals(encoding)) {
 424                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 425                 // and we know the endian-ness, we shouldn't change readers.
 426                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 427                 // the endian-ness from the encoding we presently have.
 428                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 429                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 430                     if(ENCODING.equals("UTF-16")) return;
 431                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 432                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 433                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 434                         } else {
 435                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 436                         }
 437                         return;
 438                     }
 439                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 440                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 441                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 442                         } else {
 443                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 444                         }
 445                         return;
 446                     }
 447                 }
 448                 // wrap a new reader around the input stream, changing
 449                 // the encoding
 450                 if (DEBUG_ENCODINGS) {
 451                     System.out.println("$$$ creating new reader from stream: "+
 452                             fCurrentEntity.stream);
 453                 }
 454                 //fCurrentEntity.stream.reset();
 455                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 456                 fCurrentEntity.encoding = encoding;
 457 
 458             } else {
 459                 if (DEBUG_ENCODINGS)
 460                     System.out.println("$$$ reusing old reader on stream");
 461             }
 462         }
 463 
 464     } // setEncoding(String)
 465 
 466     /** Returns true if the current entity being scanned is external. */
 467     public final boolean isExternal() {
 468         return fCurrentEntity.isExternal();
 469     } // isExternal():boolean
 470 
 471     public int getChar(int relative) throws IOException{
 472         if(arrangeCapacity(relative + 1, false)){
 473             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 474         }else{
 475             return -1;
 476         }
 477     }//getChar()
 478 
 479     /**
 480      * Returns the next character on the input.
 481      * <p>
 482      * <strong>Note:</strong> The character is <em>not</em> consumed.
 483      *
 484      * @throws IOException  Thrown if i/o error occurs.
 485      * @throws EOFException Thrown on end of file.
 486      */
 487     public int peekChar() throws IOException {
 488         if (DEBUG_BUFFER) {
 489             System.out.print("(peekChar: ");
 490             print();
 491             System.out.println();
 492         }
 493 
 494         // load more characters, if needed
 495         if (fCurrentEntity.position == fCurrentEntity.count) {
 496             invokeListeners(0);
 497             load(0, true);
 498         }
 499 
 500         // peek at character
 501         int c = fCurrentEntity.ch[fCurrentEntity.position];
 502 
 503         // return peeked character
 504         if (DEBUG_BUFFER) {
 505             System.out.print(")peekChar: ");
 506             print();
 507             if (isExternal) {
 508                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 509             } else {
 510                 System.out.println(" -> '"+(char)c+"'");
 511             }
 512         }
 513         if (isExternal) {
 514             return c != '\r' ? c : '\n';
 515         } else {
 516             return c;
 517         }
 518 
 519     } // peekChar():int
 520 
    /**
     * Returns the next character on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     * <p>
     * A {@code '\n'}, or a {@code '\r'} while the scanner's cached
     * {@code isExternal} flag is set, increments the entity's line number and
     * resets its column number. In the external {@code '\r'} case a directly
     * following {@code '\n'} is consumed as well and {@code '\n'} is returned.
     *
     * @return the character that was scanned, after the line-break handling
     *         described above
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // The line break was the last buffered character: keep it in slot 0
            // and load behind it (presumably load's first argument is the
            // buffer offset to fill from - confirm against load()).
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false);
            }
            if (c == '\r' && isExternal) {
                // Swallow the '\n' of a "\r\n" pair; otherwise step back so the
                // following character is not consumed.
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        // NOTE(review): this increment also runs right after a line break,
        // where columnNumber was just reset to 1 - confirm this is intended.
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int
 571 
    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the scanned token as returned by the symbol table's
     *         {@code addSymbol}, or {@code null} if no name character was
     *         found at the current position
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    public String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan nmtoken
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // Table lookup for characters below 127; XMLChar.isName for the rest.
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // Buffer exhausted in the middle of the token: move the part scanned
            // so far to the front of the buffer before loading more.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    // NOTE(review): compares against fBufferSize rather than
                    // ch.length - assumes the two are always equal; confirm.
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                // The token now starts at index 0.
                offset = 0;
                // Stop scanning when load() returns true (presumably: no more
                // input was available for this entity - confirm against load()).
                if (load(length, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String
 650 
    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the scanned name as returned by the symbol table's
     *         {@code addSymbol}, or {@code null} if the character at the
     *         current position is not a name start character
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public String scanName() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan name
        int offset = fCurrentEntity.position;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // The start character was the last buffered one: keep it in slot 0
            // and load behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                // load() returned true: the name is just this one character
                // (presumably no more input was available - confirm against load()).
                if (load(1, false)) {
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // Table lookup for characters below 127; XMLChar.isName for the rest.
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // Buffer exhausted in the middle of the name: move the part scanned
                // so far to the front of the buffer before loading more.
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        // NOTE(review): compares against fBufferSize rather than
                        // ch.length - assumes the two are always equal; confirm.
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    // The name now starts at index 0.
                    offset = 0;
                    if (load(length, false)) {
                        break;
                    }
                }
            }
        }
        // length is 0 when the first character was not a name start character.
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String
 747 
 748     /**
 749      * Scans a qualified name from the input, setting the fields of the
 750      * QName structure appropriately.
 751      * <p>
 752      * <strong>Note:</strong> The qualified name characters are consumed.
 753      * <p>
 754      * <strong>Note:</strong> The strings used to set the values of the
 755      * QName structure must be symbols. The SymbolTable can be used for
 756      * this purpose.
 757      *
 758      * @param qname The qualified name structure to fill.
 759      *
 760      * @return Returns true if a qualified name appeared immediately on
 761      *         the input and was scanned, false otherwise.
 762      *
 763      * @throws IOException  Thrown if i/o error occurs.
 764      * @throws EOFException Thrown on end of file.
 765      *
 766      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 767      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 768      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 769      */
 770     public boolean scanQName(QName qname) throws IOException {
 771         if (DEBUG_BUFFER) {
 772             System.out.print("(scanQName, "+qname+": ");
 773             print();
 774             System.out.println();
 775         }
 776 
 777         // load more characters, if needed
 778         if (fCurrentEntity.position == fCurrentEntity.count) {
 779             invokeListeners(0);
 780             load(0, true);
 781         }
 782 
 783         // scan qualified name
 784         int offset = fCurrentEntity.position;
 785 
 786         //making a check if if the specified character is a valid name start character
 787         //as defined by production [5] in the XML 1.0 specification.
 788         // Name ::= (Letter | '_' | ':') (NameChar)*
 789 
 790         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 791             if (++fCurrentEntity.position == fCurrentEntity.count) {
 792                 invokeListeners(1);
 793                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 794                 offset = 0;
 795 
 796                 if (load(1, false)) {
 797                     fCurrentEntity.columnNumber++;
 798                     //adding into symbol table.
 799                     //XXX We are trying to add single character in SymbolTable??????
 800                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 801                     qname.setValues(null, name, name, null);
 802                     if (DEBUG_BUFFER) {
 803                         System.out.print(")scanQName, "+qname+": ");
 804                         print();
 805                         System.out.println(" -> true");
 806                     }
 807                     return true;
 808                 }
 809             }
 810             int index = -1;
 811             boolean vc = false;
 812             while ( true){
 813 
 814                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 815                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 816                 if(c < 127){
 817                     vc = VALID_NAMES[c];
 818                 }else{
 819                     vc = XMLChar.isName(c);
 820                 }
 821                 if(!vc)break;
 822                 if (c == ':') {
 823                     if (index != -1) {
 824                         break;
 825                     }
 826                     index = fCurrentEntity.position;
 827                 }
 828                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 829                     int length = fCurrentEntity.position - offset;
 830                     invokeListeners(length);
 831                     if (length == fCurrentEntity.fBufferSize) {
 832                         // bad luck we have to resize our buffer
 833                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 834                         System.arraycopy(fCurrentEntity.ch, offset,
 835                                 tmp, 0, length);
 836                         fCurrentEntity.ch = tmp;
 837                         fCurrentEntity.fBufferSize *= 2;
 838                     } else {
 839                         System.arraycopy(fCurrentEntity.ch, offset,
 840                                 fCurrentEntity.ch, 0, length);
 841                     }
 842                     if (index != -1) {
 843                         index = index - offset;
 844                     }
 845                     offset = 0;
 846                     if (load(length, false)) {
 847                         break;
 848                     }
 849                 }
 850             }
 851             int length = fCurrentEntity.position - offset;
 852             fCurrentEntity.columnNumber += length;
 853             if (length > 0) {
 854                 String prefix = null;
 855                 String localpart = null;
 856                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 857                         offset, length);
 858 
 859                 if (index != -1) {
 860                     int prefixLength = index - offset;
 861                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 862                             offset, prefixLength);
 863                     int len = length - prefixLength - 1;
 864                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 865                             index + 1, len);
 866 
 867                 } else {
 868                     localpart = rawname;
 869                 }
 870                 qname.setValues(prefix, localpart, rawname, null);
 871                 if (DEBUG_BUFFER) {
 872                     System.out.print(")scanQName, "+qname+": ");
 873                     print();
 874                     System.out.println(" -> true");
 875                 }
 876                 return true;
 877             }
 878         }
 879 
 880         // no qualified name found
 881         if (DEBUG_BUFFER) {
 882             System.out.print(")scanQName, "+qname+": ");
 883             print();
 884             System.out.println(" -> false");
 885         }
 886         return false;
 887 
 888     } // scanQName(QName):boolean
 889 
    /**
     * Scans a range of parsed character data and sets the fields of the
     * supplied XMLString to the scanned range of the scanner's own
     * character buffer (the data is not copied or appended).
     * <p>
     * A run of line breaks at the start of the range is rewritten in the
     * buffer as '\n' characters before the range is reported.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one character is left: copy it to index 0, load the
            // following characters behind it and restart reading at index 0
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        // offset marks the start of the range that is reported to the caller
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        // entered only when the range starts with '\n', or with '\r' while
        // isExternal is set
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // consume the leading run of line breaks
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: restart the range at index 0 and
                        // load more characters behind the first `newlines`
                        // slots, which are filled with '\n' further below
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: skip the '\n' and move the start of
                        // the range forward so the pair yields one character
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    // NOTE(review): a '\r' that is not followed by '\n' is
                    // counted twice in `newlines` -- confirm this is intended
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same buffer refill as in the '\r' case above
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-read the character and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // every character of the consumed run is reported as '\n'
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one unread character is left in the buffer: report the
                // newline run on its own and let the caller call again.
                // The range is handed over with setValues(); the append()
                // variant is kept below, commented out.
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters for as long as XMLChar.isContent() accepts them;
        // the first rejected character is left unread
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        // the line breaks counted above already reset the column to 1, so
        // they are excluded from the column advance
        fCurrentEntity.columnNumber += length - newlines;

        // The range is handed over with setValues(); the append() variant is
        // kept below, commented out.
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            // a pending '\r' is reported as '\n' while isExternal is set
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // the buffer is exhausted, so the next character is not known
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int
1035 
1036     /**
1037      * Scans a range of attribute value data, setting the fields of the
1038      * XMLString structure, appropriately.
1039      * <p>
1040      * <strong>Note:</strong> The characters are consumed.
1041      * <p>
1042      * <strong>Note:</strong> This method does not guarantee to return
1043      * the longest run of attribute value data. This method may return
1044      * before the quote character due to reaching the end of the input
1045      * buffer or any other reason.
1046      * <p>
1047      * <strong>Note:</strong> The fields contained in the XMLString
1048      * structure are not guaranteed to remain valid upon subsequent calls
1049      * to the entity scanner. Therefore, the caller is responsible for
1050      * immediately using the returned character data or making a copy of
1051      * the character data.
1052      *
1053      * @param quote   The quote character that signifies the end of the
1054      *                attribute value data.
1055      * @param content The content structure to fill.
1056      *
1057      * @return Returns the next character on the input, if known. This
1058      *         value may be -1 but this does <em>note</em> designate
1059      *         end of file.
1060      *
1061      * @throws IOException  Thrown if i/o error occurs.
1062      * @throws EOFException Thrown on end of file.
1063      */
1064     public int scanLiteral(int quote, XMLString content)
1065     throws IOException {
1066         if (DEBUG_BUFFER) {
1067             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1068             print();
1069             System.out.println();
1070         }
1071         // load more characters, if needed
1072         if (fCurrentEntity.position == fCurrentEntity.count) {
1073             invokeListeners(0);
1074             load(0, true);
1075         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1076             invokeListeners(0);
1077             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1078 
1079             load(1, false);
1080             fCurrentEntity.position = 0;
1081         }
1082 
1083         // normalize newlines
1084         int offset = fCurrentEntity.position;
1085         int c = fCurrentEntity.ch[offset];
1086         int newlines = 0;
1087         if(whiteSpaceInfoNeeded)
1088             whiteSpaceLen=0;
1089         if (c == '\n' || (c == '\r' && isExternal)) {
1090             if (DEBUG_BUFFER) {
1091                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1092                 print();
1093                 System.out.println();
1094             }
1095             do {
1096                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1097                 if (c == '\r' && isExternal) {
1098                     newlines++;
1099                     fCurrentEntity.lineNumber++;
1100                     fCurrentEntity.columnNumber = 1;
1101                     if (fCurrentEntity.position == fCurrentEntity.count) {
1102                         invokeListeners(newlines);
1103                         offset = 0;
1104                         fCurrentEntity.position = newlines;
1105                         if (load(newlines, false)) {
1106                             break;
1107                         }
1108                     }
1109                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1110                         fCurrentEntity.position++;
1111                         offset++;
1112                     }
1113                     /*** NEWLINE NORMALIZATION ***/
1114                     else {
1115                         newlines++;
1116                     }
1117                     /***/
1118                 } else if (c == '\n') {
1119                     newlines++;
1120                     fCurrentEntity.lineNumber++;
1121                     fCurrentEntity.columnNumber = 1;
1122                     if (fCurrentEntity.position == fCurrentEntity.count) {
1123                         offset = 0;
1124                         invokeListeners(newlines);
1125                         fCurrentEntity.position = newlines;
1126                         if (load(newlines, false)) {
1127                             break;
1128                         }
1129                     }
1130                     /*** NEWLINE NORMALIZATION ***
1131                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1132                      * && external) {
1133                      * fCurrentEntity.position++;
1134                      * offset++;
1135                      * }
1136                      * /***/
1137                 } else {
1138                     fCurrentEntity.position--;
1139                     break;
1140                 }
1141             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1142             int i=0;
1143             for ( i = offset; i < fCurrentEntity.position; i++) {
1144                 fCurrentEntity.ch[i] = '\n';
1145                 storeWhiteSpace(i);
1146             }
1147 
1148             int length = fCurrentEntity.position - offset;
1149             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1150                 content.setValues(fCurrentEntity.ch, offset, length);
1151                 if (DEBUG_BUFFER) {
1152                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1153                     print();
1154                     System.out.println();
1155                 }
1156                 return -1;
1157             }
1158             if (DEBUG_BUFFER) {
1159                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1160                 print();
1161                 System.out.println();
1162             }
1163         }
1164 
1165         // scan literal value
1166         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1167             c = fCurrentEntity.ch[fCurrentEntity.position];
1168             if ((c == quote &&
1169                     (!fCurrentEntity.literal || isExternal)) ||
1170                     c == '%' || !XMLChar.isContent(c)) {
1171                 break;
1172             }
1173             if (whiteSpaceInfoNeeded && c == '\t') {
1174                 storeWhiteSpace(fCurrentEntity.position);
1175             }
1176         }
1177 
1178         int length = fCurrentEntity.position - offset;
1179         fCurrentEntity.columnNumber += length - newlines;
1180         content.setValues(fCurrentEntity.ch, offset, length);
1181 
1182         // return next character
1183         if (fCurrentEntity.position != fCurrentEntity.count) {
1184             c = fCurrentEntity.ch[fCurrentEntity.position];
1185             // NOTE: We don't want to accidentally signal the
1186             //       end of the literal if we're expanding an
1187             //       entity appearing in the literal. -Ac
1188             if (c == quote && fCurrentEntity.literal) {
1189                 c = -1;
1190             }
1191         } else {
1192             c = -1;
1193         }
1194         if (DEBUG_BUFFER) {
1195             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1196             print();
1197             System.out.println(" -> '"+(char)c+"'");
1198         }
1199         return c;
1200 
1201     } // scanLiteral(int,XMLString):int
1202 
1203     /**
1204      * Save whitespace information. Increase the whitespace buffer by 100
1205      * when needed.
1206      *
1207      * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1208      *
1209      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1210      */
1211     private void storeWhiteSpace(int whiteSpacePos) {
1212         if (whiteSpaceLen >= whiteSpaceLookup.length) {
1213             int [] tmp = new int[whiteSpaceLookup.length + 100];
1214             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1215             whiteSpaceLookup = tmp;
1216         }
1217 
1218         whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1219     }
1220 
    //CHANGED:
    /**
     * Scans character data up to the specified delimiter and appends it to
     * the supplied XMLStringBuffer. The delimiter itself is consumed but
     * not appended.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true if the scan stopped before the delimiter (at an
     *         invalid character, or after a run of line breaks that reached
     *         the last buffered character) so there is more data to scan;
     *         false once the delimiter was found or fewer characters than
     *         the delimiter length were left.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        // each pass appends one segment to the buffer; the loop ends when the
        // delimiter has been found or one of the early returns is taken
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true);
            }

            boolean bNextEntity = false;

            // make sure at least delimLen characters are buffered: move the
            // unread characters to the front and load more behind them until
            // enough are available or load() reports true
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // hand over whatever is left, mark the buffer as fully
                // consumed and report that the scan is over
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true);
                return false;
            }

            // normalize newlines
            // offset marks the start of the segment appended in this pass
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            // entered only when the segment starts with '\n', or with '\r'
            // while isExternal is set
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                // consume the leading run of line breaks
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted: restart the segment at index
                            // 0 and load more characters behind the first
                            // `newlines` slots, filled with '\n' below
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: skip the '\n' and move the segment
                            // start forward so the pair yields one character
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // same refill as in the '\r' case; this branch
                            // additionally sets count before loading
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                    } else {
                        // not a line break: un-read the character and stop
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // every character of the consumed run is appended as '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one unread character is left in the buffer: append
                    // the newline run and let the caller call again
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // the buffer ends inside a possible delimiter:
                            // rewind to its first character and end this pass
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: continue scanning right after the
                            // character that matched charAt0
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // the position moved past the whole delimiter only if
                    // every character matched
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // leave the line break unread; the next pass of the
                    // outer loop normalizes it
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // leave the invalid character unread, append what was
                    // scanned before it and return to the caller
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            // the delimiter was consumed but is not part of the data
            if (done) {
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop above only ends with done == true, so this returns false
        return !done;

    } // scanData(String,XMLStringBuffer):boolean
1411 
1412     /**
1413      * Skips a character appearing immediately on the input.
1414      * <p>
1415      * <strong>Note:</strong> The character is consumed only if it matches
1416      * the specified character.
1417      *
1418      * @param c The character to skip.
1419      *
1420      * @return Returns true if the character was skipped.
1421      *
1422      * @throws IOException  Thrown if i/o error occurs.
1423      * @throws EOFException Thrown on end of file.
1424      */
1425     public boolean skipChar(int c) throws IOException {
1426         if (DEBUG_BUFFER) {
1427             System.out.print("(skipChar, '"+(char)c+"': ");
1428             print();
1429             System.out.println();
1430         }
1431 
1432         // load more characters, if needed
1433         if (fCurrentEntity.position == fCurrentEntity.count) {
1434             invokeListeners(0);
1435             load(0, true);
1436         }
1437 
1438         // skip character
1439         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1440         if (cc == c) {
1441             fCurrentEntity.position++;
1442             if (c == '\n') {
1443                 fCurrentEntity.lineNumber++;
1444                 fCurrentEntity.columnNumber = 1;
1445             } else {
1446                 fCurrentEntity.columnNumber++;
1447             }
1448             if (DEBUG_BUFFER) {
1449                 System.out.print(")skipChar, '"+(char)c+"': ");
1450                 print();
1451                 System.out.println(" -> true");
1452             }
1453             return true;
1454         } else if (c == '\n' && cc == '\r' && isExternal) {
1455             // handle newlines
1456             if (fCurrentEntity.position == fCurrentEntity.count) {
1457                 invokeListeners(1);
1458                 fCurrentEntity.ch[0] = (char)cc;
1459                 load(1, false);
1460             }
1461             fCurrentEntity.position++;
1462             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1463                 fCurrentEntity.position++;
1464             }
1465             fCurrentEntity.lineNumber++;
1466             fCurrentEntity.columnNumber = 1;
1467             if (DEBUG_BUFFER) {
1468                 System.out.print(")skipChar, '"+(char)c+"': ");
1469                 print();
1470                 System.out.println(" -> true");
1471             }
1472             return true;
1473         }
1474 
1475         // character was not skipped
1476         if (DEBUG_BUFFER) {
1477             System.out.print(")skipChar, '"+(char)c+"': ");
1478             print();
1479             System.out.println(" -> false");
1480         }
1481         return false;
1482 
1483     } // skipChar(int):boolean
1484 
1485     public boolean isSpace(char ch){
1486         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1487     }
1488     /**
1489      * Skips space characters appearing immediately on the input.
1490      * <p>
1491      * <strong>Note:</strong> The characters are consumed only if they are
1492      * space characters.
1493      *
1494      * @return Returns true if at least one space character was skipped.
1495      *
1496      * @throws IOException  Thrown if i/o error occurs.
1497      * @throws EOFException Thrown on end of file.
1498      *
1499      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1500      */
1501     public boolean skipSpaces() throws IOException {
1502         if (DEBUG_BUFFER) {
1503             System.out.print("(skipSpaces: ");
1504             print();
1505             System.out.println();
1506         }
1507         //boolean entityChanged = false;
1508         // load more characters, if needed
1509         if (fCurrentEntity.position == fCurrentEntity.count) {
1510             invokeListeners(0);
1511             load(0, true);
1512         }
1513 
1514         //we are doing this check only in skipSpace() because it is called by
1515         //fMiscDispatcher and we want the parser to exit gracefully when document
1516         //is well-formed.
1517         //it is possible that end of document is reached and
1518         //fCurrentEntity becomes null
1519         //nothing was read so entity changed  'false' should be returned.
1520         if(fCurrentEntity == null){
1521             return false ;
1522         }
1523 
1524         // skip spaces
1525         int c = fCurrentEntity.ch[fCurrentEntity.position];
1526         if (XMLChar.isSpace(c)) {
1527             do {
1528                 boolean entityChanged = false;
1529                 // handle newlines
1530                 if (c == '\n' || (isExternal && c == '\r')) {
1531                     fCurrentEntity.lineNumber++;
1532                     fCurrentEntity.columnNumber = 1;
1533                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1534                         invokeListeners(0);
1535                         fCurrentEntity.ch[0] = (char)c;
1536                         entityChanged = load(1, true);
1537                         if (!entityChanged){
1538                             // the load change the position to be 1,
1539                             // need to restore it when entity not changed
1540                             fCurrentEntity.position = 0;
1541                         }else if(fCurrentEntity == null){
1542                             return true ;
1543                         }
1544                     }
1545                     if (c == '\r' && isExternal) {
1546                         // REVISIT: Does this need to be updated to fix the
1547                         //          #x0D ^#x0A newline normalization problem? -Ac
1548                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1549                             fCurrentEntity.position--;
1550                         }
1551                     }
1552                 } else {
1553                     fCurrentEntity.columnNumber++;
1554                 }
1555                 // load more characters, if needed
1556                 if (!entityChanged){
1557                     fCurrentEntity.position++;
1558                 }
1559 
1560                 if (fCurrentEntity.position == fCurrentEntity.count) {
1561                     invokeListeners(0);
1562                     load(0, true);
1563 
1564                     //we are doing this check only in skipSpace() because it is called by
1565                     //fMiscDispatcher and we want the parser to exit gracefully when document
1566                     //is well-formed.
1567 
1568                     //it is possible that end of document is reached and
1569                     //fCurrentEntity becomes null
1570                     //nothing was read so entity changed  'false' should be returned.
1571                     if(fCurrentEntity == null){
1572                         return true ;
1573                     }
1574 
1575                 }
1576             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1577             if (DEBUG_BUFFER) {
1578                 System.out.print(")skipSpaces: ");
1579                 print();
1580                 System.out.println(" -> true");
1581             }
1582             return true;
1583         }
1584 
1585         // no spaces were found
1586         if (DEBUG_BUFFER) {
1587             System.out.print(")skipSpaces: ");
1588             print();
1589             System.out.println(" -> false");
1590         }
1591         return false;
1592 
1593     } // skipSpaces():boolean
1594 
1595 
1596     /**
1597      * @param legnth This function checks that following number of characters are available.
1598      * to the underlying buffer.
1599      * @return This function returns true if capacity asked is available.
1600      */
1601     public boolean arrangeCapacity(int length) throws IOException{
1602         return arrangeCapacity(length, false);
1603     }
1604 
1605     /**
1606      * @param legnth This function checks that following number of characters are available.
1607      * to the underlying buffer.
1608      * @param if the underlying function should change the entity
1609      * @return This function returns true if capacity asked is available.
1610      *
1611      */
1612     public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1613         //check if the capacity is availble in the current buffer
1614         //count is no. of characters in the buffer   [x][m][l]
1615         //position is '0' based
1616         //System.out.println("fCurrent Entity " + fCurrentEntity);
1617         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1618             return true;
1619         }
1620         if(DEBUG_SKIP_STRING){
1621             System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1622             System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1623             System.out.println("length = " + length);
1624         }
1625         boolean entityChanged = false;
1626         //load more characters -- this function shouldn't change the entity
1627         while((fCurrentEntity.count - fCurrentEntity.position) < length){
1628             if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1629                 invokeListeners(0);
1630                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1631                 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1632                 fCurrentEntity.position = 0;
1633             }
1634 
1635             if((fCurrentEntity.count - fCurrentEntity.position) < length){
1636                 int pos = fCurrentEntity.position;
1637                 invokeListeners(pos);
1638                 entityChanged = load(fCurrentEntity.count, changeEntity);
1639                 fCurrentEntity.position = pos;
1640                 if(entityChanged)break;
1641             }
1642             if(DEBUG_SKIP_STRING){
1643                 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1644                 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1645                 System.out.println("length = " + length);
1646             }
1647         }
1648         //load changes the position.. set it back to the point where we started.
1649 
1650         //after loading check again.
1651         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1652             return true;
1653         } else {
1654             return false;
1655         }
1656     }
1657 
1658     /**
1659      * Skips the specified string appearing immediately on the input.
1660      * <p>
1661      * <strong>Note:</strong> The characters are consumed only if all
1662      * the characters are skipped.
1663      *
1664      * @param s The string to skip.
1665      *
1666      * @return Returns true if the string was skipped.
1667      *
1668      * @throws IOException  Thrown if i/o error occurs.
1669      * @throws EOFException Thrown on end of file.
1670      */
1671     public boolean skipString(String s) throws IOException {
1672 
1673         final int length = s.length();
1674 
1675         //first make sure that required capacity is avaible
1676         if(arrangeCapacity(length, false)){
1677             final int beforeSkip = fCurrentEntity.position ;
1678             int afterSkip = fCurrentEntity.position + length - 1 ;
1679             if(DEBUG_SKIP_STRING){
1680                 System.out.println("skipString,length = " + s + "," + length);
1681                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1682             }
1683 
1684             //s.charAt() indexes are 0 to 'Length -1' based.
1685             int i = length - 1 ;
1686             //check from reverse
1687             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1688                 if(afterSkip-- == beforeSkip){
1689                     fCurrentEntity.position = fCurrentEntity.position + length ;
1690                     fCurrentEntity.columnNumber += length;
1691                     return true;
1692                 }
1693             }
1694         }
1695 
1696         return false;
1697     } // skipString(String):boolean
1698 
1699     public boolean skipString(char [] s) throws IOException {
1700 
1701         final int length = s.length;
1702         //first make sure that required capacity is avaible
1703         if(arrangeCapacity(length, false)){
1704             int beforeSkip = fCurrentEntity.position ;
1705             int afterSkip = fCurrentEntity.position + length  ;
1706 
1707             if(DEBUG_SKIP_STRING){
1708                 System.out.println("skipString,length = " + new String(s) + "," + length);
1709                 System.out.println("skipString,length = " + new String(s) + "," + length);
1710             }
1711 
1712             for(int i=0;i<length;i++){
1713                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1714                    return false;
1715                 }
1716             }
1717             fCurrentEntity.position = fCurrentEntity.position + length ;
1718             fCurrentEntity.columnNumber += length;
1719             return true;
1720 
1721         }
1722 
1723         return false;
1724     }
1725 
1726     //
1727     // Locator methods
1728     //
1729     //
1730     // Private methods
1731     //
1732 
1733     /**
1734      * Loads a chunk of text.
1735      *
1736      * @param offset       The offset into the character buffer to
1737      *                     read the next batch of characters.
1738      * @param changeEntity True if the load should change entities
1739      *                     at the end of the entity, otherwise leave
1740      *                     the current entity in place and the entity
1741      *                     boundary will be signaled by the return
1742      *                     value.
1743      *
1744      * @returns Returns true if the entity changed as a result of this
1745      *          load operation.
1746      */
1747     final boolean load(int offset, boolean changeEntity)
1748     throws IOException {
1749         if (DEBUG_BUFFER) {
1750             System.out.print("(load, "+offset+": ");
1751             print();
1752             System.out.println();
1753         }
1754         //maintaing the count till last load
1755         fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
1756         // read characters
1757         int length = fCurrentEntity.ch.length - offset;
1758         if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
1759             length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
1760         }
1761         if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
1762         int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1763         if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
1764 
1765         // reset count and position
1766         boolean entityChanged = false;
1767         if (count != -1) {
1768             if (count != 0) {
1769                 // record the last count
1770                 fCurrentEntity.fLastCount = count;
1771                 fCurrentEntity.count = count + offset;
1772                 fCurrentEntity.position = offset;
1773             }
1774         }
1775         // end of this entity
1776         else {
1777             fCurrentEntity.count = offset;
1778             fCurrentEntity.position = offset;
1779             entityChanged = true;
1780 
1781             if (changeEntity) {
1782                 //notify the entity manager about the end of entity
1783                 fEntityManager.endEntity();
1784                 //return if the current entity becomes null
1785                 if(fCurrentEntity == null){
1786                     throw END_OF_DOCUMENT_ENTITY;
1787                 }
1788                 // handle the trailing edges
1789                 if (fCurrentEntity.position == fCurrentEntity.count) {
1790                     load(0, true);
1791                 }
1792             }
1793 
1794         }
1795         if (DEBUG_BUFFER) {
1796             System.out.print(")load, "+offset+": ");
1797             print();
1798             System.out.println();
1799         }
1800 
1801         return entityChanged;
1802 
1803     } // load(int, boolean):boolean
1804 
1805     /**
1806      * Creates a reader capable of reading the given input stream in
1807      * the specified encoding.
1808      *
1809      * @param inputStream  The input stream.
1810      * @param encoding     The encoding name that the input stream is
1811      *                     encoded using. If the user has specified that
1812      *                     Java encoding names are allowed, then the
1813      *                     encoding name may be a Java encoding name;
1814      *                     otherwise, it is an ianaEncoding name.
1815      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1816      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1817      *                      unknown or not relevant.
1818      *
1819      * @return Returns a reader.
1820      */
1821     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1822     throws IOException {
1823 
1824         // normalize encoding name
1825         if (encoding == null) {
1826             encoding = "UTF-8";
1827         }
1828 
1829         // try to use an optimized reader
1830         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1831         if (ENCODING.equals("UTF-8")) {
1832             if (DEBUG_ENCODINGS) {
1833                 System.out.println("$$$ creating UTF8Reader");
1834             }
1835             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1836         }
1837         if (ENCODING.equals("US-ASCII")) {
1838             if (DEBUG_ENCODINGS) {
1839                 System.out.println("$$$ creating ASCIIReader");
1840             }
1841             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1842         }
1843         if(ENCODING.equals("ISO-10646-UCS-4")) {
1844             if(isBigEndian != null) {
1845                 boolean isBE = isBigEndian.booleanValue();
1846                 if(isBE) {
1847                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1848                 } else {
1849                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1850                 }
1851             } else {
1852                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1853                         "EncodingByteOrderUnsupported",
1854                         new Object[] { encoding },
1855                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1856             }
1857         }
1858         if(ENCODING.equals("ISO-10646-UCS-2")) {
1859             if(isBigEndian != null) { // sould never happen with this encoding...
1860                 boolean isBE = isBigEndian.booleanValue();
1861                 if(isBE) {
1862                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1863                 } else {
1864                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1865                 }
1866             } else {
1867                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1868                         "EncodingByteOrderUnsupported",
1869                         new Object[] { encoding },
1870                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1871             }
1872         }
1873 
1874         // check for valid name
1875         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1876         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1877         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1878             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1879                     "EncodingDeclInvalid",
1880                     new Object[] { encoding },
1881                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1882                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1883                     //       because every byte is a valid ISO Latin 1 character.
1884                     //       It may not translate correctly but if we failed on
1885                     //       the encoding anyway, then we're expecting the content
1886                     //       of the document to be bad. This will just prevent an
1887                     //       invalid UTF-8 sequence to be detected. This is only
1888                     //       important when continue-after-fatal-error is turned
1889                     //       on. -Ac
1890                     encoding = "ISO-8859-1";
1891         }
1892 
1893         // try to use a Java reader
1894         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1895         if (javaEncoding == null) {
1896             if(fAllowJavaEncodings) {
1897                 javaEncoding = encoding;
1898             } else {
1899                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1900                         "EncodingDeclInvalid",
1901                         new Object[] { encoding },
1902                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1903                         // see comment above.
1904                         javaEncoding = "ISO8859_1";
1905             }
1906         }
1907         else if (javaEncoding.equals("ASCII")) {
1908             if (DEBUG_ENCODINGS) {
1909                 System.out.println("$$$ creating ASCIIReader");
1910             }
1911             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1912         }
1913 
1914         if (DEBUG_ENCODINGS) {
1915             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
1916             if (javaEncoding == encoding) {
1917                 System.out.print(" (IANA encoding)");
1918             }
1919             System.out.println();
1920         }
1921         return new InputStreamReader(inputStream, javaEncoding);
1922 
1923     } // createReader(InputStream,String, Boolean): Reader
1924 
1925     /**
1926      * Returns the IANA encoding name that is auto-detected from
1927      * the bytes specified, with the endian-ness of that encoding where appropriate.
1928      *
1929      * @param b4    The first four bytes of the input.
1930      * @param count The number of bytes actually read.
1931      * @return a 2-element array:  the first element, an IANA-encoding string,
1932      *  the second element a Boolean which is true iff the document is big endian, false
1933      *  if it's little-endian, and null if the distinction isn't relevant.
1934      */
1935     protected Object[] getEncodingName(byte[] b4, int count) {
1936 
1937         if (count < 2) {
1938             return new Object[]{"UTF-8", null};
1939         }
1940 
1941         // UTF-16, with BOM
1942         int b0 = b4[0] & 0xFF;
1943         int b1 = b4[1] & 0xFF;
1944         if (b0 == 0xFE && b1 == 0xFF) {
1945             // UTF-16, big-endian
1946             return new Object [] {"UTF-16BE", new Boolean(true)};
1947         }
1948         if (b0 == 0xFF && b1 == 0xFE) {
1949             // UTF-16, little-endian
1950             return new Object [] {"UTF-16LE", new Boolean(false)};
1951         }
1952 
1953         // default to UTF-8 if we don't have enough bytes to make a
1954         // good determination of the encoding
1955         if (count < 3) {
1956             return new Object [] {"UTF-8", null};
1957         }
1958 
1959         // UTF-8 with a BOM
1960         int b2 = b4[2] & 0xFF;
1961         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1962             return new Object [] {"UTF-8", null};
1963         }
1964 
1965         // default to UTF-8 if we don't have enough bytes to make a
1966         // good determination of the encoding
1967         if (count < 4) {
1968             return new Object [] {"UTF-8", null};
1969         }
1970 
1971         // other encodings
1972         int b3 = b4[3] & 0xFF;
1973         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
1974             // UCS-4, big endian (1234)
1975             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
1976         }
1977         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
1978             // UCS-4, little endian (4321)
1979             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
1980         }
1981         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
1982             // UCS-4, unusual octet order (2143)
1983             // REVISIT: What should this be?
1984             return new Object [] {"ISO-10646-UCS-4", null};
1985         }
1986         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
1987             // UCS-4, unusual octect order (3412)
1988             // REVISIT: What should this be?
1989             return new Object [] {"ISO-10646-UCS-4", null};
1990         }
1991         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
1992             // UTF-16, big-endian, no BOM
1993             // (or could turn out to be UCS-2...
1994             // REVISIT: What should this be?
1995             return new Object [] {"UTF-16BE", new Boolean(true)};
1996         }
1997         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
1998             // UTF-16, little-endian, no BOM
1999             // (or could turn out to be UCS-2...
2000             return new Object [] {"UTF-16LE", new Boolean(false)};
2001         }
2002         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2003             // EBCDIC
2004             // a la xerces1, return CP037 instead of EBCDIC here
2005             return new Object [] {"CP037", null};
2006         }
2007 
2008         // default encoding
2009         return new Object [] {"UTF-8", null};
2010 
2011     } // getEncodingName(byte[],int):Object[]
2012 
    /*
     * TODO: endEntity() has not been implemented here yet; this placeholder
     * comment is kept as a reminder that it still needs to be.
     *
     * Intended contract: ends an entity. Throws XNIException when the
     * entity handler signals an error.
     */
    //
2020     /** Prints the contents of the buffer. */
2021     final void print() {
2022         if (DEBUG_BUFFER) {
2023             if (fCurrentEntity != null) {
2024                 System.out.print('[');
2025                 System.out.print(fCurrentEntity.count);
2026                 System.out.print(' ');
2027                 System.out.print(fCurrentEntity.position);
2028                 if (fCurrentEntity.count > 0) {
2029                     System.out.print(" \"");
2030                     for (int i = 0; i < fCurrentEntity.count; i++) {
2031                         if (i == fCurrentEntity.position) {
2032                             System.out.print('^');
2033                         }
2034                         char c = fCurrentEntity.ch[i];
2035                         switch (c) {
2036                             case '\n': {
2037                                 System.out.print("\\n");
2038                                 break;
2039                             }
2040                             case '\r': {
2041                                 System.out.print("\\r");
2042                                 break;
2043                             }
2044                             case '\t': {
2045                                 System.out.print("\\t");
2046                                 break;
2047                             }
2048                             case '\\': {
2049                                 System.out.print("\\\\");
2050                                 break;
2051                             }
2052                             default: {
2053                                 System.out.print(c);
2054                             }
2055                         }
2056                     }
2057                     if (fCurrentEntity.position == fCurrentEntity.count) {
2058                         System.out.print('^');
2059                     }
2060                     System.out.print('"');
2061                 }
2062                 System.out.print(']');
2063                 System.out.print(" @ ");
2064                 System.out.print(fCurrentEntity.lineNumber);
2065                 System.out.print(',');
2066                 System.out.print(fCurrentEntity.columnNumber);
2067             } else {
2068                 System.out.print("*NO CURRENT ENTITY*");
2069             }
2070         }
2071     }
2072 
2073     /**
2074      * Registers the listener object and provides callback.
2075      * @param listener listener to which call back should be provided when scanner buffer
2076      * is being changed.
2077      */
2078     public void registerListener(XMLBufferListener listener) {
2079         if(!listeners.contains(listener))
2080             listeners.add(listener);
2081     }
2082 
2083     /**
2084      *
2085      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2086      */
2087     private void invokeListeners(int loadPos){
2088         for(int i=0;i<listeners.size();i++){
2089             XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
2090             listener.refresh(loadPos);
2091         }
2092     }
2093 
2094     /**
2095      * Skips space characters appearing immediately on the input that would
2096      * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
2097      * normalization is performed. This is useful when scanning structures
2098      * such as the XMLDecl and TextDecl that can only contain US-ASCII
2099      * characters.
2100      * <p>
2101      * <strong>Note:</strong> The characters are consumed only if they would
2102      * match non-terminal S before end of line normalization is performed.
2103      *
2104      * @return Returns true if at least one space character was skipped.
2105      *
2106      * @throws IOException  Thrown if i/o error occurs.
2107      * @throws EOFException Thrown on end of file.
2108      *
2109      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
2110      */
2111     public final boolean skipDeclSpaces() throws IOException {
2112         if (DEBUG_BUFFER) {
2113             System.out.print("(skipDeclSpaces: ");
2114             //XMLEntityManager.print(fCurrentEntity);
2115             System.out.println();
2116         }
2117 
2118         // load more characters, if needed
2119         if (fCurrentEntity.position == fCurrentEntity.count) {
2120             load(0, true);
2121         }
2122 
2123         // skip spaces
2124         int c = fCurrentEntity.ch[fCurrentEntity.position];
2125         if (XMLChar.isSpace(c)) {
2126             boolean external = fCurrentEntity.isExternal();
2127             do {
2128                 boolean entityChanged = false;
2129                 // handle newlines
2130                 if (c == '\n' || (external && c == '\r')) {
2131                     fCurrentEntity.lineNumber++;
2132                     fCurrentEntity.columnNumber = 1;
2133                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
2134                         fCurrentEntity.ch[0] = (char)c;
2135                         entityChanged = load(1, true);
2136                         if (!entityChanged)
2137                             // the load change the position to be 1,
2138                             // need to restore it when entity not changed
2139                             fCurrentEntity.position = 0;
2140                     }
2141                     if (c == '\r' && external) {
2142                         // REVISIT: Does this need to be updated to fix the
2143                         //          #x0D ^#x0A newline normalization problem? -Ac
2144                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
2145                             fCurrentEntity.position--;
2146                         }
2147                     }
2148                     /*** NEWLINE NORMALIZATION ***
2149                      * else {
2150                      * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
2151                      * && external) {
2152                      * fCurrentEntity.position++;
2153                      * }
2154                      * }
2155                      * /***/
2156                 } else {
2157                     fCurrentEntity.columnNumber++;
2158                 }
2159                 // load more characters, if needed
2160                 if (!entityChanged)
2161                     fCurrentEntity.position++;
2162                 if (fCurrentEntity.position == fCurrentEntity.count) {
2163                     load(0, true);
2164                 }
2165             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
2166             if (DEBUG_BUFFER) {
2167                 System.out.print(")skipDeclSpaces: ");
2168                 //  XMLEntityManager.print(fCurrentEntity);
2169                 System.out.println(" -> true");
2170             }
2171             return true;
2172         }
2173 
2174         // no spaces were found
2175         if (DEBUG_BUFFER) {
2176             System.out.print(")skipDeclSpaces: ");
2177             //XMLEntityManager.print(fCurrentEntity);
2178             System.out.println(" -> false");
2179         }
2180         return false;
2181 
2182     } // skipDeclSpaces():boolean
2183 
2184 
2185 } // class XMLEntityScanner