Old src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java

   1 /*
   2  * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 
  24 
  25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  29 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  30 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  31 import com.sun.org.apache.xerces.internal.util.XMLChar;
  32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  33 import com.sun.org.apache.xerces.internal.xni.*;
  34 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  35 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  36 import com.sun.xml.internal.stream.Entity;
  37 import com.sun.xml.internal.stream.XMLBufferListener;
  38 import java.io.EOFException;
  39 import java.io.IOException;
  40 import java.io.InputStream;
  41 import java.io.InputStreamReader;
  42 import java.io.Reader;
  43 import java.util.Locale;
  44 import java.util.Vector;
  45 
  46 /**
  47  * Implements the entity scanner methods.
  48  *
  49  * @author Neeraj Bajaj, Sun Microsystems
  50  * @author Andy Clark, IBM
  51  * @author Arnaud  Le Hors, IBM
  52  * @author K.Venugopal Sun Microsystems
  53  *
  54  */
  55 public class XMLEntityScanner implements XMLLocator  {
  56 
  57 
    /** Entity currently being scanned; set to {@code null} by every {@code reset} overload. */
    protected Entity.ScannedEntity fCurrentEntity = null ;
    /** Buffer size recorded by {@link #setBufferSize(int)}; starts at the entity manager default. */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager this scanner was created with or last reset to. */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;
    /**
     * Listeners which should know when load is being called.
     * NOTE(review): raw {@code Vector}; elements are presumably
     * {@code XMLBufferListener} instances (imported by this file) -- confirm
     * against the registration code before adding a type argument.
     */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0..126); {@code true} marks an
     * ASCII character accepted as a name character by the name scanners.
     * Filled in by the static initializer.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    /** Debug flag; not referenced in the part of the class documented here. */
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * It is a shared instance whose {@code fillInStackTrace()} is overridden
     * to return {@code this} without capturing a stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names; assigned by the {@code reset} overloads. */
    protected SymbolTable fSymbolTable = null;
    /** Error reporter; assigned by the {@code reset} overloads. */
    protected XMLErrorReporter fErrorReporter = null;
    // Whitespace bookkeeping. whiteSpaceLen and whiteSpaceInfoNeeded are
    // re-initialised (0 / true) by reset(PropertyManager) and
    // reset(XMLComponentManager); their consumers are outside this section.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Property manager reference; not assigned in the part of the class documented here. */
    protected PropertyManager fPropertyManager = null ;

    /** Cached result of {@code fCurrentEntity.isExternal()}, refreshed by {@code setCurrentEntity}. */
    boolean isExternal = false;
 117     static {
 118 
 119         for(int i=0x0041;i<=0x005A ; i++){
 120             VALID_NAMES[i]=true;
 121         }
 122         for(int i=0x0061;i<=0x007A; i++){
 123             VALID_NAMES[i]=true;
 124         }
 125         for(int i=0x0030;i<=0x0039; i++){
 126             VALID_NAMES[i]=true;
 127         }
 128         VALID_NAMES[45]=true;
 129         VALID_NAMES[46]=true;
 130         VALID_NAMES[58]=true;
 131         VALID_NAMES[95]=true;
 132     }
    // SAPJVM: Remembers that the XML version has been set explicitly
    // (setXMLVersion flips this to true), so that
    // XMLStreamReader.getVersion() can find that out.
    boolean xmlVersionSetExplicitly = false;
 136     //
 137     // Constructors
 138     //
 139 
    /**
     * Default constructor. Leaves the symbol table, error reporter and
     * entity manager unset ({@code null}) until they are assigned, e.g. by
     * one of the {@code reset} overloads.
     */
    public XMLEntityScanner() {
    } // <init>()
 143 
 144 
    /**
     * Creates a scanner bound to the given entity manager and initialises it
     * by calling {@link #reset(PropertyManager)}.
     *
     * @param propertyManager source of the symbol table and error reporter
     *                        properties; must not be {@code null}
     * @param entityManager   the entity manager stored in {@code fEntityManager}
     */
    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
        fEntityManager = entityManager ;
        reset(propertyManager);
    } // <init>()
 154 
 155 
    /**
     * Records the buffer size in {@code fBufferSize}. The value is only
     * stored here; no buffer is reallocated by this method.
     *
     * @param size the buffer size to remember
     */
    public final void setBufferSize(int size) {
        // REVISIT: Buffer size passed to entity scanner
        // was not being kept in synch with the actual size
        // of the buffers in each scanned entity. If any
        // of the buffers were actually resized, it was possible
        // that the parser would throw an ArrayIndexOutOfBoundsException
        // for documents which contained names which are longer than
        // the current buffer size. Conceivably the buffer size passed
        // to entity scanner could be used to determine a minimum size
        // for resizing, if doubling its size is smaller than this
        // minimum. -- mrglavas
        fBufferSize = size;
    }
 170 
    /**
     * Resets the scanner from a property manager: re-reads the symbol table
     * and error reporter properties, forgets the current entity, restores
     * the whitespace bookkeeping to its initial values and removes all
     * registered listeners.
     *
     * @param propertyManager source of the {@code SYMBOL_TABLE} and
     *                        {@code ERROR_REPORTER} properties
     */
    public void reset(PropertyManager propertyManager){
        fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
        fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
        fCurrentEntity = null;
        whiteSpaceLen = 0;
        whiteSpaceInfoNeeded = true;
        listeners.clear();
    }
 182 
    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     * <p>
     * Reads the allow-Java-encodings feature, the symbol table and the error
     * reporter, then forgets the current entity, restores the whitespace
     * bookkeeping to its initial values and removes all registered listeners.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Declared for initialization errors
     *                      raised while querying the component manager.
     */
    public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {

        // xerces features
        // (second argument is presumably the default used when the feature is unset -- confirm)
        fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);

        //xerces properties
        fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
        fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
        fCurrentEntity = null;
        whiteSpaceLen = 0;
        whiteSpaceInfoNeeded = true;
        listeners.clear();
    } // reset(XMLComponentManager)
 212 
 213 
 214     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 215             XMLErrorReporter reporter) {
 216         fCurrentEntity = null;
 217         fSymbolTable = symbolTable;
 218         fEntityManager = entityManager;
 219         fErrorReporter = reporter;
 220     }
 221 
 222     /**
 223      * Returns the XML version of the current entity. This will normally be the
 224      * value from the XML or text declaration or defaulted by the parser. Note that
 225      * that this value may be different than the version of the processing rules
 226      * applied to the current entity. For instance, an XML 1.1 document may refer to
 227      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 228      * document. Also note that, for a given entity, this value can only be considered
 229      * final once the XML or text declaration has been read or once it has been
 230      * determined that there is no such declaration.
 231      */
 232     public final String getXMLVersion() {
 233         if (fCurrentEntity != null) {
 234             return fCurrentEntity.xmlVersion;
 235         }
 236         return null;
 237     } // getXMLVersion():String
 238 
    /**
     * Sets the XML version. This method is used by the
     * scanners to report the value of the version pseudo-attribute
     * in an XML or text declaration.
     * <p>
     * Also marks the version as explicitly set. There is no null check:
     * a current entity must be present when this is called.
     *
     * @param xmlVersion the XML version of the current entity
     */
    public final void setXMLVersion(String xmlVersion) {
        xmlVersionSetExplicitly = true; // SAPJVM
        fCurrentEntity.xmlVersion = xmlVersion;
    } // setXMLVersion(String)
 250 
 251 
 252     /** set the instance of current scanned entity.
 253      *   @param ScannedEntity
 254      */
 255 
 256     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 257         fCurrentEntity = scannedEntity ;
 258         if(fCurrentEntity != null){
 259             isExternal = fCurrentEntity.isExternal();
 260             if(DEBUG_BUFFER)
 261                 System.out.println("Current Entity is "+scannedEntity.name);
 262         }
 263     }
 264 
    /**
     * Returns the entity currently being scanned.
     *
     * @return the current entity, or {@code null} if none is set
     */
    public  Entity.ScannedEntity getCurrentEntity(){
        return fCurrentEntity ;
    }
 268     //
 269     // XMLEntityReader methods
 270     //
 271 
 272     /**
 273      * Returns the base system identifier of the currently scanned
 274      * entity, or null if none is available.
 275      */
 276     public final String getBaseSystemId() {
 277         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 278     } // getBaseSystemId():String
 279 
    /**
     * No-op: the argument is ignored. The base system identifier reported by
     * this scanner always comes from the current entity's location.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
     */
    public void setBaseSystemId(String systemId) {
        //no-op
    }
 286 
 287     ///////////// Locator methods start.
 288     public final int getLineNumber(){
 289         //if the entity is closed, we should return -1
 290         //xxx at first place why such call should be there...
 291         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 292     }
 293 
    /**
     * No-op: the argument is ignored. The line number reported by this
     * scanner always comes from the current entity.
     *
     * @param line ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
     */
    public void setLineNumber(int line) {
        //no-op
    }
 300 
 301 
 302     public final int getColumnNumber(){
 303         //if the entity is closed, we should return -1
 304         //xxx at first place why such call should be there...
 305         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 306     }
 307 
    /**
     * No-op: the argument is ignored. The column number reported by this
     * scanner always comes from the current entity.
     *
     * @param col ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
     */
    public void setColumnNumber(int col) {
        // no-op
    }
 314 
 315 
 316     public final int getCharacterOffset(){
 317         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 318     }
 319 
 320     /** Returns the expanded system identifier.  */
 321     public final String getExpandedSystemId() {
 322         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 323     }
 324 
    /**
     * No-op: the argument is ignored. The expanded system identifier
     * reported by this scanner always comes from the current entity's location.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
     */
    public void setExpandedSystemId(String systemId) {
        //no-op
    }
 331 
 332     /** Returns the literal system identifier.  */
 333     public final String getLiteralSystemId() {
 334         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 335     }
 336 
    /**
     * No-op: the argument is ignored. The literal system identifier
     * reported by this scanner always comes from the current entity's location.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
     */
    public void setLiteralSystemId(String systemId) {
        //no-op
    }
 343 
 344     /** Returns the public identifier.  */
 345     public final String getPublicId() {
 346         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 347     }
 348 
    /**
     * No-op: the argument is ignored. The public identifier reported by
     * this scanner always comes from the current entity's location.
     *
     * @param publicId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
     */
    public void setPublicId(String publicId) {
        //no-op
    }
 355 
 356     ///////////////// Locator methods finished.
 357 
    /**
     * Stores the version of the entity currently being scanned. There is no
     * null check: a current entity must be present when this is called.
     *
     * @param version the version to record on the current entity
     */
    public void setVersion(String version){
        fCurrentEntity.version = version;
    }
 362 
 363     public String getVersion(){
 364         if (fCurrentEntity != null)
 365             return fCurrentEntity.version ;
 366         return null;
 367     }
 368 
 369     /**
 370      * Returns the encoding of the current entity.
 371      * Note that, for a given entity, this value can only be
 372      * considered final once the encoding declaration has been read (or once it
 373      * has been determined that there is no such declaration) since, no encoding
 374      * having been specified on the XMLInputSource, the parser
 375      * will make an initial "guess" which could be in error.
 376      */
 377     public final String getEncoding() {
 378         if (fCurrentEntity != null) {
 379             return fCurrentEntity.encoding;
 380         }
 381         return null;
 382     } // getEncoding():String
 383 
 384     /**
 385      * Sets the encoding of the scanner. This method is used by the
 386      * scanners if the XMLDecl or TextDecl line contains an encoding
 387      * pseudo-attribute.
 388      * <p>
 389      * <strong>Note:</strong> The underlying character reader on the
 390      * current entity will be changed to accomodate the new encoding.
 391      * However, the new encoding is ignored if the current reader was
 392      * not constructed from an input stream (e.g. an external entity
 393      * that is resolved directly to the appropriate java.io.Reader
 394      * object).
 395      *
 396      * @param encoding The IANA encoding name of the new encoding.
 397      *
 398      * @throws IOException Thrown if the new encoding is not supported.
 399      *
 400      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 401      */
 402     public final void setEncoding(String encoding) throws IOException {
 403 
 404         if (DEBUG_ENCODINGS) {
 405             System.out.println("$$$ setEncoding: "+encoding);
 406         }
 407 
 408         if (fCurrentEntity.stream != null) {
 409             // if the encoding is the same, don't change the reader and
 410             // re-use the original reader used by the OneCharReader
 411             // NOTE: Besides saving an object, this overcomes deficiencies
 412             //       in the UTF-16 reader supplied with the standard Java
 413             //       distribution (up to and including 1.3). The UTF-16
 414             //       decoder buffers 8K blocks even when only asked to read
 415             //       a single char! -Ac
 416             if (fCurrentEntity.encoding == null ||
 417                     !fCurrentEntity.encoding.equals(encoding)) {
 418                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 419                 // and we know the endian-ness, we shouldn't change readers.
 420                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 421                 // the endian-ness from the encoding we presently have.
 422                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 423                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 424                     if(ENCODING.equals("UTF-16")) return;
 425                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 426                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 427                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 428                         } else {
 429                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 430                         }
 431                         return;
 432                     }
 433                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 434                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 435                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 436                         } else {
 437                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 438                         }
 439                         return;
 440                     }
 441                 }
 442                 // wrap a new reader around the input stream, changing
 443                 // the encoding
 444                 if (DEBUG_ENCODINGS) {
 445                     System.out.println("$$$ creating new reader from stream: "+
 446                             fCurrentEntity.stream);
 447                 }
 448                 //fCurrentEntity.stream.reset();
 449                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 450                 fCurrentEntity.encoding = encoding;
 451 
 452             } else {
 453                 if (DEBUG_ENCODINGS)
 454                     System.out.println("$$$ reusing old reader on stream");
 455             }
 456         }
 457 
 458     } // setEncoding(String)
 459 
    /**
     * Returns true if the current entity being scanned is external.
     * Queries the current entity directly rather than the cached
     * {@code isExternal} field; there is no null check, so a current entity
     * must be present.
     */
    public final boolean isExternal() {
        return fCurrentEntity.isExternal();
    } // isExternal():boolean
 464 
 465     public int getChar(int relative) throws IOException{
 466         if(arrangeCapacity(relative + 1, false)){
 467             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 468         }else{
 469             return -1;
 470         }
 471     }//getChar()
 472 
 473     /**
 474      * Returns the next character on the input.
 475      * <p>
 476      * <strong>Note:</strong> The character is <em>not</em> consumed.
 477      *
 478      * @throws IOException  Thrown if i/o error occurs.
 479      * @throws EOFException Thrown on end of file.
 480      */
 481     public int peekChar() throws IOException {
 482         if (DEBUG_BUFFER) {
 483             System.out.print("(peekChar: ");
 484             print();
 485             System.out.println();
 486         }
 487 
 488         // load more characters, if needed
 489         if (fCurrentEntity.position == fCurrentEntity.count) {
 490             load(0, true, true);
 491         }
 492 
 493         // peek at character
 494         int c = fCurrentEntity.ch[fCurrentEntity.position];
 495 
 496         // return peeked character
 497         if (DEBUG_BUFFER) {
 498             System.out.print(")peekChar: ");
 499             print();
 500             if (isExternal) {
 501                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 502             } else {
 503                 System.out.println(" -> '"+(char)c+"'");
 504             }
 505         }
 506         if (isExternal) {
 507             return c != '\r' ? c : '\n';
 508         } else {
 509             return c;
 510         }
 511 
 512     } // peekChar():int
 513 
    /**
     * Returns the next character on the input and advances past it.
     * <p>
     * A line feed, or a carriage return in an external entity, increments
     * the entity's line number and sets its column number to 1. In an
     * external entity a carriage return, together with a directly following
     * line feed if there is one, is returned as a single line feed. The
     * column number is incremented once before returning in every case.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (fCurrentEntity.position == fCurrentEntity.count) {
                // The line break was the last buffered character: notify the
                // listeners, keep the break in slot 0 and load with offset 1
                // (presumably so the following character lands after it --
                // confirm against load()).
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false, false);
            }
            if (c == '\r' && isExternal) {
                // Swallow the '\n' of a "\r\n" pair; step back if the next
                // character is anything else.
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int
 563 
 564     /**
 565      * Returns a string matching the NMTOKEN production appearing immediately
 566      * on the input as a symbol, or null if NMTOKEN Name string is present.
 567      * <p>
 568      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 569      * <p>
 570      * <strong>Note:</strong> The string returned must be a symbol. The
 571      * SymbolTable can be used for this purpose.
 572      *
 573      * @throws IOException  Thrown if i/o error occurs.
 574      * @throws EOFException Thrown on end of file.
 575      *
 576      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 577      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 578      */
 579     public String scanNmtoken() throws IOException {
 580         if (DEBUG_BUFFER) {
 581             System.out.print("(scanNmtoken: ");
 582             print();
 583             System.out.println();
 584         }
 585 
 586         // load more characters, if needed
 587         if (fCurrentEntity.position == fCurrentEntity.count) {
 588             load(0, true, true);
 589         }
 590 
 591         // scan nmtoken
 592         int offset = fCurrentEntity.position;
 593         boolean vc = false;
 594         char c;
 595         while (true){
 596             //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
 597             c = fCurrentEntity.ch[fCurrentEntity.position];
 598             if(c < 127){
 599                 vc = VALID_NAMES[c];
 600             }else{
 601                 vc = XMLChar.isName(c);
 602             }
 603             if(!vc)break;
 604 
 605             if (++fCurrentEntity.position == fCurrentEntity.count) {
 606                 int length = fCurrentEntity.position - offset;
 607                 invokeListeners(length);
 608                 if (length == fCurrentEntity.fBufferSize) {
 609                     // bad luck we have to resize our buffer
 610                     char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 611                     System.arraycopy(fCurrentEntity.ch, offset,
 612                             tmp, 0, length);
 613                     fCurrentEntity.ch = tmp;
 614                     fCurrentEntity.fBufferSize *= 2;
 615                 } else {
 616                     System.arraycopy(fCurrentEntity.ch, offset,
 617                             fCurrentEntity.ch, 0, length);
 618                 }
 619                 offset = 0;
 620                 if (load(length, false, false)) {
 621                     break;
 622                 }
 623             }
 624         }
 625         int length = fCurrentEntity.position - offset;
 626         fCurrentEntity.columnNumber += length;
 627 
 628         // return nmtoken
 629         String symbol = null;
 630         if (length > 0) {
 631             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 632         }
 633         if (DEBUG_BUFFER) {
 634             System.out.print(")scanNmtoken: ");
 635             print();
 636             System.out.println(" -> "+String.valueOf(symbol));
 637         }
 638         return symbol;
 639 
 640     } // scanNmtoken():String
 641 
 642     /**
 643      * Returns a string matching the Name production appearing immediately
 644      * on the input as a symbol, or null if no Name string is present.
 645      * <p>
 646      * <strong>Note:</strong> The Name characters are consumed.
 647      * <p>
 648      * <strong>Note:</strong> The string returned must be a symbol. The
 649      * SymbolTable can be used for this purpose.
 650      *
 651      * @throws IOException  Thrown if i/o error occurs.
 652      * @throws EOFException Thrown on end of file.
 653      *
 654      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 655      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 656      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 657      */
 658     public String scanName() throws IOException {
 659         if (DEBUG_BUFFER) {
 660             System.out.print("(scanName: ");
 661             print();
 662             System.out.println();
 663         }
 664 
 665         // load more characters, if needed
 666         if (fCurrentEntity.position == fCurrentEntity.count) {
 667             load(0, true, true);
 668         }
 669 
 670         // scan name
 671         int offset = fCurrentEntity.position;
 672         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 673             if (++fCurrentEntity.position == fCurrentEntity.count) {
 674                 invokeListeners(1);
 675                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 676                 offset = 0;
 677                 if (load(1, false, false)) {
 678                     fCurrentEntity.columnNumber++;
 679                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 680 
 681                     if (DEBUG_BUFFER) {
 682                         System.out.print(")scanName: ");
 683                         print();
 684                         System.out.println(" -> "+String.valueOf(symbol));
 685                     }
 686                     return symbol;
 687                 }
 688             }
 689             boolean vc =false;
 690             while (true ){
 691                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 692                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 693                 if(c < 127){
 694                     vc = VALID_NAMES[c];
 695                 }else{
 696                     vc = XMLChar.isName(c);
 697                 }
 698                 if(!vc)break;
 699                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 700                     int length = fCurrentEntity.position - offset;
 701                     invokeListeners(length);
 702                     if (length == fCurrentEntity.fBufferSize) {
 703                         // bad luck we have to resize our buffer
 704                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 705                         System.arraycopy(fCurrentEntity.ch, offset,
 706                                 tmp, 0, length);
 707                         fCurrentEntity.ch = tmp;
 708                         fCurrentEntity.fBufferSize *= 2;
 709                     } else {
 710                         System.arraycopy(fCurrentEntity.ch, offset,
 711                                 fCurrentEntity.ch, 0, length);
 712                     }
 713                     offset = 0;
 714                     if (load(length, false, false)) {
 715                         break;
 716                     }
 717                 }
 718             }
 719         }
 720         int length = fCurrentEntity.position - offset;
 721         fCurrentEntity.columnNumber += length;
 722 
 723         // return name
 724         String symbol;
 725         if (length > 0) {
 726             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 727         } else
 728             symbol = null;
 729         if (DEBUG_BUFFER) {
 730             System.out.print(")scanName: ");
 731             print();
 732             System.out.println(" -> "+String.valueOf(symbol));
 733         }
 734         return symbol;
 735 
 736     } // scanName():String
 737 
 738     /**
 739      * Scans a qualified name from the input, setting the fields of the
 740      * QName structure appropriately.
 741      * <p>
 742      * <strong>Note:</strong> The qualified name characters are consumed.
 743      * <p>
 744      * <strong>Note:</strong> The strings used to set the values of the
 745      * QName structure must be symbols. The SymbolTable can be used for
 746      * this purpose.
          * <p>
          * <strong>Note:</strong> At most one ':' is accepted inside the name;
          * scanning stops in front of a second one. When a ':' was seen, the
          * characters before it become the prefix and the characters after it
          * the local part. Otherwise the prefix is null and the local part is
          * the raw name itself. The fourth argument handed to
          * <code>qname.setValues</code> is always null.
 747      *
 748      * @param qname The qualified name structure to fill.
 749      *
 750      * @return Returns true if a qualified name appeared immediately on
 751      *         the input and was scanned, false otherwise.
 752      *
 753      * @throws IOException  Thrown if i/o error occurs.
 754      * @throws EOFException Thrown on end of file.
 755      *
 756      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 757      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 758      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 759      */
 760     public boolean scanQName(QName qname) throws IOException {
 761         if (DEBUG_BUFFER) {
 762             System.out.print("(scanQName, "+qname+": ");
 763             print();
 764             System.out.println();
 765         }
 766
 767         // load more characters, if needed
 768         if (fCurrentEntity.position == fCurrentEntity.count) {
 769             load(0, true, true);
 770         }
 771
 772         // scan qualified name
 773         int offset = fCurrentEntity.position;
 774
 775         // check whether the first character is a valid name start character
 776         //as defined by production [5] in the XML 1.0 specification.
 777         // Name ::= (Letter | '_' | ':') (NameChar)*
 778
 779         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 780             if (++fCurrentEntity.position == fCurrentEntity.count) {
                     // the name start character was the last one in the buffer:
                     // keep it at index 0 and load more data behind it
 781                 invokeListeners(1);
 782                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 783                 offset = 0;
 784
                     // a true result from load() ends the name right here, so the
                     // single character is the whole (unprefixed) name
 785                 if (load(1, false, false)) {
 786                     fCurrentEntity.columnNumber++;
 787                     //adding into symbol table.
 788                     //XXX We are trying to add single character in SymbolTable??????
 789                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 790                     qname.setValues(null, name, name, null);
 791                     if (DEBUG_BUFFER) {
 792                         System.out.print(")scanQName, "+qname+": ");
 793                         print();
 794                         System.out.println(" -> true");
 795                     }
 796                     return true;
 797                 }
 798             }
                 // buffer index of the first ':' in the name, -1 while none was seen
 799             int index = -1;
 800             boolean vc = false;
 801             while ( true){
 802
 803                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 804                 char c = fCurrentEntity.ch[fCurrentEntity.position];
                     // characters below 127 are classified through the VALID_NAMES
                     // table, all others through XMLChar.isName
 805                 if(c < 127){
 806                     vc = VALID_NAMES[c];
 807                 }else{
 808                     vc = XMLChar.isName(c);
 809                 }
 810                 if(!vc)break;
 811                 if (c == ':') {
                         // a second colon is not consumed: it terminates the name
 812                     if (index != -1) {
 813                         break;
 814                     }
 815                     index = fCurrentEntity.position;
 816                 }
 817                 if (++fCurrentEntity.position == fCurrentEntity.count) {
                         // the buffer ran out in the middle of the name: keep the
                         // characters scanned so far at the start of the buffer
 818                     int length = fCurrentEntity.position - offset;
 819                     invokeListeners(length);
 820                     if (length == fCurrentEntity.fBufferSize) {
 821                         // bad luck we have to resize our buffer
 822                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 823                         System.arraycopy(fCurrentEntity.ch, offset,
 824                                 tmp, 0, length);
 825                         fCurrentEntity.ch = tmp;
 826                         fCurrentEntity.fBufferSize *= 2;
 827                     } else {
 828                         System.arraycopy(fCurrentEntity.ch, offset,
 829                                 fCurrentEntity.ch, 0, length);
 830                     }
                         // the colon moved together with the name, rebase its index
 831                     if (index != -1) {
 832                         index = index - offset;
 833                     }
 834                     offset = 0;
 835                     if (load(length, false, false)) {
 836                         break;
 837                     }
 838                 }
 839             }
 840             int length = fCurrentEntity.position - offset;
 841             fCurrentEntity.columnNumber += length;
 842             if (length > 0) {
 843                 String prefix = null;
 844                 String localpart = null;
 845                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 846                         offset, length);
 847
 848                 if (index != -1) {
                         // split "prefix:localpart" around the colon at index
 849                     int prefixLength = index - offset;
 850                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 851                             offset, prefixLength);
 852                     int len = length - prefixLength - 1;
 853                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 854                             index + 1, len);
 855
 856                 } else {
 857                     localpart = rawname;
 858                 }
 859                 qname.setValues(prefix, localpart, rawname, null);
 860                 if (DEBUG_BUFFER) {
 861                     System.out.print(")scanQName, "+qname+": ");
 862                     print();
 863                     System.out.println(" -> true");
 864                 }
 865                 return true;
 866             }
 867         }
 868
 869         // no qualified name found
 870         if (DEBUG_BUFFER) {
 871             System.out.print(")scanQName, "+qname+": ");
 872             print();
 873             System.out.println(" -> false");
 874         }
 875         return false;
 876
 877     } // scanQName(QName):boolean
 878 
 879     /**
 880      * CHANGED:
 881      * Scans a range of parsed character data and stores the scanned range
 882      * in the supplied XMLString.
          * <p>
          * <strong>Note:</strong> The current implementation calls
          * <code>content.setValues</code>, so the XMLString is pointed at the
          * scanner buffer; the <code>content.append</code> calls are commented
          * out.
 883      * <p>
 884      * <strong>Note:</strong> The characters are consumed.
 885      * <p>
 886      * <strong>Note:</strong> This method does not guarantee to return
 887      * the longest run of parsed character data. This method may return
 888      * before markup due to reaching the end of the input buffer or any
 889      * other reason.
 890      * <p>
          * <strong>Note:</strong> A leading run of line breaks is rewritten to
          * '\n' characters in the buffer; '\r' is only treated as a line break
          * when <code>isExternal</code> is set.
 891      *
 892      * @param content The content structure to fill.
 893      *
 894      * @return Returns the next character on the input, if known. This
 895      *         value may be -1 but this does <em>not</em> designate
 896      *         end of file.
 897      *
 898      * @throws IOException  Thrown if i/o error occurs.
 899      * @throws EOFException Thrown on end of file.
 900      */
 901     public int scanContent(XMLString content) throws IOException {
 902         if (DEBUG_BUFFER) {
 903             System.out.print("(scanContent: ");
 904             print();
 905             System.out.println();
 906         }
 907
 908         // load more characters, if needed
 909         if (fCurrentEntity.position == fCurrentEntity.count) {
 910             load(0, true, true);
 911         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // a single character is left: move it to the start of the
                 // buffer and load more data behind it
 912             invokeListeners(0);
 913             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 914             load(1, false, false);
 915             fCurrentEntity.position = 0;
 916         }
 917
 918         // normalize newlines
 919         int offset = fCurrentEntity.position;
 920         int c = fCurrentEntity.ch[offset];
             // subtracted from the scanned length when the column number is updated
 921         int newlines = 0;
 922         if (c == '\n' || (c == '\r' && isExternal)) {
 923             if (DEBUG_BUFFER) {
 924                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
 925                 print();
 926                 System.out.println();
 927             }
 928             do {
 929                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 930                 if (c == '\r' && isExternal) {
 931                     newlines++;
 932                     fCurrentEntity.lineNumber++;
 933                     fCurrentEntity.columnNumber = 1;
 934                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted: restart at the front, leaving
                             // "newlines" slots for the line breaks seen so far
                             // (they are filled with '\n' after the loop)
 935                         offset = 0;
 936                         fCurrentEntity.position = newlines;
 937                         if (load(newlines, false, true)) {
 938                             break;
 939                         }
 940                     }
 941                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                             // "\r\n" pair: consume the '\n' and advance offset so
                             // the pair contributes one character to the result
 942                         fCurrentEntity.position++;
 943                         offset++;
 944                     }
 945                     /*** NEWLINE NORMALIZATION ***/
 946                     else {
                             // NOTE(review): a lone '\r' increments newlines a
                             // second time here -- confirm this is intended
 947                         newlines++;
 948                     }
 949                 } else if (c == '\n') {
 950                     newlines++;
 951                     fCurrentEntity.lineNumber++;
 952                     fCurrentEntity.columnNumber = 1;
 953                     if (fCurrentEntity.position == fCurrentEntity.count) {
 954                         offset = 0;
 955                         fCurrentEntity.position = newlines;
 956                         if (load(newlines, false, true)) {
 957                             break;
 958                         }
 959                     }
 960                 } else {
                         // not a line break: un-read the character and stop
 961                     fCurrentEntity.position--;
 962                     break;
 963                 }
 964             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // every character of the consumed run is reported as '\n'
 965             for (int i = offset; i < fCurrentEntity.position; i++) {
 966                 fCurrentEntity.ch[i] = '\n';
 967             }
 968             int length = fCurrentEntity.position - offset;
 969             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // only one more character is buffered: return the line
                     // breaks alone and report the next character as unknown
 970                 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
 971                 //on buffering the data..
                     // NOTE(review): the comment above describes append, but the
                     // active call is setValues
 972                 content.setValues(fCurrentEntity.ch, offset, length);
 973                 //content.append(fCurrentEntity.ch, offset, length);
 974                 if (DEBUG_BUFFER) {
 975                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 976                     print();
 977                     System.out.println();
 978                 }
 979                 return -1;
 980             }
 981             if (DEBUG_BUFFER) {
 982                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 983                 print();
 984                 System.out.println();
 985             }
 986         }
 987
             // consume characters until one is rejected by XMLChar.isContent
             // or the buffer ends; the rejected character is not consumed
 988         while (fCurrentEntity.position < fCurrentEntity.count) {
 989             c = fCurrentEntity.ch[fCurrentEntity.position++];
 990             if (!XMLChar.isContent(c)) {
 991                 fCurrentEntity.position--;
 992                 break;
 993             }
 994         }
 995         int length = fCurrentEntity.position - offset;
 996         fCurrentEntity.columnNumber += length - newlines;
 997
 998         //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
 999         //on buffering the data..
1000         content.setValues(fCurrentEntity.ch, offset, length);
1001         //content.append(fCurrentEntity.ch, offset, length);
1002         // return next character
1003         if (fCurrentEntity.position != fCurrentEntity.count) {
1004             c = fCurrentEntity.ch[fCurrentEntity.position];
1005             // REVISIT: Does this need to be updated to fix the
1006             //          #x0D ^#x0A newline normalization problem? -Ac
1007             if (c == '\r' && isExternal) {
1008                 c = '\n';
1009             }
1010         } else {
                 // nothing is buffered behind the scanned range
1011             c = -1;
1012         }
1013         if (DEBUG_BUFFER) {
1014             System.out.print(")scanContent: ");
1015             print();
1016             System.out.println(" -> '"+(char)c+"'");
1017         }
1018         return c;
1019
1020     } // scanContent(XMLString):int
1021 
1022     /**
1023      * Scans a range of attribute value data, setting the fields of the
1024      * XMLString structure, appropriately.
1025      * <p>
1026      * <strong>Note:</strong> The characters are consumed.
1027      * <p>
1028      * <strong>Note:</strong> This method does not guarantee to return
1029      * the longest run of attribute value data. This method may return
1030      * before the quote character due to reaching the end of the input
1031      * buffer or any other reason.
1032      * <p>
1033      * <strong>Note:</strong> The fields contained in the XMLString
1034      * structure are not guaranteed to remain valid upon subsequent calls
1035      * to the entity scanner. Therefore, the caller is responsible for
1036      * immediately using the returned character data or making a copy of
1037      * the character data.
          * <p>
          * <strong>Note:</strong> Scanning stops in front of a '%', in front
          * of a character rejected by <code>XMLChar.isContent</code>, and in
          * front of the quote character unless the current entity is flagged
          * as a literal and <code>isExternal</code> is false.
1038      *
1039      * @param quote   The quote character that signifies the end of the
1040      *                attribute value data.
1041      * @param content The content structure to fill.
1042      *
1043      * @return Returns the next character on the input, if known. This
1044      *         value may be -1 but this does <em>not</em> designate
1045      *         end of file.
1046      *
1047      * @throws IOException  Thrown if i/o error occurs.
1048      * @throws EOFException Thrown on end of file.
1049      */
1050     public int scanLiteral(int quote, XMLString content)
1051     throws IOException {
1052         if (DEBUG_BUFFER) {
1053             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1054             print();
1055             System.out.println();
1056         }
1057         // load more characters, if needed
1058         if (fCurrentEntity.position == fCurrentEntity.count) {
1059             load(0, true, true);
1060         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // a single character is left: move it to the start of the
                 // buffer and load more data behind it
1061             invokeListeners(0);
1062             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1063             load(1, false, false);
1064             fCurrentEntity.position = 0;
1065         }
1066
1067         // normalize newlines
1068         int offset = fCurrentEntity.position;
1069         int c = fCurrentEntity.ch[offset];
             // subtracted from the scanned length when the column number is updated
1070         int newlines = 0;
             // start a fresh list of recorded whitespace positions for this call
1071         if(whiteSpaceInfoNeeded)
1072             whiteSpaceLen=0;
1073         if (c == '\n' || (c == '\r' && isExternal)) {
1074             if (DEBUG_BUFFER) {
1075                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1076                 print();
1077                 System.out.println();
1078             }
1079             do {
1080                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1081                 if (c == '\r' && isExternal) {
1082                     newlines++;
1083                     fCurrentEntity.lineNumber++;
1084                     fCurrentEntity.columnNumber = 1;
1085                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted: restart at the front, leaving
                             // "newlines" slots for the line breaks seen so far
                             // (they are filled with '\n' after the loop)
1086                         offset = 0;
1087                         fCurrentEntity.position = newlines;
1088                         if (load(newlines, false, true)) {
1089                             break;
1090                         }
1091                     }
1092                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                             // "\r\n" pair: consume the '\n' and advance offset so
                             // the pair contributes one character to the result
1093                         fCurrentEntity.position++;
1094                         offset++;
1095                     }
1096                     /*** NEWLINE NORMALIZATION ***/
1097                     else {
                             // NOTE(review): a lone '\r' increments newlines a
                             // second time here -- confirm this is intended
1098                         newlines++;
1099                     }
1100                     /***/
1101                 } else if (c == '\n') {
1102                     newlines++;
1103                     fCurrentEntity.lineNumber++;
1104                     fCurrentEntity.columnNumber = 1;
1105                     if (fCurrentEntity.position == fCurrentEntity.count) {
1106                         offset = 0;
1107                         fCurrentEntity.position = newlines;
1108                         if (load(newlines, false, true)) {
1109                             break;
1110                         }
1111                     }
1112                     /*** NEWLINE NORMALIZATION ***
1113                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1114                      * && external) {
1115                      * fCurrentEntity.position++;
1116                      * offset++;
1117                      * }
1118                      * /***/
1119                 } else {
                         // not a line break: un-read the character and stop
1120                     fCurrentEntity.position--;
1121                     break;
1122                 }
1123             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // every character of the consumed run is reported as '\n' and
                 // its buffer position is recorded; unlike the tab case below,
                 // this recording is not guarded by whiteSpaceInfoNeeded
1124             int i=0;
1125             for ( i = offset; i < fCurrentEntity.position; i++) {
1126                 fCurrentEntity.ch[i] = '\n';
1127                 storeWhiteSpace(i);
1128             }
1129
1130             int length = fCurrentEntity.position - offset;
1131             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // only one more character is buffered: return the line
                     // breaks alone and report the next character as unknown
1132                 content.setValues(fCurrentEntity.ch, offset, length);
1133                 if (DEBUG_BUFFER) {
1134                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1135                     print();
1136                     System.out.println();
1137                 }
1138                 return -1;
1139             }
1140             if (DEBUG_BUFFER) {
1141                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1142                 print();
1143                 System.out.println();
1144             }
1145         }
1146
1147         // scan literal value
1148         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1149             c = fCurrentEntity.ch[fCurrentEntity.position];
                 // the quote only ends the scan when the entity is not a literal
                 // or isExternal is set; '%' and non-content characters always do
1150             if ((c == quote &&
1151                     (!fCurrentEntity.literal || isExternal)) ||
1152                     c == '%' || !XMLChar.isContent(c)) {
1153                 break;
1154             }
                 // remember where tabs occur when the caller asked for it
1155             if (whiteSpaceInfoNeeded && c == '\t') {
1156                 storeWhiteSpace(fCurrentEntity.position);
1157             }
1158         }
1159         int length = fCurrentEntity.position - offset;
1160         fCurrentEntity.columnNumber += length - newlines;
1161         content.setValues(fCurrentEntity.ch, offset, length);
1162
1163         // return next character
1164         if (fCurrentEntity.position != fCurrentEntity.count) {
1165             c = fCurrentEntity.ch[fCurrentEntity.position];
1166             // NOTE: We don't want to accidentally signal the
1167             //       end of the literal if we're expanding an
1168             //       entity appearing in the literal. -Ac
1169             if (c == quote && fCurrentEntity.literal) {
1170                 c = -1;
1171             }
1172         } else {
                 // nothing is buffered behind the scanned range
1173             c = -1;
1174         }
1175         if (DEBUG_BUFFER) {
1176             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1177             print();
1178             System.out.println(" -> '"+(char)c+"'");
1179         }
1180         return c;
1181
1182     } // scanLiteral(int,XMLString):int
1183 
1184     /**
1185      * Save whitespace information. Increase the whitespace buffer by 100
1186      * when needed.
1187      *
1188      * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1189      *
1190      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1191      */
1192     private void storeWhiteSpace(int whiteSpacePos) {
1193         if (whiteSpaceLen >= whiteSpaceLookup.length) {
1194             int [] tmp = new int[whiteSpaceLookup.length + 100];
1195             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1196             whiteSpaceLookup = tmp;
1197         }
1198 
1199         whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1200     }
1201 
1202     //CHANGED:
1203     /**
1204      * Scans a range of character data up to the specified delimiter,
1205      * appending the scanned characters to the supplied XMLStringBuffer.
1206      * <p>
1207      * <strong>Note:</strong> The characters are consumed.
1208      * <p>
1209      * <strong>Note:</strong> This assumes that the internal buffer is at
1210      * least as large as the delimiter and that the delimiter contains at
          * least one character.
1211      * <p>
1212      * <strong>Note:</strong> This method does not guarantee to return
1213      * the longest run of character data. This method may return before
1214      * the delimiter due to reaching the end of the input buffer or any
1215      * other reason.
1216      * <p>
          * <strong>Note:</strong> When the delimiter is found it is consumed
          * but not appended to the buffer.
          * <p>
1217      * @param delimiter The string that signifies the end of the character
1218      *                  data to be scanned.
1219      * @param buffer    The XMLStringBuffer to fill.
1220      *
1221      * @return Returns true if there is more data to scan, false otherwise.
          *         In this implementation false is returned once the delimiter
          *         has been found, or when fewer characters than the delimiter
          *         length remain available. True is returned when the scan
          *         stops at a character flagged by
          *         <code>XMLChar.isInvalid</code>, or after a run of line
          *         breaks that leaves a single character in the buffer.
1222      *
1223      * @throws IOException  Thrown if i/o error occurs.
1224      * @throws EOFException Thrown on end of file.
1225      */
1226     public boolean scanData(String delimiter, XMLStringBuffer buffer)
1227     throws IOException {
1228
1229         boolean done = false;
1230         int delimLen = delimiter.length();
1231         char charAt0 = delimiter.charAt(0);
             // each pass scans one stretch of data; passes repeat until the
             // delimiter has been seen or one of the early returns is taken
1232         do {
1233             if (DEBUG_BUFFER) {
1234                 System.out.print("(scanData: ");
1235                 print();
1236                 System.out.println();
1237             }
1238
1239             // load more characters, if needed
1240
1241             if (fCurrentEntity.position == fCurrentEntity.count) {
1242                 load(0, true, false);
1243             }
1244
1245             boolean bNextEntity = false;
1246
                 // fewer than delimLen characters are buffered: move the rest to
                 // the front and load behind it, until enough data is available
                 // or load() returns true
1247             while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
1248                 && (!bNextEntity))
1249             {
1250               System.arraycopy(fCurrentEntity.ch,
1251                                fCurrentEntity.position,
1252                                fCurrentEntity.ch,
1253                                0,
1254                                fCurrentEntity.count - fCurrentEntity.position);
1255
1256               bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
1257               fCurrentEntity.position = 0;
1258               fCurrentEntity.startPosition = 0;
1259             }
1260
1261             if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
1262                 // something must be wrong with the input:  e.g., file ends in an unterminated comment
                     // hand the remaining characters to the caller and give up
1263                 int length = fCurrentEntity.count - fCurrentEntity.position;
1264                 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                     // NOTE(review): the column advances by count, not by the
                     // appended length -- confirm this is intended
1265                 fCurrentEntity.columnNumber += fCurrentEntity.count;
1266                 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1267                 fCurrentEntity.position = fCurrentEntity.count;
1268                 fCurrentEntity.startPosition = fCurrentEntity.count;
1269                 load(0, true, false);
1270                 return false;
1271             }
1272
1273             // normalize newlines
1274             int offset = fCurrentEntity.position;
1275             int c = fCurrentEntity.ch[offset];
                 // subtracted from the scanned length when the column number is updated
1276             int newlines = 0;
1277             if (c == '\n' || (c == '\r' && isExternal)) {
1278                 if (DEBUG_BUFFER) {
1279                     System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1280                     print();
1281                     System.out.println();
1282                 }
1283                 do {
1284                     c = fCurrentEntity.ch[fCurrentEntity.position++];
1285                     if (c == '\r' && isExternal) {
1286                         newlines++;
1287                         fCurrentEntity.lineNumber++;
1288                         fCurrentEntity.columnNumber = 1;
1289                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // buffer exhausted: restart at the front, leaving
                                 // "newlines" slots for the line breaks seen so far
1290                             offset = 0;
1291                             fCurrentEntity.position = newlines;
1292                             if (load(newlines, false, true)) {
1293                                 break;
1294                             }
1295                         }
1296                         if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                                 // "\r\n" pair: consume the '\n' and advance offset
                                 // so the pair contributes one character
1297                             fCurrentEntity.position++;
1298                             offset++;
1299                         }
1300                         /*** NEWLINE NORMALIZATION ***/
1301                         else {
                                 // NOTE(review): a lone '\r' increments newlines a
                                 // second time here -- confirm this is intended
1302                             newlines++;
1303                         }
1304                     } else if (c == '\n') {
1305                         newlines++;
1306                         fCurrentEntity.lineNumber++;
1307                         fCurrentEntity.columnNumber = 1;
1308                         if (fCurrentEntity.position == fCurrentEntity.count) {
1309                             offset = 0;
1310                             fCurrentEntity.position = newlines;
                                 // NOTE(review): count is reset only in this branch,
                                 // not in the '\r' branch above -- confirm
1311                             fCurrentEntity.count = newlines;
1312                             if (load(newlines, false, true)) {
1313                                 break;
1314                             }
1315                         }
1316                     } else {
                             // not a line break: un-read the character and stop
1317                         fCurrentEntity.position--;
1318                         break;
1319                     }
1320                 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                     // every character of the consumed run is reported as '\n'
1321                 for (int i = offset; i < fCurrentEntity.position; i++) {
1322                     fCurrentEntity.ch[i] = '\n';
1323                 }
1324                 int length = fCurrentEntity.position - offset;
1325                 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                         // only one more character is buffered: deliver the line
                         // breaks and let the caller call again
1326                     buffer.append(fCurrentEntity.ch, offset, length);
1327                     if (DEBUG_BUFFER) {
1328                         System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1329                         print();
1330                         System.out.println();
1331                     }
1332                     return true;
1333                 }
1334                 if (DEBUG_BUFFER) {
1335                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1336                     print();
1337                     System.out.println();
1338                 }
1339             }
1340
1341             // iterate over buffer looking for delimiter
1342             OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1343                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1344                 if (c == charAt0) {
1345                     // looks like we just hit the delimiter
1346                     int delimOffset = fCurrentEntity.position - 1;
1347                     for (int i = 1; i < delimLen; i++) {
1348                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // buffer ended inside a possible delimiter: rewind
                                 // to its first character and leave the scan loop
1349                             fCurrentEntity.position -= i;
1350                             break OUTER;
1351                         }
1352                         c = fCurrentEntity.ch[fCurrentEntity.position++];
1353                         if (delimiter.charAt(i) != c) {
                                 // mismatch: resume right after the first character
1354                             fCurrentEntity.position -= i;
1355                             break;
1356                         }
1357                     }
                         // position only reaches this value after a complete match
1358                     if (fCurrentEntity.position == delimOffset + delimLen) {
1359                         done = true;
1360                         break;
1361                     }
1362                 } else if (c == '\n' || (isExternal && c == '\r')) {
                         // leave the line break for the normalization step of
                         // the next pass
1363                     fCurrentEntity.position--;
1364                     break;
1365                 } else if (XMLChar.isInvalid(c)) {
                         // stop in front of the invalid character and deliver
                         // what was scanned so far
1366                     fCurrentEntity.position--;
1367                     int length = fCurrentEntity.position - offset;
1368                     fCurrentEntity.columnNumber += length - newlines;
1369                     buffer.append(fCurrentEntity.ch, offset, length);
1370                     return true;
1371                 }
1372             }
1373             int length = fCurrentEntity.position - offset;
1374             fCurrentEntity.columnNumber += length - newlines;
                 // the delimiter is consumed but kept out of the appended data
1375             if (done) {
1376                 length -= delimLen;
1377             }
1378             buffer.append(fCurrentEntity.ch, offset, length);
1379
1380             // return true if string was skipped
1381             if (DEBUG_BUFFER) {
1382                 System.out.print(")scanData: ");
1383                 print();
1384                 System.out.println(" -> " + done);
1385             }
1386         } while (!done);
             // done is always true here, so this returns false
1387         return !done;
1388
1389     } // scanData(String,XMLStringBuffer):boolean
1390 
1391     /**
1392      * Skips a character appearing immediately on the input.
1393      * <p>
1394      * <strong>Note:</strong> The character is consumed only if it matches
1395      * the specified character.
          * <p>
          * <strong>Note:</strong> When '\n' is requested, <code>isExternal</code>
          * is set and the input holds a '\r', the '\r' is skipped together with
          * a directly following '\n', and the line number is advanced once.
1396      *
1397      * @param c The character to skip.
1398      *
1399      * @return Returns true if the character was skipped.
1400      *
1401      * @throws IOException  Thrown if i/o error occurs.
1402      * @throws EOFException Thrown on end of file.
1403      */
1404     public boolean skipChar(int c) throws IOException {
1405         if (DEBUG_BUFFER) {
1406             System.out.print("(skipChar, '"+(char)c+"': ");
1407             print();
1408             System.out.println();
1409         }
1410
1411         // load more characters, if needed
1412         if (fCurrentEntity.position == fCurrentEntity.count) {
1413             load(0, true, true);
1414         }
1415
1416         // skip character
1417         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1418         if (cc == c) {
1419             fCurrentEntity.position++;
                 // keep the line and column counters in step with the input
1420             if (c == '\n') {
1421                 fCurrentEntity.lineNumber++;
1422                 fCurrentEntity.columnNumber = 1;
1423             } else {
1424                 fCurrentEntity.columnNumber++;
1425             }
1426             if (DEBUG_BUFFER) {
1427                 System.out.print(")skipChar, '"+(char)c+"': ");
1428                 print();
1429                 System.out.println(" -> true");
1430             }
1431             return true;
1432         } else if (c == '\n' && cc == '\r' && isExternal) {
1433             // handle newlines
                 // NOTE(review): position has not been advanced since cc was
                 // read, so this refill looks unreachable unless the load()
                 // above left position equal to count -- confirm
1434             if (fCurrentEntity.position == fCurrentEntity.count) {
1435                 invokeListeners(1);
1436                 fCurrentEntity.ch[0] = (char)cc;
1437                 load(1, false, false);
1438             }
1439             fCurrentEntity.position++;
                 // a "\r\n" pair counts as one line break
                 // NOTE(review): no check is visible here that position is
                 // still below count before the buffer is read -- confirm
1440             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1441                 fCurrentEntity.position++;
1442             }
1443             fCurrentEntity.lineNumber++;
1444             fCurrentEntity.columnNumber = 1;
1445             if (DEBUG_BUFFER) {
1446                 System.out.print(")skipChar, '"+(char)c+"': ");
1447                 print();
1448                 System.out.println(" -> true");
1449             }
1450             return true;
1451         }
1452
1453         // character was not skipped
1454         if (DEBUG_BUFFER) {
1455             System.out.print(")skipChar, '"+(char)c+"': ");
1456             print();
1457             System.out.println(" -> false");
1458         }
1459         return false;
1460
1461     } // skipChar(int):boolean
1462 
1463     public boolean isSpace(char ch){
1464         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1465     }
1466     /**
1467      * Skips space characters appearing immediately on the input.
1468      * <p>
1469      * <strong>Note:</strong> The characters are consumed only if they are
1470      * space characters.
          * <p>
          * While skipping, the line and column numbers of the current entity
          * are kept up to date: a '\n' (or, when <code>isExternal</code> is
          * set, a '\r' optionally followed by a '\n') starts a new line and
          * any other space character advances the column by one.
1471      *
1472      * @return Returns true if at least one space character was skipped;
          *         false if the next character is not a space or if there is
          *         no current entity after the initial load.
1473      *
1474      * @throws IOException  Thrown if i/o error occurs.
1475      * @throws EOFException Thrown on end of file.
1476      *
1477      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1478      */
1479     public boolean skipSpaces() throws IOException {
1480         if (DEBUG_BUFFER) {
1481             System.out.print("(skipSpaces: ");
1482             print();
1483             System.out.println();
1484         }
1485         //boolean entityChanged = false;
1486         // load more characters, if needed
1487         if (fCurrentEntity.position == fCurrentEntity.count) {
1488             load(0, true, true);
1489         }
1490 
1491         //we are doing this check only in skipSpace() because it is called by
1492         //fMiscDispatcher and we want the parser to exit gracefully when document
1493         //is well-formed.
1494         //it is possible that end of document is reached and
1495         //fCurrentEntity becomes null
1496         //nothing was read so entity changed  'false' should be returned.
1497         if(fCurrentEntity == null){
1498             return false ;
1499         }
1500 
1501         // skip spaces
1502         int c = fCurrentEntity.ch[fCurrentEntity.position];
1503         if (XMLChar.isSpace(c)) {
1504             do {
1505                 boolean entityChanged = false;
1506                 // handle newlines
1507                 if (c == '\n' || (isExternal && c == '\r')) {
1508                     fCurrentEntity.lineNumber++;
1509                     fCurrentEntity.columnNumber = 1;
                         // The newline is the last buffered character: keep it in
                         // ch[0] and load the following characters right after it
                         // (presumably so that a '\n' following a '\r' can still be
                         // examined below).
1510                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1511                         invokeListeners(0);
1512                         fCurrentEntity.ch[0] = (char)c;
1513                         entityChanged = load(1, true, false);
1514                         if (!entityChanged){
1515                             // the load change the position to be 1,
1516                             // need to restore it when entity not changed
1517                             fCurrentEntity.position = 0;
1518                         }else if(fCurrentEntity == null){
                                 // a space was already consumed, so report success
1519                             return true ;
1520                         }
1521                     }
1522                     if (c == '\r' && isExternal) {
1523                         // REVISIT: Does this need to be updated to fix the
1524                         //          #x0D ^#x0A newline normalization problem? -Ac
                             // "\r\n" counts as one newline: step onto the '\n' when it
                             // immediately follows the '\r', otherwise stay on the '\r'
1525                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1526                             fCurrentEntity.position--;
1527                         }
1528                     }
1529                 } else {
1530                     fCurrentEntity.columnNumber++;
1531                 }
1532                 // move past the character just handled, unless the load above
                     // already reported an entity boundary
1533                 if (!entityChanged){
1534                     fCurrentEntity.position++;
1535                 }
1536 
                     // load more characters, if needed
1537                 if (fCurrentEntity.position == fCurrentEntity.count) {
1538                     load(0, true, true);
1539 
1540                     //we are doing this check only in skipSpace() because it is called by
1541                     //fMiscDispatcher and we want the parser to exit gracefully when document
1542                     //is well-formed.
1543 
1544                     //it is possible that end of document is reached and
1545                     //fCurrentEntity becomes null
1546                     //at least one space was already skipped here, so 'true' is returned.
1547                     if(fCurrentEntity == null){
1548                         return true ;
1549                     }
1550 
1551                 }
1552             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1553             if (DEBUG_BUFFER) {
1554                 System.out.print(")skipSpaces: ");
1555                 print();
1556                 System.out.println(" -> true");
1557             }
1558             return true;
1559         }
1560 
1561         // no spaces were found
1562         if (DEBUG_BUFFER) {
1563             System.out.print(")skipSpaces: ");
1564             print();
1565             System.out.println(" -> false");
1566         }
1567         return false;
1568 
1569     } // skipSpaces():boolean
1570 
1571 
1572     /**
1573      * @param legnth This function checks that following number of characters are available.
1574      * to the underlying buffer.
1575      * @return This function returns true if capacity asked is available.
1576      */
1577     public boolean arrangeCapacity(int length) throws IOException{
1578         return arrangeCapacity(length, false);
1579     }
1580 
1581     /**
          * Checks that at least <code>length</code> characters are available in
          * the underlying buffer from the current position onwards, compacting
          * the buffer and loading more characters if necessary. The current
          * position (relative to the unread characters) is preserved.
          *
1582      * @param length       The number of characters that must be available
1583      *                     in the underlying buffer.
1584      * @param changeEntity Whether the underlying load may change the entity.
1585      * @return This function returns true if capacity asked is available.
1586      *
          * @throws IOException Thrown if i/o error occurs.
1587      */
1588     public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1589         //check if the capacity is available in the current buffer
1590         //count is no. of characters in the buffer   [x][m][l]
1591         //position is '0' based
1592         //System.out.println("fCurrent Entity " + fCurrentEntity);
1593         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1594             return true;
1595         }
1596         if(DEBUG_SKIP_STRING){
1597             System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1598             System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1599             System.out.println("length = " + length);
1600         }
1601         boolean entityChanged = false;
1602         //load more characters -- this function shouldn't change the entity
             // NOTE(review): if length is larger than fCurrentEntity.ch.length this
             // loop does not look able to make progress once the buffer is full
             // (load() would be asked to read zero characters) -- confirm callers
             // never request more than the buffer size.
1603         while((fCurrentEntity.count - fCurrentEntity.position) < length){
                 // not enough room between position and the end of the array:
                 // shift the unread characters to the front of the buffer
1604             if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1605                 invokeListeners(0);
1606                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1607                 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1608                 fCurrentEntity.position = 0;
1609             }
1610 
                 // still short: read more characters after the ones already
                 // buffered; load() moves the position, so it is put back afterwards
1611             if((fCurrentEntity.count - fCurrentEntity.position) < length){
1612                 int pos = fCurrentEntity.position;
1613                 invokeListeners(pos);
1614                 entityChanged = load(fCurrentEntity.count, changeEntity, false);
1615                 fCurrentEntity.position = pos;
                     // end of the entity was reached: no more characters can be loaded
1616                 if(entityChanged)break;
1617             }
1618             if(DEBUG_SKIP_STRING){
1619                 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1620                 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1621                 System.out.println("length = " + length);
1622             }
1623         }
1624         //load changes the position.. set it back to the point where we started.
1625 
1626         //after loading check again.
1627         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1628             return true;
1629         } else {
1630             return false;
1631         }
1632     }
1633 
1634     /**
1635      * Skips the specified string appearing immediately on the input.
1636      * <p>
1637      * <strong>Note:</strong> The characters are consumed only if all
1638      * the characters are skipped.
1639      *
1640      * @param s The string to skip.
1641      *
1642      * @return Returns true if the string was skipped.
1643      *
1644      * @throws IOException  Thrown if i/o error occurs.
1645      * @throws EOFException Thrown on end of file.
1646      */
1647     public boolean skipString(String s) throws IOException {
1648 
1649         final int length = s.length();
1650 
1651         //first make sure that required capacity is avaible
1652         if(arrangeCapacity(length, false)){
1653             final int beforeSkip = fCurrentEntity.position ;
1654             int afterSkip = fCurrentEntity.position + length - 1 ;
1655             if(DEBUG_SKIP_STRING){
1656                 System.out.println("skipString,length = " + s + "," + length);
1657                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1658             }
1659 
1660             //s.charAt() indexes are 0 to 'Length -1' based.
1661             int i = length - 1 ;
1662             //check from reverse
1663             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1664                 if(afterSkip-- == beforeSkip){
1665                     fCurrentEntity.position = fCurrentEntity.position + length ;
1666                     fCurrentEntity.columnNumber += length;
1667                     return true;
1668                 }
1669             }
1670         }
1671 
1672         return false;
1673     } // skipString(String):boolean
1674 
1675     public boolean skipString(char [] s) throws IOException {
1676 
1677         final int length = s.length;
1678         //first make sure that required capacity is avaible
1679         if(arrangeCapacity(length, false)){
1680             int beforeSkip = fCurrentEntity.position ;
1681             int afterSkip = fCurrentEntity.position + length  ;
1682 
1683             if(DEBUG_SKIP_STRING){
1684                 System.out.println("skipString,length = " + new String(s) + "," + length);
1685                 System.out.println("skipString,length = " + new String(s) + "," + length);
1686             }
1687 
1688             for(int i=0;i<length;i++){
1689                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1690                    return false;
1691                 }
1692             }
1693             fCurrentEntity.position = fCurrentEntity.position + length ;
1694             fCurrentEntity.columnNumber += length;
1695             return true;
1696 
1697         }
1698 
1699         return false;
1700     }
1701 
1702     //
1703     // Locator methods
1704     //
1705     //
1706     // Private methods
1707     //
1708 
1709     /**
1710      * Loads a chunk of text.
          * <p>
          * Characters are read from the current entity's reader into its
          * character buffer starting at <code>offset</code>. When at least one
          * character is read, the entity's count becomes <code>offset</code>
          * plus the number of characters read and its position becomes
          * <code>offset</code>. When the reader reports end of input, count and
          * position are both set to <code>offset</code>.
1711      *
1712      * @param offset       The offset into the character buffer to
1713      *                     read the next batch of characters.
1714      * @param changeEntity True if the load should change entities
1715      *                     at the end of the entity, otherwise leave
1716      *                     the current entity in place and the entity
1717      *                     boundary will be signaled by the return
1718      *                     value.
1719      * @param notify       Determine whether to notify listeners of
1720      *                     the event
1721      *
1722      * @return Returns true if the end of the current entity was reached
1723      *         by this load operation.
          *
          * @throws IOException Thrown if i/o error occurs; END_OF_DOCUMENT_ENTITY
          *                     is thrown when <code>changeEntity</code> is true
          *                     and no current entity remains after the end of
          *                     the entity has been signaled.
1724      */
1725     final boolean load(int offset, boolean changeEntity, boolean notify)
1726     throws IOException {
1727         if (DEBUG_BUFFER) {
1728             System.out.print("(load, "+offset+": ");
1729             print();
1730             System.out.println();
1731         }
1732         if (notify) {
1733             invokeListeners(offset);
1734         }
1735         //maintaining the count till last load
1736         fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
1737         // read characters
1738         int length = fCurrentEntity.ch.length - offset;
             // entities that may not be read in chunks get at most
             // DEFAULT_XMLDECL_BUFFER_SIZE characters per load
1739         if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
1740             length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
1741         }
1742         if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
1743         int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1744         if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
1745 
1746         // reset count and position
1747         boolean entityChanged = false;
1748         if (count != -1) {
                 // a read of zero characters leaves count and position untouched
1749             if (count != 0) {
1750                 // record the last count
1751                 fCurrentEntity.fLastCount = count;
1752                 fCurrentEntity.count = count + offset;
1753                 fCurrentEntity.position = offset;
1754             }
1755         }
1756         // end of this entity
1757         else {
1758             fCurrentEntity.count = offset;
1759             fCurrentEntity.position = offset;
1760             entityChanged = true;
1761 
1762             if (changeEntity) {
1763                 //notify the entity manager about the end of entity
1764                 fEntityManager.endEntity();
1765                 //signal the end of the document if the current entity becomes null
1766                 if(fCurrentEntity == null){
1767                     throw END_OF_DOCUMENT_ENTITY;
1768                 }
1769                 // handle the trailing edges
1770                 if (fCurrentEntity.position == fCurrentEntity.count) {
1771                     load(0, true, false);
1772                 }
1773             }
1774 
1775         }
1776         if (DEBUG_BUFFER) {
1777             System.out.print(")load, "+offset+": ");
1778             print();
1779             System.out.println();
1780         }
1781 
1782         return entityChanged;
1783 
1784     } // load(int, boolean, boolean):boolean
1785 
1786     /**
1787      * Creates a reader capable of reading the given input stream in
1788      * the specified encoding.
          * <p>
          * Dedicated readers are used for UTF-8, US-ASCII, ISO-10646-UCS-4 and
          * ISO-10646-UCS-2; any other encoding is mapped to a Java encoding
          * name and read through an <code>InputStreamReader</code>. An
          * encoding name that is rejected is reported as a fatal error and, if
          * processing continues, ISO Latin 1 is used instead.
1789      *
1790      * @param inputStream  The input stream.
1791      * @param encoding     The encoding name that the input stream is
1792      *                     encoded using. If the user has specified that
1793      *                     Java encoding names are allowed, then the
1794      *                     encoding name may be a Java encoding name;
1795      *                     otherwise, it is an ianaEncoding name.
          *                     A null value is treated as "UTF-8".
1796      * @param isBigEndian   For encodings (like UCS-4), whose names cannot
1797      *                      specify a byte order, this tells whether the order is bigEndian.  null means
1798      *                      unknown or not relevant.
1799      *
1800      * @return Returns a reader.
          *
          * @throws IOException Thrown if the reader cannot be created.
1801      */
1802     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1803     throws IOException {
1804 
1805         // normalize encoding name
1806         if (encoding == null) {
1807             encoding = "UTF-8";
1808         }
1809 
1810         // try to use an optimized reader
1811         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1812         if (ENCODING.equals("UTF-8")) {
1813             if (DEBUG_ENCODINGS) {
1814                 System.out.println("$$$ creating UTF8Reader");
1815             }
1816             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1817         }
1818         if (ENCODING.equals("US-ASCII")) {
1819             if (DEBUG_ENCODINGS) {
1820                 System.out.println("$$$ creating ASCIIReader");
1821             }
1822             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1823         }
1824         if(ENCODING.equals("ISO-10646-UCS-4")) {
1825             if(isBigEndian != null) {
1826                 boolean isBE = isBigEndian.booleanValue();
1827                 if(isBE) {
1828                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1829                 } else {
1830                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1831                 }
1832             } else {
                     // byte order unknown: report it; if the error reporter returns,
                     // execution falls through to the generic handling below
1833                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1834                         "EncodingByteOrderUnsupported",
1835                         new Object[] { encoding },
1836                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1837             }
1838         }
1839         if(ENCODING.equals("ISO-10646-UCS-2")) {
1840             if(isBigEndian != null) { // should never happen with this encoding...
1841                 boolean isBE = isBigEndian.booleanValue();
1842                 if(isBE) {
1843                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1844                 } else {
1845                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1846                 }
1847             } else {
1848                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1849                         "EncodingByteOrderUnsupported",
1850                         new Object[] { encoding },
1851                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1852             }
1853         }
1854 
1855         // check for valid name
1856         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1857         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1858         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1859             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1860                     "EncodingDeclInvalid",
1861                     new Object[] { encoding },
1862                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1863                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1864                     //       because every byte is a valid ISO Latin 1 character.
1865                     //       It may not translate correctly but if we failed on
1866                     //       the encoding anyway, then we're expecting the content
1867                     //       of the document to be bad. This will just prevent an
1868                     //       invalid UTF-8 sequence to be detected. This is only
1869                     //       important when continue-after-fatal-error is turned
1870                     //       on. -Ac
1871                     encoding = "ISO-8859-1";
1872         }
1873 
1874         // try to use a Java reader
             // NOTE(review): ENCODING still holds the upper-cased name computed
             // before the ISO-8859-1 fallback above, so the lookup below uses the
             // rejected name rather than the fallback -- confirm this is intended.
1875         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1876         if (javaEncoding == null) {
1877             if(fAllowJavaEncodings) {
1878                 javaEncoding = encoding;
1879             } else {
1880                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1881                         "EncodingDeclInvalid",
1882                         new Object[] { encoding },
1883                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1884                         // see comment above.
1885                         javaEncoding = "ISO8859_1";
1886             }
1887         }
1888         else if (javaEncoding.equals("ASCII")) {
1889             if (DEBUG_ENCODINGS) {
1890                 System.out.println("$$$ creating ASCIIReader");
1891             }
                 // NOTE(review): this passes fBufferSize whereas the US-ASCII branch
                 // above passes fCurrentEntity.fBufferSize -- confirm both are meant
                 // to be the same value.
1892             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1893         }
1894 
1895         if (DEBUG_ENCODINGS) {
1896             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
                 // reference comparison: true only when javaEncoding was assigned
                 // from encoding above
1897             if (javaEncoding == encoding) {
1898                 System.out.print(" (IANA encoding)");
1899             }
1900             System.out.println();
1901         }
1902         return new InputStreamReader(inputStream, javaEncoding);
1903 
1904     } // createReader(InputStream,String, Boolean): Reader
1905 
1906     /**
1907      * Returns the IANA encoding name that is auto-detected from
1908      * the bytes specified, with the endian-ness of that encoding where appropriate.
1909      *
1910      * @param b4    The first four bytes of the input.
1911      * @param count The number of bytes actually read.
1912      * @return a 2-element array:  the first element, an IANA-encoding string,
1913      *  the second element a Boolean which is true iff the document is big endian, false
1914      *  if it's little-endian, and null if the distinction isn't relevant.
1915      */
1916     protected Object[] getEncodingName(byte[] b4, int count) {
1917 
1918         if (count < 2) {
1919             return new Object[]{"UTF-8", null};
1920         }
1921 
1922         // UTF-16, with BOM
1923         int b0 = b4[0] & 0xFF;
1924         int b1 = b4[1] & 0xFF;
1925         if (b0 == 0xFE && b1 == 0xFF) {
1926             // UTF-16, big-endian
1927             return new Object [] {"UTF-16BE", new Boolean(true)};
1928         }
1929         if (b0 == 0xFF && b1 == 0xFE) {
1930             // UTF-16, little-endian
1931             return new Object [] {"UTF-16LE", new Boolean(false)};
1932         }
1933 
1934         // default to UTF-8 if we don't have enough bytes to make a
1935         // good determination of the encoding
1936         if (count < 3) {
1937             return new Object [] {"UTF-8", null};
1938         }
1939 
1940         // UTF-8 with a BOM
1941         int b2 = b4[2] & 0xFF;
1942         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1943             return new Object [] {"UTF-8", null};
1944         }
1945 
1946         // default to UTF-8 if we don't have enough bytes to make a
1947         // good determination of the encoding
1948         if (count < 4) {
1949             return new Object [] {"UTF-8", null};
1950         }
1951 
1952         // other encodings
1953         int b3 = b4[3] & 0xFF;
1954         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
1955             // UCS-4, big endian (1234)
1956             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
1957         }
1958         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
1959             // UCS-4, little endian (4321)
1960             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
1961         }
1962         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
1963             // UCS-4, unusual octet order (2143)
1964             // REVISIT: What should this be?
1965             return new Object [] {"ISO-10646-UCS-4", null};
1966         }
1967         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
1968             // UCS-4, unusual octect order (3412)
1969             // REVISIT: What should this be?
1970             return new Object [] {"ISO-10646-UCS-4", null};
1971         }
1972         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
1973             // UTF-16, big-endian, no BOM
1974             // (or could turn out to be UCS-2...
1975             // REVISIT: What should this be?
1976             return new Object [] {"UTF-16BE", new Boolean(true)};
1977         }
1978         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
1979             // UTF-16, little-endian, no BOM
1980             // (or could turn out to be UCS-2...
1981             return new Object [] {"UTF-16LE", new Boolean(false)};
1982         }
1983         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
1984             // EBCDIC
1985             // a la xerces1, return CP037 instead of EBCDIC here
1986             return new Object [] {"CP037", null};
1987         }
1988 
1989         // default encoding
1990         return new Object [] {"UTF-8", null};
1991 
1992     } // getEncodingName(byte[],int):Object[]
1993 
1994     /**
1995      * xxx not removing endEntity() so that i remember that we need to implement it.
1996      * Ends an entity.
1997      *
1998      * @throws XNIException Thrown by entity handler to signal an error.
1999      */
2000     //
2001     /** Prints the contents of the buffer. */
2002     final void print() {
2003         if (DEBUG_BUFFER) {
2004             if (fCurrentEntity != null) {
2005                 System.out.print('[');
2006                 System.out.print(fCurrentEntity.count);
2007                 System.out.print(' ');
2008                 System.out.print(fCurrentEntity.position);
2009                 if (fCurrentEntity.count > 0) {
2010                     System.out.print(" \"");
2011                     for (int i = 0; i < fCurrentEntity.count; i++) {
2012                         if (i == fCurrentEntity.position) {
2013                             System.out.print('^');
2014                         }
2015                         char c = fCurrentEntity.ch[i];
2016                         switch (c) {
2017                             case '\n': {
2018                                 System.out.print("\\n");
2019                                 break;
2020                             }
2021                             case '\r': {
2022                                 System.out.print("\\r");
2023                                 break;
2024                             }
2025                             case '\t': {
2026                                 System.out.print("\\t");
2027                                 break;
2028                             }
2029                             case '\\': {
2030                                 System.out.print("\\\\");
2031                                 break;
2032                             }
2033                             default: {
2034                                 System.out.print(c);
2035                             }
2036                         }
2037                     }
2038                     if (fCurrentEntity.position == fCurrentEntity.count) {
2039                         System.out.print('^');
2040                     }
2041                     System.out.print('"');
2042                 }
2043                 System.out.print(']');
2044                 System.out.print(" @ ");
2045                 System.out.print(fCurrentEntity.lineNumber);
2046                 System.out.print(',');
2047                 System.out.print(fCurrentEntity.columnNumber);
2048             } else {
2049                 System.out.print("*NO CURRENT ENTITY*");
2050             }
2051         }
2052     }
2053 
2054     /**
2055      * Registers the listener object and provides callback.
2056      * @param listener listener to which call back should be provided when scanner buffer
2057      * is being changed.
2058      */
2059     public void registerListener(XMLBufferListener listener) {
2060         if(!listeners.contains(listener))
2061             listeners.add(listener);
2062     }
2063 
2064     /**
2065      *
2066      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2067      */
2068     public void invokeListeners(int loadPos){
2069         for(int i=0;i<listeners.size();i++){
2070             XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
2071             listener.refresh(loadPos);
2072         }
2073     }
2074 
2075     /**
2076      * Skips space characters appearing immediately on the input that would
2077      * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
2078      * normalization is performed. This is useful when scanning structures
2079      * such as the XMLDecl and TextDecl that can only contain US-ASCII
2080      * characters.
2081      * <p>
2082      * <strong>Note:</strong> The characters are consumed only if they would
2083      * match non-terminal S before end of line normalization is performed.
          * <p>
          * Line and column numbers of the current entity are updated while the
          * spaces are consumed.
2084      *
2085      * @return Returns true if at least one space character was skipped.
2086      *
2087      * @throws IOException  Thrown if i/o error occurs.
2088      * @throws EOFException Thrown on end of file.
2089      *
2090      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
2091      */
2092     public final boolean skipDeclSpaces() throws IOException {
2093         if (DEBUG_BUFFER) {
2094             System.out.print("(skipDeclSpaces: ");
2095             //XMLEntityManager.print(fCurrentEntity);
2096             System.out.println();
2097         }
2098 
2099         // load more characters, if needed
             // NOTE(review): unlike skipSpaces(), the loads in this method do not
             // notify the buffer listeners and fCurrentEntity is not checked for
             // null afterwards -- confirm that this is intentional.
2100         if (fCurrentEntity.position == fCurrentEntity.count) {
2101             load(0, true, false);
2102         }
2103 
2104         // skip spaces
2105         int c = fCurrentEntity.ch[fCurrentEntity.position];
2106         if (XMLChar.isSpace(c)) {
                 // the entity is asked directly whether it is external here,
                 // rather than reading the scanner's isExternal field
2107             boolean external = fCurrentEntity.isExternal();
2108             do {
2109                 boolean entityChanged = false;
2110                 // handle newlines
2111                 if (c == '\n' || (external && c == '\r')) {
2112                     fCurrentEntity.lineNumber++;
2113                     fCurrentEntity.columnNumber = 1;
                         // the newline is the last buffered character: keep it in
                         // ch[0] and load the following characters right after it
2114                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
2115                         fCurrentEntity.ch[0] = (char)c;
2116                         entityChanged = load(1, true, false);
2117                         if (!entityChanged)
2118                             // the load change the position to be 1,
2119                             // need to restore it when entity not changed
2120                             fCurrentEntity.position = 0;
2121                     }
2122                     if (c == '\r' && external) {
2123                         // REVISIT: Does this need to be updated to fix the
2124                         //          #x0D ^#x0A newline normalization problem? -Ac
                             // "\r\n" counts as one newline: step onto the '\n' when it
                             // immediately follows the '\r', otherwise stay on the '\r'
2125                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
2126                             fCurrentEntity.position--;
2127                         }
2128                     }
2129                     /*** NEWLINE NORMALIZATION ***
2130                      * else {
2131                      * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
2132                      * && external) {
2133                      * fCurrentEntity.position++;
2134                      * }
2135                      * }
2136                      * /***/
2137                 } else {
2138                     fCurrentEntity.columnNumber++;
2139                 }
2140                 // move past the character just handled, unless the load above
                     // already reported an entity boundary
2141                 if (!entityChanged)
2142                     fCurrentEntity.position++;
                     // load more characters, if needed
2143                 if (fCurrentEntity.position == fCurrentEntity.count) {
2144                     load(0, true, false);
2145                 }
2146             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
2147             if (DEBUG_BUFFER) {
2148                 System.out.print(")skipDeclSpaces: ");
2149                 //  XMLEntityManager.print(fCurrentEntity);
2150                 System.out.println(" -> true");
2151             }
2152             return true;
2153         }
2154 
2155         // no spaces were found
2156         if (DEBUG_BUFFER) {
2157             System.out.print(")skipDeclSpaces: ");
2158             //XMLEntityManager.print(fCurrentEntity);
2159             System.out.println(" -> false");
2160         }
2161         return false;
2162 
2163     } // skipDeclSpaces():boolean
2164 
2165 
2166 } // class XMLEntityScanner