1 /*
   2  * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 
  24 
  25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  29 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  30 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  31 import com.sun.org.apache.xerces.internal.util.XMLChar;
  32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  33 import com.sun.org.apache.xerces.internal.xni.*;
  34 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  35 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  36 import com.sun.xml.internal.stream.Entity;
  37 import com.sun.xml.internal.stream.XMLBufferListener;
  38 import java.io.EOFException;
  39 import java.io.IOException;
  40 import java.io.InputStream;
  41 import java.io.InputStreamReader;
  42 import java.io.Reader;
  43 import java.util.Locale;
  44 import java.util.Vector;
  45 
  46 /**
  47  * Implements the entity scanner methods.
  48  *
  49  * @author Neeraj Bajaj, Sun Microsystems
  50  * @author Andy Clark, IBM
  51  * @author Arnaud  Le Hors, IBM
  52  * @author K.Venugopal Sun Microsystems
  53  *
  54  */
  55 public class XMLEntityScanner implements XMLLocator  {
  56 
  57 
  58     protected Entity.ScannedEntity fCurrentEntity = null ;
  59     protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
  60 
  61     protected XMLEntityManager fEntityManager ;
  62 
  63     /** Debug switching readers for encodings. */
  64     private static final boolean DEBUG_ENCODINGS = false;
  65     /** Listeners which should know when load is being called */
  66     private Vector listeners = new Vector();
  67 
  68     private static final boolean [] VALID_NAMES = new boolean[127];
  69 
  70     /**
  71      * Debug printing of buffer. This debugging flag works best when you
  72      * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
  73      * 64 characters.
  74      */
  75     private static final boolean DEBUG_BUFFER = false;
  76     private static final boolean DEBUG_SKIP_STRING = false;
  77     /**
  78      * To signal the end of the document entity, this exception will be thrown.
  79      */
  80     private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
  81         private static final long serialVersionUID = 980337771224675268L;
  82         public Throwable fillInStackTrace() {
  83             return this;
  84         }
  85     };
  86 
  87     protected SymbolTable fSymbolTable = null;
  88     protected XMLErrorReporter fErrorReporter = null;
  89     int [] whiteSpaceLookup = new int[100];
  90     int whiteSpaceLen = 0;
  91     boolean whiteSpaceInfoNeeded = true;
  92 
  93     /**
  94      * Allow Java encoding names. This feature identifier is:
  95      * http://apache.org/xml/features/allow-java-encodings
  96      */
  97     protected boolean fAllowJavaEncodings;
  98 
  99     //Will be used only during internal subsets.
 100     //for appending data.
 101 
 102     /** Property identifier: symbol table. */
 103     protected static final String SYMBOL_TABLE =
 104             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
 105 
 106     /** Property identifier: error reporter. */
 107     protected static final String ERROR_REPORTER =
 108             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
 109 
 110     /** Feature identifier: allow Java encodings. */
 111     protected static final String ALLOW_JAVA_ENCODINGS =
 112             Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
 113 
 114     protected PropertyManager fPropertyManager = null ;
 115 
 116     boolean isExternal = false;
 117     static {
 118 
 119         for(int i=0x0041;i<=0x005A ; i++){
 120             VALID_NAMES[i]=true;
 121         }
 122         for(int i=0x0061;i<=0x007A; i++){
 123             VALID_NAMES[i]=true;
 124         }
 125         for(int i=0x0030;i<=0x0039; i++){
 126             VALID_NAMES[i]=true;
 127         }
 128         VALID_NAMES[45]=true;
 129         VALID_NAMES[46]=true;
 130         VALID_NAMES[58]=true;
 131         VALID_NAMES[95]=true;
 132     }
 133     // SAPJVM: Remember, that the XML version has explicitly been set,
 134     // so that XMLStreamReader.getVersion() can find that out.
 135     boolean xmlVersionSetExplicitly = false;
 136     //
 137     // Constructors
 138     //
 139 
 140     /** Default constructor. */
 141     public XMLEntityScanner() {
 142     } // <init>()
 143 
 144 
 145     /**  private constructor, this class can only be instantiated within this class. Instance of this class should
 146      *    be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
 147      *    @see getEntityScanner()
 148      *    @see getEntityScanner(ScannedEntity)
 149      */
 150     public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
 151         fEntityManager = entityManager ;
 152         reset(propertyManager);
 153     } // <init>()
 154 
 155 
 156     // set buffer size:
 157     public final void setBufferSize(int size) {
 158         // REVISIT: Buffer size passed to entity scanner
 159         // was not being kept in synch with the actual size
 160         // of the buffers in each scanned entity. If any
 161         // of the buffers were actually resized, it was possible
 162         // that the parser would throw an ArrayIndexOutOfBoundsException
 163         // for documents which contained names which are longer than
 164         // the current buffer size. Conceivably the buffer size passed
 165         // to entity scanner could be used to determine a minimum size
 166         // for resizing, if doubling its size is smaller than this
 167         // minimum. -- mrglavas
 168         fBufferSize = size;
 169     }
 170 
 171     /**
 172      * Resets the components.
 173      */
 174     public void reset(PropertyManager propertyManager){
 175         fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
 176         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
 177         fCurrentEntity = null;
 178         whiteSpaceLen = 0;
 179         whiteSpaceInfoNeeded = true;
 180         listeners.clear();
 181     }
 182 
 183     /**
 184      * Resets the component. The component can query the component manager
 185      * about any features and properties that affect the operation of the
 186      * component.
 187      *
 188      * @param componentManager The component manager.
 189      *
 190      * @throws SAXException Thrown by component on initialization error.
 191      *                      For example, if a feature or property is
 192      *                      required for the operation of the component, the
 193      *                      component manager may throw a
 194      *                      SAXNotRecognizedException or a
 195      *                      SAXNotSupportedException.
 196      */
 197     public void reset(XMLComponentManager componentManager)
 198     throws XMLConfigurationException {
 199 
 200         //System.out.println(" this is being called");
 201         // xerces features
 202         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
 203 
 204         //xerces properties
 205         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 206         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 207         fCurrentEntity = null;
 208         whiteSpaceLen = 0;
 209         whiteSpaceInfoNeeded = true;
 210         listeners.clear();
 211     } // reset(XMLComponentManager)
 212 
 213 
 214     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 215             XMLErrorReporter reporter) {
 216         fCurrentEntity = null;
 217         fSymbolTable = symbolTable;
 218         fEntityManager = entityManager;
 219         fErrorReporter = reporter;
 220     }
 221 
 222     /**
 223      * Returns the XML version of the current entity. This will normally be the
 224      * value from the XML or text declaration or defaulted by the parser. Note that
 225      * that this value may be different than the version of the processing rules
 226      * applied to the current entity. For instance, an XML 1.1 document may refer to
 227      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 228      * document. Also note that, for a given entity, this value can only be considered
 229      * final once the XML or text declaration has been read or once it has been
 230      * determined that there is no such declaration.
 231      */
 232     public final String getXMLVersion() {
 233         if (fCurrentEntity != null) {
 234             return fCurrentEntity.xmlVersion;
 235         }
 236         return null;
 237     } // getXMLVersion():String
 238 
 239     /**
 240      * Sets the XML version. This method is used by the
 241      * scanners to report the value of the version pseudo-attribute
 242      * in an XML or text declaration.
 243      *
 244      * @param xmlVersion the XML version of the current entity
 245      */
 246     public final void setXMLVersion(String xmlVersion) {
 247         xmlVersionSetExplicitly = true; // SAPJVM
 248         fCurrentEntity.xmlVersion = xmlVersion;
 249     } // setXMLVersion(String)
 250 
 251 
 252     /** set the instance of current scanned entity.
 253      *   @param ScannedEntity
 254      */
 255 
 256     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 257         fCurrentEntity = scannedEntity ;
 258         if(fCurrentEntity != null){
 259             isExternal = fCurrentEntity.isExternal();
 260             if(DEBUG_BUFFER)
 261                 System.out.println("Current Entity is "+scannedEntity.name);
 262         }
 263     }
 264 
 265     public  Entity.ScannedEntity getCurrentEntity(){
 266         return fCurrentEntity ;
 267     }
 268     //
 269     // XMLEntityReader methods
 270     //
 271 
 272     /**
 273      * Returns the base system identifier of the currently scanned
 274      * entity, or null if none is available.
 275      */
 276     public final String getBaseSystemId() {
 277         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 278     } // getBaseSystemId():String
 279 
    /**
     * No-op: the argument is ignored. The base system identifier reported by
     * {@link #getBaseSystemId()} always comes from the current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
     */
    public void setBaseSystemId(String systemId) {
        // no-op
    }
 286 
 287     ///////////// Locator methods start.
 288     public final int getLineNumber(){
 289         //if the entity is closed, we should return -1
 290         //xxx at first place why such call should be there...
 291         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 292     }
 293 
    /**
     * No-op: the argument is ignored. The line number reported by
     * {@link #getLineNumber()} always comes from the current entity.
     *
     * @param line ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
     */
    public void setLineNumber(int line) {
        // no-op
    }
 300 
 301 
 302     public final int getColumnNumber(){
 303         //if the entity is closed, we should return -1
 304         //xxx at first place why such call should be there...
 305         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 306     }
 307 
    /**
     * No-op: the argument is ignored. The column number reported by
     * {@link #getColumnNumber()} always comes from the current entity.
     *
     * @param col ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
     */
    public void setColumnNumber(int col) {
        // no-op
    }
 314 
 315 
 316     public final int getCharacterOffset(){
 317         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 318     }
 319 
 320     /** Returns the expanded system identifier.  */
 321     public final String getExpandedSystemId() {
 322         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 323     }
 324 
    /**
     * No-op: the argument is ignored. The expanded system identifier
     * reported by {@link #getExpandedSystemId()} always comes from the
     * current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
     */
    public void setExpandedSystemId(String systemId) {
        // no-op
    }
 331 
 332     /** Returns the literal system identifier.  */
 333     public final String getLiteralSystemId() {
 334         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 335     }
 336 
    /**
     * No-op: the argument is ignored. The literal system identifier
     * reported by {@link #getLiteralSystemId()} always comes from the
     * current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
     */
    public void setLiteralSystemId(String systemId) {
        // no-op
    }
 343 
 344     /** Returns the public identifier.  */
 345     public final String getPublicId() {
 346         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 347     }
 348 
    /**
     * No-op: the argument is ignored. The public identifier reported by
     * {@link #getPublicId()} always comes from the current entity.
     *
     * @param publicId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
     */
    public void setPublicId(String publicId) {
        // no-op
    }
 355 
 356     ///////////////// Locator methods finished.
 357 
 358     /** the version of the current entity being scanned */
 359     public void setVersion(String version){
 360         fCurrentEntity.version = version;
 361     }
 362 
 363     public String getVersion(){
 364         if (fCurrentEntity != null)
 365             return fCurrentEntity.version ;
 366         return null;
 367     }
 368 
 369     /**
 370      * Returns the encoding of the current entity.
 371      * Note that, for a given entity, this value can only be
 372      * considered final once the encoding declaration has been read (or once it
 373      * has been determined that there is no such declaration) since, no encoding
 374      * having been specified on the XMLInputSource, the parser
 375      * will make an initial "guess" which could be in error.
 376      */
 377     public final String getEncoding() {
 378         if (fCurrentEntity != null) {
 379             return fCurrentEntity.encoding;
 380         }
 381         return null;
 382     } // getEncoding():String
 383 
 384     /**
 385      * Sets the encoding of the scanner. This method is used by the
 386      * scanners if the XMLDecl or TextDecl line contains an encoding
 387      * pseudo-attribute.
 388      * <p>
 389      * <strong>Note:</strong> The underlying character reader on the
 390      * current entity will be changed to accomodate the new encoding.
 391      * However, the new encoding is ignored if the current reader was
 392      * not constructed from an input stream (e.g. an external entity
 393      * that is resolved directly to the appropriate java.io.Reader
 394      * object).
 395      *
 396      * @param encoding The IANA encoding name of the new encoding.
 397      *
 398      * @throws IOException Thrown if the new encoding is not supported.
 399      *
 400      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 401      */
 402     public final void setEncoding(String encoding) throws IOException {
 403 
 404         if (DEBUG_ENCODINGS) {
 405             System.out.println("$$$ setEncoding: "+encoding);
 406         }
 407 
 408         if (fCurrentEntity.stream != null) {
 409             // if the encoding is the same, don't change the reader and
 410             // re-use the original reader used by the OneCharReader
 411             // NOTE: Besides saving an object, this overcomes deficiencies
 412             //       in the UTF-16 reader supplied with the standard Java
 413             //       distribution (up to and including 1.3). The UTF-16
 414             //       decoder buffers 8K blocks even when only asked to read
 415             //       a single char! -Ac
 416             if (fCurrentEntity.encoding == null ||
 417                     !fCurrentEntity.encoding.equals(encoding)) {
 418                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 419                 // and we know the endian-ness, we shouldn't change readers.
 420                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 421                 // the endian-ness from the encoding we presently have.
 422                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 423                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 424                     if(ENCODING.equals("UTF-16")) return;
 425                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 426                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 427                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 428                         } else {
 429                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 430                         }
 431                         return;
 432                     }
 433                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 434                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 435                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 436                         } else {
 437                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 438                         }
 439                         return;
 440                     }
 441                 }
 442                 // wrap a new reader around the input stream, changing
 443                 // the encoding
 444                 if (DEBUG_ENCODINGS) {
 445                     System.out.println("$$$ creating new reader from stream: "+
 446                             fCurrentEntity.stream);
 447                 }
 448                 //fCurrentEntity.stream.reset();
 449                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 450                 fCurrentEntity.encoding = encoding;
 451 
 452             } else {
 453                 if (DEBUG_ENCODINGS)
 454                     System.out.println("$$$ reusing old reader on stream");
 455             }
 456         }
 457 
 458     } // setEncoding(String)
 459 
 460     /** Returns true if the current entity being scanned is external. */
 461     public final boolean isExternal() {
 462         return fCurrentEntity.isExternal();
 463     } // isExternal():boolean
 464 
 465     public int getChar(int relative) throws IOException{
 466         if(arrangeCapacity(relative + 1, false)){
 467             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 468         }else{
 469             return -1;
 470         }
 471     }//getChar()
 472 
 473     /**
 474      * Returns the next character on the input.
 475      * <p>
 476      * <strong>Note:</strong> The character is <em>not</em> consumed.
 477      *
 478      * @throws IOException  Thrown if i/o error occurs.
 479      * @throws EOFException Thrown on end of file.
 480      */
 481     public int peekChar() throws IOException {
 482         if (DEBUG_BUFFER) {
 483             System.out.print("(peekChar: ");
 484             print();
 485             System.out.println();
 486         }
 487 
 488         // load more characters, if needed
 489         if (fCurrentEntity.position == fCurrentEntity.count) {
 490             load(0, true, true);
 491         }
 492 
 493         // peek at character
 494         int c = fCurrentEntity.ch[fCurrentEntity.position];
 495 
 496         // return peeked character
 497         if (DEBUG_BUFFER) {
 498             System.out.print(")peekChar: ");
 499             print();
 500             if (isExternal) {
 501                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 502             } else {
 503                 System.out.println(" -> '"+(char)c+"'");
 504             }
 505         }
 506         if (isExternal) {
 507             return c != '\r' ? c : '\n';
 508         } else {
 509             return c;
 510         }
 511 
 512     } // peekChar():int
 513 
 514     /**
 515      * Returns the next character on the input.
 516      * <p>
 517      * <strong>Note:</strong> The character is consumed.
 518      *
 519      * @throws IOException  Thrown if i/o error occurs.
 520      * @throws EOFException Thrown on end of file.
 521      */
 522     public int scanChar() throws IOException {
 523         if (DEBUG_BUFFER) {
 524             System.out.print("(scanChar: ");
 525             print();
 526             System.out.println();
 527         }
 528 
 529         // load more characters, if needed
 530         if (fCurrentEntity.position == fCurrentEntity.count) {
 531             load(0, true, true);
 532         }
 533 
 534         // scan character
 535         int c = fCurrentEntity.ch[fCurrentEntity.position++];
 536         if (c == '\n' ||
 537                 (c == '\r' && isExternal)) {
 538             fCurrentEntity.lineNumber++;
 539             fCurrentEntity.columnNumber = 1;
 540             if (fCurrentEntity.position == fCurrentEntity.count) {
 541                 fCurrentEntity.ch[0] = (char)c;
 542                 load(1, false, true);
 543             }
 544             if (c == '\r' && isExternal) {
 545                 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
 546                     fCurrentEntity.position--;
 547                 }
 548                 c = '\n';
 549             }
 550         }
 551 
 552         // return character that was scanned
 553         if (DEBUG_BUFFER) {
 554             System.out.print(")scanChar: ");
 555             print();
 556             System.out.println(" -> '"+(char)c+"'");
 557         }
 558         fCurrentEntity.columnNumber++;
 559         return c;
 560 
 561     } // scanChar():int
 562 
 563     /**
 564      * Returns a string matching the NMTOKEN production appearing immediately
 565      * on the input as a symbol, or null if NMTOKEN Name string is present.
 566      * <p>
 567      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 568      * <p>
 569      * <strong>Note:</strong> The string returned must be a symbol. The
 570      * SymbolTable can be used for this purpose.
 571      *
 572      * @throws IOException  Thrown if i/o error occurs.
 573      * @throws EOFException Thrown on end of file.
 574      *
 575      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 576      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 577      */
 578     public String scanNmtoken() throws IOException {
 579         if (DEBUG_BUFFER) {
 580             System.out.print("(scanNmtoken: ");
 581             print();
 582             System.out.println();
 583         }
 584 
 585         // load more characters, if needed
 586         if (fCurrentEntity.position == fCurrentEntity.count) {
 587             load(0, true, true);
 588         }
 589 
 590         // scan nmtoken
 591         int offset = fCurrentEntity.position;
 592         boolean vc = false;
 593         char c;
 594         while (true){
 595             //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
 596             c = fCurrentEntity.ch[fCurrentEntity.position];
 597             if(c < 127){
 598                 vc = VALID_NAMES[c];
 599             }else{
 600                 vc = XMLChar.isName(c);
 601             }
 602             if(!vc)break;
 603 
 604             if (++fCurrentEntity.position == fCurrentEntity.count) {
 605                 int length = fCurrentEntity.position - offset;
 606                 invokeListeners(length);
 607                 if (length == fCurrentEntity.fBufferSize) {
 608                     // bad luck we have to resize our buffer
 609                     char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 610                     System.arraycopy(fCurrentEntity.ch, offset,
 611                             tmp, 0, length);
 612                     fCurrentEntity.ch = tmp;
 613                     fCurrentEntity.fBufferSize *= 2;
 614                 } else {
 615                     System.arraycopy(fCurrentEntity.ch, offset,
 616                             fCurrentEntity.ch, 0, length);
 617                 }
 618                 offset = 0;
 619                 if (load(length, false, false)) {
 620                     break;
 621                 }
 622             }
 623         }
 624         int length = fCurrentEntity.position - offset;
 625         fCurrentEntity.columnNumber += length;
 626 
 627         // return nmtoken
 628         String symbol = null;
 629         if (length > 0) {
 630             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 631         }
 632         if (DEBUG_BUFFER) {
 633             System.out.print(")scanNmtoken: ");
 634             print();
 635             System.out.println(" -> "+String.valueOf(symbol));
 636         }
 637         return symbol;
 638 
 639     } // scanNmtoken():String
 640 
 641     /**
 642      * Returns a string matching the Name production appearing immediately
 643      * on the input as a symbol, or null if no Name string is present.
 644      * <p>
 645      * <strong>Note:</strong> The Name characters are consumed.
 646      * <p>
 647      * <strong>Note:</strong> The string returned must be a symbol. The
 648      * SymbolTable can be used for this purpose.
 649      *
 650      * @throws IOException  Thrown if i/o error occurs.
 651      * @throws EOFException Thrown on end of file.
 652      *
 653      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 654      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 655      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 656      */
 657     public String scanName() throws IOException {
 658         if (DEBUG_BUFFER) {
 659             System.out.print("(scanName: ");
 660             print();
 661             System.out.println();
 662         }
 663 
 664         // load more characters, if needed
 665         if (fCurrentEntity.position == fCurrentEntity.count) {
 666             load(0, true, true);
 667         }
 668 
 669         // scan name
 670         int offset = fCurrentEntity.position;
 671         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 672             if (++fCurrentEntity.position == fCurrentEntity.count) {
 673                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 674                 offset = 0;
 675                 if (load(1, false, true)) {
 676                     fCurrentEntity.columnNumber++;
 677                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 678 
 679                     if (DEBUG_BUFFER) {
 680                         System.out.print(")scanName: ");
 681                         print();
 682                         System.out.println(" -> "+String.valueOf(symbol));
 683                     }
 684                     return symbol;
 685                 }
 686             }
 687             boolean vc =false;
 688             while (true ){
 689                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 690                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 691                 if(c < 127){
 692                     vc = VALID_NAMES[c];
 693                 }else{
 694                     vc = XMLChar.isName(c);
 695                 }
 696                 if(!vc)break;
 697                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 698                     int length = fCurrentEntity.position - offset;
 699                     invokeListeners(length);
 700                     if (length == fCurrentEntity.fBufferSize) {
 701                         // bad luck we have to resize our buffer
 702                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 703                         System.arraycopy(fCurrentEntity.ch, offset,
 704                                 tmp, 0, length);
 705                         fCurrentEntity.ch = tmp;
 706                         fCurrentEntity.fBufferSize *= 2;
 707                     } else {
 708                         System.arraycopy(fCurrentEntity.ch, offset,
 709                                 fCurrentEntity.ch, 0, length);
 710                     }
 711                     offset = 0;
 712                     if (load(length, false, false)) {
 713                         break;
 714                     }
 715                 }
 716             }
 717         }
 718         int length = fCurrentEntity.position - offset;
 719         fCurrentEntity.columnNumber += length;
 720 
 721         // return name
 722         String symbol;
 723         if (length > 0) {
 724             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 725         } else
 726             symbol = null;
 727         if (DEBUG_BUFFER) {
 728             System.out.print(")scanName: ");
 729             print();
 730             System.out.println(" -> "+String.valueOf(symbol));
 731         }
 732         return symbol;
 733 
 734     } // scanName():String
 735 
 736     /**
 737      * Scans a qualified name from the input, setting the fields of the
 738      * QName structure appropriately.
 739      * <p>
 740      * <strong>Note:</strong> The qualified name characters are consumed.
 741      * <p>
 742      * <strong>Note:</strong> The strings used to set the values of the
 743      * QName structure must be symbols. The SymbolTable can be used for
 744      * this purpose.
 745      *
 746      * @param qname The qualified name structure to fill.
 747      *
 748      * @return Returns true if a qualified name appeared immediately on
 749      *         the input and was scanned, false otherwise.
 750      *
 751      * @throws IOException  Thrown if i/o error occurs.
 752      * @throws EOFException Thrown on end of file.
 753      *
 754      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 755      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 756      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 757      */
 758     public boolean scanQName(QName qname) throws IOException {
 759         if (DEBUG_BUFFER) {
 760             System.out.print("(scanQName, "+qname+": ");
 761             print();
 762             System.out.println();
 763         }
 764 
 765         // load more characters, if needed
 766         if (fCurrentEntity.position == fCurrentEntity.count) {
 767             load(0, true, true);
 768         }
 769 
 770         // scan qualified name
 771         int offset = fCurrentEntity.position;
 772 
 773         //making a check if if the specified character is a valid name start character
 774         //as defined by production [5] in the XML 1.0 specification.
 775         // Name ::= (Letter | '_' | ':') (NameChar)*
 776 
 777         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 778             if (++fCurrentEntity.position == fCurrentEntity.count) {
 779                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 780                 offset = 0;
 781 
 782                 if (load(1, false, true)) {
 783                     fCurrentEntity.columnNumber++;
 784                     //adding into symbol table.
 785                     //XXX We are trying to add single character in SymbolTable??????
 786                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 787                     qname.setValues(null, name, name, null);
 788                     if (DEBUG_BUFFER) {
 789                         System.out.print(")scanQName, "+qname+": ");
 790                         print();
 791                         System.out.println(" -> true");
 792                     }
 793                     return true;
 794                 }
 795             }
 796             int index = -1;
 797             boolean vc = false;
 798             while ( true){
 799 
 800                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 801                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 802                 if(c < 127){
 803                     vc = VALID_NAMES[c];
 804                 }else{
 805                     vc = XMLChar.isName(c);
 806                 }
 807                 if(!vc)break;
 808                 if (c == ':') {
 809                     if (index != -1) {
 810                         break;
 811                     }
 812                     index = fCurrentEntity.position;
 813                 }
 814                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 815                     int length = fCurrentEntity.position - offset;
 816                     invokeListeners(length);
 817                     if (length == fCurrentEntity.fBufferSize) {
 818                         // bad luck we have to resize our buffer
 819                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 820                         System.arraycopy(fCurrentEntity.ch, offset,
 821                                 tmp, 0, length);
 822                         fCurrentEntity.ch = tmp;
 823                         fCurrentEntity.fBufferSize *= 2;
 824                     } else {
 825                         System.arraycopy(fCurrentEntity.ch, offset,
 826                                 fCurrentEntity.ch, 0, length);
 827                     }
 828                     if (index != -1) {
 829                         index = index - offset;
 830                     }
 831                     offset = 0;
 832                     if (load(length, false, false)) {
 833                         break;
 834                     }
 835                 }
 836             }
 837             int length = fCurrentEntity.position - offset;
 838             fCurrentEntity.columnNumber += length;
 839             if (length > 0) {
 840                 String prefix = null;
 841                 String localpart = null;
 842                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 843                         offset, length);
 844 
 845                 if (index != -1) {
 846                     int prefixLength = index - offset;
 847                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 848                             offset, prefixLength);
 849                     int len = length - prefixLength - 1;
 850                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 851                             index + 1, len);
 852 
 853                 } else {
 854                     localpart = rawname;
 855                 }
 856                 qname.setValues(prefix, localpart, rawname, null);
 857                 if (DEBUG_BUFFER) {
 858                     System.out.print(")scanQName, "+qname+": ");
 859                     print();
 860                     System.out.println(" -> true");
 861                 }
 862                 return true;
 863             }
 864         }
 865 
 866         // no qualified name found
 867         if (DEBUG_BUFFER) {
 868             System.out.print(")scanQName, "+qname+": ");
 869             print();
 870             System.out.println(" -> false");
 871         }
 872         return false;
 873 
 874     } // scanQName(QName):boolean
 875 
 876     /**
 877      * CHANGED:
 878      * Scans a range of parsed character data and points the supplied
 879      * XMLString at the scanned range of the entity buffer.
          * <p>
          * <strong>Note:</strong> The range is stored with
          * <code>setValues</code>, which replaces whatever the structure
          * held before; the <code>append</code> alternative is commented
          * out in the body.
 880      * <p>
 881      * <strong>Note:</strong> The characters are consumed.
 882      * <p>
 883      * <strong>Note:</strong> This method does not guarantee to return
 884      * the longest run of parsed character data. This method may return
 885      * before markup due to reaching the end of the input buffer or any
 886      * other reason.
 887      * <p>
 888      *
 889      * @param content The content structure to fill.
 890      *
 891      * @return Returns the next character on the input, if known. This
 892      *         value may be -1 but this does <em>not</em> designate
 893      *         end of file. A carriage return is reported as a line
          *         feed when <code>isExternal</code> is set.
 894      *
 895      * @throws IOException  Thrown if i/o error occurs.
 896      * @throws EOFException Thrown on end of file.
 897      */
 898     public int scanContent(XMLString content) throws IOException {
 899         if (DEBUG_BUFFER) {
 900             System.out.print("(scanContent: ");
 901             print();
 902             System.out.println();
 903         }
 904 
 905         // load more characters, if needed
 906         if (fCurrentEntity.position == fCurrentEntity.count) {
 907             load(0, true, true);
 908         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one character is left: move it to the front of
                 // the buffer, load more data behind it and restart at 0
 909             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 910             load(1, false, true);
 911             fCurrentEntity.position = 0;
 912         }
 913 
 914         // normalize newlines
 915         int offset = fCurrentEntity.position;
 916         int c = fCurrentEntity.ch[offset];
 917         int newlines = 0;
 918         if (c == '\n' || (c == '\r' && isExternal)) {
 919             if (DEBUG_BUFFER) {
 920                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
 921                 print();
 922                 System.out.println();
 923             }
                 // consume the run of newline characters at the start of the data
 924             do {
 925                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 926                 if (c == '\r' && isExternal) {
 927                     newlines++;
 928                     fCurrentEntity.lineNumber++;
 929                     fCurrentEntity.columnNumber = 1;
 930                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted: restart at the front, leaving
                             // 'newlines' slots for the line feeds written below
 931                         offset = 0;
 932                         fCurrentEntity.position = newlines;
 933                         if (load(newlines, false, true)) {
 934                             break;
 935                         }
 936                     }
 937                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                             // "\r\n" pair: drop one slot from the range so the
                             // pair contributes a single line feed
 938                         fCurrentEntity.position++;
 939                         offset++;
 940                     }
 941                     /*** NEWLINE NORMALIZATION ***/
 942                     else {
                             // NOTE(review): a lone '\r' increments 'newlines' a
                             // second time here -- confirm this is intended
 943                         newlines++;
 944                     }
 945                 } else if (c == '\n') {
 946                     newlines++;
 947                     fCurrentEntity.lineNumber++;
 948                     fCurrentEntity.columnNumber = 1;
 949                     if (fCurrentEntity.position == fCurrentEntity.count) {
 950                         offset = 0;
 951                         fCurrentEntity.position = newlines;
 952                         if (load(newlines, false, true)) {
 953                             break;
 954                         }
 955                     }
 956                 } else {
                         // not a newline: un-read the character and stop
 957                     fCurrentEntity.position--;
 958                     break;
 959                 }
 960             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite the whole scanned run with line feeds
 961             for (int i = offset; i < fCurrentEntity.position; i++) {
 962                 fCurrentEntity.ch[i] = '\n';
 963             }
 964             int length = fCurrentEntity.position - offset;
 965             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // the run stopped at the last buffered character: hand
                     // back just the newlines (columnNumber is not adjusted
                     // on this path)
 966                 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
 967                 //on buffering the data..
                     // NOTE(review): despite the remark above, the live call
                     // is setValues; append is the commented-out one
 968                 content.setValues(fCurrentEntity.ch, offset, length);
 969                 //content.append(fCurrentEntity.ch, offset, length);
 970                 if (DEBUG_BUFFER) {
 971                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 972                     print();
 973                     System.out.println();
 974                 }
 975                 return -1;
 976             }
 977             if (DEBUG_BUFFER) {
 978                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 979                 print();
 980                 System.out.println();
 981             }
 982         }
 983 
             // consume characters up to the first one that is not content
 984         while (fCurrentEntity.position < fCurrentEntity.count) {
 985             c = fCurrentEntity.ch[fCurrentEntity.position++];
 986             if (!XMLChar.isContent(c)) {
 987                 fCurrentEntity.position--;
 988                 break;
 989             }
 990         }
 991         int length = fCurrentEntity.position - offset;
 992         fCurrentEntity.columnNumber += length - newlines;
 993 
 994         //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
 995         //on buffering the data..
             // NOTE(review): despite the remark above, the live call is
             // setValues; append is the commented-out one
 996         content.setValues(fCurrentEntity.ch, offset, length);
 997         //content.append(fCurrentEntity.ch, offset, length);
 998         // return next character
 999         if (fCurrentEntity.position != fCurrentEntity.count) {
1000             c = fCurrentEntity.ch[fCurrentEntity.position];
1001             // REVISIT: Does this need to be updated to fix the
1002             //          #x0D ^#x0A newline normalization problem? -Ac
1003             if (c == '\r' && isExternal) {
1004                 c = '\n';
1005             }
1006         } else {
                 // buffer exhausted: the next character is not known yet
1007             c = -1;
1008         }
1009         if (DEBUG_BUFFER) {
1010             System.out.print(")scanContent: ");
1011             print();
1012             System.out.println(" -> '"+(char)c+"'");
1013         }
1014         return c;
1015 
1016     } // scanContent(XMLString):int
1017 
1018     /**
1019      * Scans a range of attribute value data, setting the fields of the
1020      * XMLString structure, appropriately.
1021      * <p>
1022      * <strong>Note:</strong> The characters are consumed.
1023      * <p>
1024      * <strong>Note:</strong> This method does not guarantee to return
1025      * the longest run of attribute value data. This method may return
1026      * before the quote character due to reaching the end of the input
1027      * buffer or any other reason.
1028      * <p>
1029      * <strong>Note:</strong> The fields contained in the XMLString
1030      * structure are not guaranteed to remain valid upon subsequent calls
1031      * to the entity scanner. Therefore, the caller is responsible for
1032      * immediately using the returned character data or making a copy of
1033      * the character data.
          * <p>
          * <strong>Note:</strong> Buffer positions of normalized newlines are
          * passed to <code>storeWhiteSpace</code>; positions of tabs are
          * passed as well when <code>whiteSpaceInfoNeeded</code> is set.
1034      *
1035      * @param quote   The quote character that signifies the end of the
1036      *                attribute value data.
1037      * @param content The content structure to fill.
1038      *
1039      * @return Returns the next character on the input, if known. This
1040      *         value may be -1 but this does <em>not</em> designate
1041      *         end of file.
1042      *
1043      * @throws IOException  Thrown if i/o error occurs.
1044      * @throws EOFException Thrown on end of file.
1045      */
1046     public int scanLiteral(int quote, XMLString content)
1047     throws IOException {
1048         if (DEBUG_BUFFER) {
1049             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1050             print();
1051             System.out.println();
1052         }
1053         // load more characters, if needed
1054         if (fCurrentEntity.position == fCurrentEntity.count) {
1055             load(0, true, true);
1056         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one character is left: move it to the front of
                 // the buffer, load more data behind it and restart at 0
1057             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1058             load(1, false, true);
1059             fCurrentEntity.position = 0;
1060         }
1061 
1062         // normalize newlines
1063         int offset = fCurrentEntity.position;
1064         int c = fCurrentEntity.ch[offset];
1065         int newlines = 0;
             // start a fresh list of white space positions for this call
1066         if(whiteSpaceInfoNeeded)
1067             whiteSpaceLen=0;
1068         if (c == '\n' || (c == '\r' && isExternal)) {
1069             if (DEBUG_BUFFER) {
1070                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1071                 print();
1072                 System.out.println();
1073             }
                 // consume the run of newline characters at the start of the data
1074             do {
1075                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1076                 if (c == '\r' && isExternal) {
1077                     newlines++;
1078                     fCurrentEntity.lineNumber++;
1079                     fCurrentEntity.columnNumber = 1;
1080                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted: restart at the front, leaving
                             // 'newlines' slots for the line feeds written below
1081                         offset = 0;
1082                         fCurrentEntity.position = newlines;
1083                         if (load(newlines, false, true)) {
1084                             break;
1085                         }
1086                     }
1087                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                             // "\r\n" pair: drop one slot from the range so the
                             // pair contributes a single line feed
1088                         fCurrentEntity.position++;
1089                         offset++;
1090                     }
1091                     /*** NEWLINE NORMALIZATION ***/
1092                     else {
                             // NOTE(review): a lone '\r' increments 'newlines' a
                             // second time here -- confirm this is intended
1093                         newlines++;
1094                     }
1095                     /***/
1096                 } else if (c == '\n') {
1097                     newlines++;
1098                     fCurrentEntity.lineNumber++;
1099                     fCurrentEntity.columnNumber = 1;
1100                     if (fCurrentEntity.position == fCurrentEntity.count) {
1101                         offset = 0;
1102                         fCurrentEntity.position = newlines;
1103                         if (load(newlines, false, true)) {
1104                             break;
1105                         }
1106                     }
1107                     /*** NEWLINE NORMALIZATION ***
1108                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1109                      * && external) {
1110                      * fCurrentEntity.position++;
1111                      * offset++;
1112                      * }
1113                      * /***/
1114                 } else {
                         // not a newline: un-read the character and stop
1115                     fCurrentEntity.position--;
1116                     break;
1117                 }
1118             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite the scanned run with line feeds and record each
                 // position (recorded even when whiteSpaceInfoNeeded is false)
1119             int i=0;
1120             for ( i = offset; i < fCurrentEntity.position; i++) {
1121                 fCurrentEntity.ch[i] = '\n';
1122                 storeWhiteSpace(i);
1123             }
1124 
1125             int length = fCurrentEntity.position - offset;
1126             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // the run stopped at the last buffered character: hand
                     // back just the newlines (columnNumber is not adjusted
                     // on this path)
1127                 content.setValues(fCurrentEntity.ch, offset, length);
1128                 if (DEBUG_BUFFER) {
1129                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1130                     print();
1131                     System.out.println();
1132                 }
1133                 return -1;
1134             }
1135             if (DEBUG_BUFFER) {
1136                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1137                 print();
1138                 System.out.println();
1139             }
1140         }
1141 
1142         // scan literal value
             // stops at the quote (unless the current entity is a literal
             // and isExternal is false), at '%', or at a non-content character
1143         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1144             c = fCurrentEntity.ch[fCurrentEntity.position];
1145             if ((c == quote &&
1146                     (!fCurrentEntity.literal || isExternal)) ||
1147                     c == '%' || !XMLChar.isContent(c)) {
1148                 break;
1149             }
                 // remember where tabs occur when the caller asked for it
1150             if (whiteSpaceInfoNeeded && c == '\t') {
1151                 storeWhiteSpace(fCurrentEntity.position);
1152             }
1153         }
1154         int length = fCurrentEntity.position - offset;
1155         fCurrentEntity.columnNumber += length - newlines;
1156         content.setValues(fCurrentEntity.ch, offset, length);
1157 
1158         // return next character
1159         if (fCurrentEntity.position != fCurrentEntity.count) {
1160             c = fCurrentEntity.ch[fCurrentEntity.position];
1161             // NOTE: We don't want to accidentally signal the
1162             //       end of the literal if we're expanding an
1163             //       entity appearing in the literal. -Ac
1164             if (c == quote && fCurrentEntity.literal) {
1165                 c = -1;
1166             }
1167         } else {
                 // buffer exhausted: the next character is not known yet
1168             c = -1;
1169         }
1170         if (DEBUG_BUFFER) {
1171             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1172             print();
1173             System.out.println(" -> '"+(char)c+"'");
1174         }
1175         return c;
1176 
1177     } // scanLiteral(int,XMLString):int
1178 
1179     /**
1180      * Save whitespace information. Increase the whitespace buffer by 100
1181      * when needed.
1182      *
1183      * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1184      *
1185      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1186      */
1187     private void storeWhiteSpace(int whiteSpacePos) {
1188         if (whiteSpaceLen >= whiteSpaceLookup.length) {
1189             int [] tmp = new int[whiteSpaceLookup.length + 100];
1190             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1191             whiteSpaceLookup = tmp;
1192         }
1193 
1194         whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1195     }
1196 
1197     //CHANGED:
1198     /**
1199      * Scans a range of character data up to the specified delimiter,
1200      * appending the scanned characters to the supplied XMLStringBuffer.
          * The delimiter itself is consumed but not appended.
1201      * <p>
1202      * <strong>Note:</strong> The characters are consumed.
1203      * <p>
1204      * <strong>Note:</strong> This assumes that the delimiter contains at
1205      * least one character (its first character is read unconditionally)
          * and, presumably, that the internal buffer is at least as long as
          * the delimiter.
1206      * <p>
1207      * <strong>Note:</strong> This method does not guarantee to return
1208      * the longest run of character data. This method may return before
1209      * the delimiter due to reaching the end of the input buffer or any
1210      * other reason.
1211      * <p>
1212      * @param delimiter The string that signifies the end of the character
1213      *                  data to be scanned.
1214      * @param buffer    The XMLStringBuffer to fill.
1215      *
1216      * @return Returns true if there is more data to scan, false otherwise.
          *         As written, true is returned when a leading newline run
          *         reaches the last buffered character or when an invalid
          *         character is met; false is returned once the delimiter
          *         has been found or when fewer characters than the
          *         delimiter length remain after reloading.
1217      *
1218      * @throws IOException  Thrown if i/o error occurs.
1219      * @throws EOFException Thrown on end of file.
1220      */
1221     public boolean scanData(String delimiter, XMLStringBuffer buffer)
1222     throws IOException {
1223 
1224         boolean done = false;
1225         int delimLen = delimiter.length();
1226         char charAt0 = delimiter.charAt(0);
1227         do {
1228             if (DEBUG_BUFFER) {
1229                 System.out.print("(scanData: ");
1230                 print();
1231                 System.out.println();
1232             }
1233 
1234             // load more characters, if needed
1235 
1236             if (fCurrentEntity.position == fCurrentEntity.count) {
1237                 load(0, true, false);
1238             }
1239 
1240             boolean bNextEntity = false;
1241 
                 // make sure at least delimLen characters are buffered: move
                 // the unread tail to the front and load more behind it,
                 // until enough is available or load() returns true
1242             while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
1243                 && (!bNextEntity))
1244             {
1245               System.arraycopy(fCurrentEntity.ch,
1246                                fCurrentEntity.position,
1247                                fCurrentEntity.ch,
1248                                0,
1249                                fCurrentEntity.count - fCurrentEntity.position);
1250 
1251               bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
1252               fCurrentEntity.position = 0;
1253               fCurrentEntity.startPosition = 0;
1254             }
1255 
1256             if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
1257                 // something must be wrong with the input:  e.g., file ends in an unterminated comment
                     // append what is left, mark the buffer as consumed and give up
1258                 int length = fCurrentEntity.count - fCurrentEntity.position;
1259                 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                     // NOTE(review): this adds 'count', not 'length'; the two
                     // are equal only when position is 0 here -- confirm
1260                 fCurrentEntity.columnNumber += fCurrentEntity.count;
1261                 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1262                 fCurrentEntity.position = fCurrentEntity.count;
1263                 fCurrentEntity.startPosition = fCurrentEntity.count;
1264                 load(0, true, false);
1265                 return false;
1266             }
1267 
1268             // normalize newlines
1269             int offset = fCurrentEntity.position;
1270             int c = fCurrentEntity.ch[offset];
1271             int newlines = 0;
1272             if (c == '\n' || (c == '\r' && isExternal)) {
1273                 if (DEBUG_BUFFER) {
1274                     System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1275                     print();
1276                     System.out.println();
1277                 }
                     // consume the run of newline characters at the start of the data
1278                 do {
1279                     c = fCurrentEntity.ch[fCurrentEntity.position++];
1280                     if (c == '\r' && isExternal) {
1281                         newlines++;
1282                         fCurrentEntity.lineNumber++;
1283                         fCurrentEntity.columnNumber = 1;
1284                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // buffer exhausted: restart at the front, leaving
                                 // 'newlines' slots for the line feeds written below
1285                             offset = 0;
1286                             fCurrentEntity.position = newlines;
1287                             if (load(newlines, false, true)) {
1288                                 break;
1289                             }
1290                         }
1291                         if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                                 // "\r\n" pair: drop one slot from the range so the
                                 // pair contributes a single line feed
1292                             fCurrentEntity.position++;
1293                             offset++;
1294                         }
1295                         /*** NEWLINE NORMALIZATION ***/
1296                         else {
                                 // NOTE(review): a lone '\r' increments 'newlines' a
                                 // second time here -- confirm this is intended
1297                             newlines++;
1298                         }
1299                     } else if (c == '\n') {
1300                         newlines++;
1301                         fCurrentEntity.lineNumber++;
1302                         fCurrentEntity.columnNumber = 1;
1303                         if (fCurrentEntity.position == fCurrentEntity.count) {
1304                             offset = 0;
1305                             fCurrentEntity.position = newlines;
                                 // NOTE(review): count is reset only in this '\n'
                                 // branch, not in the '\r' branch above -- confirm
1306                             fCurrentEntity.count = newlines;
1307                             if (load(newlines, false, true)) {
1308                                 break;
1309                             }
1310                         }
1311                     } else {
                             // not a newline: un-read the character and stop
1312                         fCurrentEntity.position--;
1313                         break;
1314                     }
1315                 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                     // overwrite the whole scanned run with line feeds
1316                 for (int i = offset; i < fCurrentEntity.position; i++) {
1317                     fCurrentEntity.ch[i] = '\n';
1318                 }
1319                 int length = fCurrentEntity.position - offset;
1320                 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                         // the run stopped at the last buffered character:
                         // append the newlines and let the caller call again
1321                     buffer.append(fCurrentEntity.ch, offset, length);
1322                     if (DEBUG_BUFFER) {
1323                         System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1324                         print();
1325                         System.out.println();
1326                     }
1327                     return true;
1328                 }
1329                 if (DEBUG_BUFFER) {
1330                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1331                     print();
1332                     System.out.println();
1333                 }
1334             }
1335 
1336             // iterate over buffer looking for delimiter
1337             OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1338                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1339                 if (c == charAt0) {
1340                     // looks like we just hit the delimiter
1341                     int delimOffset = fCurrentEntity.position - 1;
1342                     for (int i = 1; i < delimLen; i++) {
1343                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // buffer ended inside a possible delimiter: back up
                                 // to its first character and leave the search loop
1344                             fCurrentEntity.position -= i;
1345                             break OUTER;
1346                         }
1347                         c = fCurrentEntity.ch[fCurrentEntity.position++];
1348                         if (delimiter.charAt(i) != c) {
                                 // mismatch: resume right after the first
                                 // delimiter character
1349                             fCurrentEntity.position -= i;
1350                             break;
1351                         }
1352                     }
                         // position sits just past the delimiter only on a full match
1353                     if (fCurrentEntity.position == delimOffset + delimLen) {
1354                         done = true;
1355                         break;
1356                     }
1357                 } else if (c == '\n' || (isExternal && c == '\r')) {
                         // stop in front of the newline; the next pass of the
                         // outer do-loop normalizes it
1358                     fCurrentEntity.position--;
1359                     break;
1360                 } else if (XMLChar.isInvalid(c)) {
                         // stop in front of the invalid character and return
                         // what was scanned so far
1361                     fCurrentEntity.position--;
1362                     int length = fCurrentEntity.position - offset;
1363                     fCurrentEntity.columnNumber += length - newlines;
1364                     buffer.append(fCurrentEntity.ch, offset, length);
1365                     return true;
1366                 }
1367             }
1368             int length = fCurrentEntity.position - offset;
1369             fCurrentEntity.columnNumber += length - newlines;
1370             if (done) {
                     // do not hand the delimiter itself to the caller
1371                 length -= delimLen;
1372             }
1373             buffer.append(fCurrentEntity.ch, offset, length);
1374 
1375             // return true if string was skipped
1376             if (DEBUG_BUFFER) {
1377                 System.out.print(")scanData: ");
1378                 print();
1379                 System.out.println(" -> " + done);
1380             }
1381         } while (!done);
             // the loop above only ends with done == true, so this is false
1382         return !done;
1383 
1384     } // scanData(String,XMLStringBuffer):boolean
1385 
1386     /**
1387      * Skips a character appearing immediately on the input.
1388      * <p>
1389      * <strong>Note:</strong> The character is consumed only if it matches
1390      * the specified character.
1391      *
1392      * @param c The character to skip.
1393      *
1394      * @return Returns true if the character was skipped.
1395      *
1396      * @throws IOException  Thrown if i/o error occurs.
1397      * @throws EOFException Thrown on end of file.
1398      */
1399     public boolean skipChar(int c) throws IOException {
1400         if (DEBUG_BUFFER) {
1401             System.out.print("(skipChar, '"+(char)c+"': ");
1402             print();
1403             System.out.println();
1404         }
1405 
1406         // load more characters, if needed
1407         if (fCurrentEntity.position == fCurrentEntity.count) {
1408             load(0, true, true);
1409         }
1410 
1411         // skip character
1412         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1413         if (cc == c) {
1414             fCurrentEntity.position++;
1415             if (c == '\n') {
1416                 fCurrentEntity.lineNumber++;
1417                 fCurrentEntity.columnNumber = 1;
1418             } else {
1419                 fCurrentEntity.columnNumber++;
1420             }
1421             if (DEBUG_BUFFER) {
1422                 System.out.print(")skipChar, '"+(char)c+"': ");
1423                 print();
1424                 System.out.println(" -> true");
1425             }
1426             return true;
1427         } else if (c == '\n' && cc == '\r' && isExternal) {
1428             // handle newlines
1429             if (fCurrentEntity.position == fCurrentEntity.count) {
1430                 fCurrentEntity.ch[0] = (char)cc;
1431                 load(1, false, true);
1432             }
1433             fCurrentEntity.position++;
1434             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1435                 fCurrentEntity.position++;
1436             }
1437             fCurrentEntity.lineNumber++;
1438             fCurrentEntity.columnNumber = 1;
1439             if (DEBUG_BUFFER) {
1440                 System.out.print(")skipChar, '"+(char)c+"': ");
1441                 print();
1442                 System.out.println(" -> true");
1443             }
1444             return true;
1445         }
1446 
1447         // character was not skipped
1448         if (DEBUG_BUFFER) {
1449             System.out.print(")skipChar, '"+(char)c+"': ");
1450             print();
1451             System.out.println(" -> false");
1452         }
1453         return false;
1454 
1455     } // skipChar(int):boolean
1456 
1457     public boolean isSpace(char ch){
1458         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1459     }
    /**
     * Skips space characters appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they are
     * space characters.
     * <p>
     * While skipping, the line and column numbers of the current entity are
     * kept up to date: a newline increments the line number and resets the
     * column to 1, any other space character increments the column.
     *
     * @return Returns true if at least one space character was skipped;
     *         false if the next character is not a space, or if there is
     *         no current entity once the initial load has been attempted.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        // load more characters, if needed (buffer fully consumed)
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // This check is done only in skipSpaces() because it is called by
        // fMiscDispatcher and the parser should exit gracefully when the
        // document is well-formed.
        // It is possible that the end of the document was reached and
        // fCurrentEntity became null; nothing was skipped in that case,
        // so 'false' is returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                // set when the load below reports the end of the entity
                boolean entityChanged = false;
                // handle newlines ('\r' counts as a newline only when
                // isExternal, which is declared outside this method, is set)
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: move it to
                    // slot 0 and refill the buffer from offset 1, so that the
                    // character following it is available below
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, true);
                        if (!entityChanged){
                            // the load changed the position to 1; point it
                            // back at the newline that now sits in slot 0
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            // no entity left; the newline was already counted
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // peek at the character after '\r': a following '\n'
                        // is consumed together with it, anything else is
                        // left in place
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // step past the character just handled, unless the load
                // above reported an entity change
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    // This check is done only in skipSpaces() because it is
                    // called by fMiscDispatcher and the parser should exit
                    // gracefully when the document is well-formed.

                    // It is possible that the end of the document was reached
                    // and fCurrentEntity became null. At least one space has
                    // been consumed by now, so 'true' is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean
1563 
1564 
1565     /**
1566      * @param legnth This function checks that following number of characters are available.
1567      * to the underlying buffer.
1568      * @return This function returns true if capacity asked is available.
1569      */
1570     public boolean arrangeCapacity(int length) throws IOException{
1571         return arrangeCapacity(length, false);
1572     }
1573 
1574     /**
1575      * @param legnth This function checks that following number of characters are available.
1576      * to the underlying buffer.
1577      * @param if the underlying function should change the entity
1578      * @return This function returns true if capacity asked is available.
1579      *
1580      */
1581     public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1582         //check if the capacity is availble in the current buffer
1583         //count is no. of characters in the buffer   [x][m][l]
1584         //position is '0' based
1585         //System.out.println("fCurrent Entity " + fCurrentEntity);
1586         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1587             return true;
1588         }
1589         if(DEBUG_SKIP_STRING){
1590             System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1591             System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1592             System.out.println("length = " + length);
1593         }
1594         boolean entityChanged = false;
1595         //load more characters -- this function shouldn't change the entity
1596         while((fCurrentEntity.count - fCurrentEntity.position) < length){
1597             if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1598                 invokeListeners(0);
1599                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1600                 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1601                 fCurrentEntity.position = 0;
1602             }
1603 
1604             if((fCurrentEntity.count - fCurrentEntity.position) < length){
1605                 int pos = fCurrentEntity.position;
1606                 invokeListeners(pos);
1607                 entityChanged = load(fCurrentEntity.count, changeEntity, false);
1608                 fCurrentEntity.position = pos;
1609                 if(entityChanged)break;
1610             }
1611             if(DEBUG_SKIP_STRING){
1612                 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1613                 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1614                 System.out.println("length = " + length);
1615             }
1616         }
1617         //load changes the position.. set it back to the point where we started.
1618 
1619         //after loading check again.
1620         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1621             return true;
1622         } else {
1623             return false;
1624         }
1625     }
1626 
1627     /**
1628      * Skips the specified string appearing immediately on the input.
1629      * <p>
1630      * <strong>Note:</strong> The characters are consumed only if all
1631      * the characters are skipped.
1632      *
1633      * @param s The string to skip.
1634      *
1635      * @return Returns true if the string was skipped.
1636      *
1637      * @throws IOException  Thrown if i/o error occurs.
1638      * @throws EOFException Thrown on end of file.
1639      */
1640     public boolean skipString(String s) throws IOException {
1641 
1642         final int length = s.length();
1643 
1644         //first make sure that required capacity is avaible
1645         if(arrangeCapacity(length, false)){
1646             final int beforeSkip = fCurrentEntity.position ;
1647             int afterSkip = fCurrentEntity.position + length - 1 ;
1648             if(DEBUG_SKIP_STRING){
1649                 System.out.println("skipString,length = " + s + "," + length);
1650                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1651             }
1652 
1653             //s.charAt() indexes are 0 to 'Length -1' based.
1654             int i = length - 1 ;
1655             //check from reverse
1656             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1657                 if(afterSkip-- == beforeSkip){
1658                     fCurrentEntity.position = fCurrentEntity.position + length ;
1659                     fCurrentEntity.columnNumber += length;
1660                     return true;
1661                 }
1662             }
1663         }
1664 
1665         return false;
1666     } // skipString(String):boolean
1667 
1668     public boolean skipString(char [] s) throws IOException {
1669 
1670         final int length = s.length;
1671         //first make sure that required capacity is avaible
1672         if(arrangeCapacity(length, false)){
1673             int beforeSkip = fCurrentEntity.position ;
1674             int afterSkip = fCurrentEntity.position + length  ;
1675 
1676             if(DEBUG_SKIP_STRING){
1677                 System.out.println("skipString,length = " + new String(s) + "," + length);
1678                 System.out.println("skipString,length = " + new String(s) + "," + length);
1679             }
1680 
1681             for(int i=0;i<length;i++){
1682                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1683                    return false;
1684                 }
1685             }
1686             fCurrentEntity.position = fCurrentEntity.position + length ;
1687             fCurrentEntity.columnNumber += length;
1688             return true;
1689 
1690         }
1691 
1692         return false;
1693     }
1694 
1695     //
1696     // Locator methods
1697     //
1698     //
1699     // Private methods
1700     //
1701 
    /**
     * Loads a chunk of text from the current entity's reader into its
     * character buffer.
     * <p>
     * When characters are read, the entity's count becomes
     * <code>offset</code> plus the number of characters read and its
     * position is set to <code>offset</code>. When the reader returns zero
     * characters, count and position are left as they were.
     *
     * @param offset       The offset into the character buffer to
     *                     read the next batch of characters.
     * @param changeEntity True if the load should change entities
     *                     at the end of the entity, otherwise leave
     *                     the current entity in place and the entity
     *                     boundary will be signaled by the return
     *                     value.
     * @param notify       Determine whether to notify listeners of
     *                     the event
     *
     * @return Returns true if the end of the current entity was reached
     *         by this load operation (the entity is only actually changed
     *         when <code>changeEntity</code> is true).
     *
     * @throws IOException Thrown if reading fails; END_OF_DOCUMENT_ENTITY
     *                     (declared outside this method) is thrown when no
     *                     entity is left after ending the current one.
     */
    final boolean load(int offset, boolean changeEntity, boolean notify)
    throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(load, "+offset+": ");
            print();
            System.out.println();
        }
        // let registered buffer listeners know that the buffer is about
        // to be refilled starting at 'offset'
        if (notify) {
            invokeListeners(offset);
        }
        // maintain the running total of characters read before this load
        fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
        // read characters: fill the rest of the buffer, but cap the request
        // at DEFAULT_XMLDECL_BUFFER_SIZE while the entity may not read chunks
        int length = fCurrentEntity.ch.length - offset;
        if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
            length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
        }
        if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
        int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
        if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);

        // reset count and position
        boolean entityChanged = false;
        if (count != -1) {
            // a read of zero characters leaves the buffer state untouched
            if (count != 0) {
                // record the last count
                fCurrentEntity.fLastCount = count;
                fCurrentEntity.count = count + offset;
                fCurrentEntity.position = offset;
            }
        }
        // end of this entity (the reader returned -1)
        else {
            fCurrentEntity.count = offset;
            fCurrentEntity.position = offset;
            entityChanged = true;

            if (changeEntity) {
                //notify the entity manager about the end of entity
                fEntityManager.endEntity();
                // no entity left: signal the end of the document entity
                if(fCurrentEntity == null){
                    throw END_OF_DOCUMENT_ENTITY;
                }
                // handle the trailing edges: the entity that is current now
                // has no unread characters, so load it as well (the result
                // of this nested load is not used)
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, false);
                }
            }

        }
        if (DEBUG_BUFFER) {
            System.out.print(")load, "+offset+": ");
            print();
            System.out.println();
        }

        return entityChanged;

    } // load(int, boolean, boolean):boolean
1778 
1779     /**
1780      * Creates a reader capable of reading the given input stream in
1781      * the specified encoding.
1782      *
1783      * @param inputStream  The input stream.
1784      * @param encoding     The encoding name that the input stream is
1785      *                     encoded using. If the user has specified that
1786      *                     Java encoding names are allowed, then the
1787      *                     encoding name may be a Java encoding name;
1788      *                     otherwise, it is an ianaEncoding name.
1789      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1790      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1791      *                      unknown or not relevant.
1792      *
1793      * @return Returns a reader.
1794      */
1795     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1796     throws IOException {
1797 
1798         // normalize encoding name
1799         if (encoding == null) {
1800             encoding = "UTF-8";
1801         }
1802 
1803         // try to use an optimized reader
1804         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1805         if (ENCODING.equals("UTF-8")) {
1806             if (DEBUG_ENCODINGS) {
1807                 System.out.println("$$$ creating UTF8Reader");
1808             }
1809             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1810         }
1811         if (ENCODING.equals("US-ASCII")) {
1812             if (DEBUG_ENCODINGS) {
1813                 System.out.println("$$$ creating ASCIIReader");
1814             }
1815             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1816         }
1817         if(ENCODING.equals("ISO-10646-UCS-4")) {
1818             if(isBigEndian != null) {
1819                 boolean isBE = isBigEndian.booleanValue();
1820                 if(isBE) {
1821                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1822                 } else {
1823                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1824                 }
1825             } else {
1826                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1827                         "EncodingByteOrderUnsupported",
1828                         new Object[] { encoding },
1829                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1830             }
1831         }
1832         if(ENCODING.equals("ISO-10646-UCS-2")) {
1833             if(isBigEndian != null) { // sould never happen with this encoding...
1834                 boolean isBE = isBigEndian.booleanValue();
1835                 if(isBE) {
1836                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1837                 } else {
1838                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1839                 }
1840             } else {
1841                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1842                         "EncodingByteOrderUnsupported",
1843                         new Object[] { encoding },
1844                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1845             }
1846         }
1847 
1848         // check for valid name
1849         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1850         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1851         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1852             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1853                     "EncodingDeclInvalid",
1854                     new Object[] { encoding },
1855                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1856                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1857                     //       because every byte is a valid ISO Latin 1 character.
1858                     //       It may not translate correctly but if we failed on
1859                     //       the encoding anyway, then we're expecting the content
1860                     //       of the document to be bad. This will just prevent an
1861                     //       invalid UTF-8 sequence to be detected. This is only
1862                     //       important when continue-after-fatal-error is turned
1863                     //       on. -Ac
1864                     encoding = "ISO-8859-1";
1865         }
1866 
1867         // try to use a Java reader
1868         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1869         if (javaEncoding == null) {
1870             if(fAllowJavaEncodings) {
1871                 javaEncoding = encoding;
1872             } else {
1873                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1874                         "EncodingDeclInvalid",
1875                         new Object[] { encoding },
1876                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1877                         // see comment above.
1878                         javaEncoding = "ISO8859_1";
1879             }
1880         }
1881         else if (javaEncoding.equals("ASCII")) {
1882             if (DEBUG_ENCODINGS) {
1883                 System.out.println("$$$ creating ASCIIReader");
1884             }
1885             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1886         }
1887 
1888         if (DEBUG_ENCODINGS) {
1889             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
1890             if (javaEncoding == encoding) {
1891                 System.out.print(" (IANA encoding)");
1892             }
1893             System.out.println();
1894         }
1895         return new InputStreamReader(inputStream, javaEncoding);
1896 
1897     } // createReader(InputStream,String, Boolean): Reader
1898 
1899     /**
1900      * Returns the IANA encoding name that is auto-detected from
1901      * the bytes specified, with the endian-ness of that encoding where appropriate.
1902      *
1903      * @param b4    The first four bytes of the input.
1904      * @param count The number of bytes actually read.
1905      * @return a 2-element array:  the first element, an IANA-encoding string,
1906      *  the second element a Boolean which is true iff the document is big endian, false
1907      *  if it's little-endian, and null if the distinction isn't relevant.
1908      */
1909     protected Object[] getEncodingName(byte[] b4, int count) {
1910 
1911         if (count < 2) {
1912             return new Object[]{"UTF-8", null};
1913         }
1914 
1915         // UTF-16, with BOM
1916         int b0 = b4[0] & 0xFF;
1917         int b1 = b4[1] & 0xFF;
1918         if (b0 == 0xFE && b1 == 0xFF) {
1919             // UTF-16, big-endian
1920             return new Object [] {"UTF-16BE", new Boolean(true)};
1921         }
1922         if (b0 == 0xFF && b1 == 0xFE) {
1923             // UTF-16, little-endian
1924             return new Object [] {"UTF-16LE", new Boolean(false)};
1925         }
1926 
1927         // default to UTF-8 if we don't have enough bytes to make a
1928         // good determination of the encoding
1929         if (count < 3) {
1930             return new Object [] {"UTF-8", null};
1931         }
1932 
1933         // UTF-8 with a BOM
1934         int b2 = b4[2] & 0xFF;
1935         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1936             return new Object [] {"UTF-8", null};
1937         }
1938 
1939         // default to UTF-8 if we don't have enough bytes to make a
1940         // good determination of the encoding
1941         if (count < 4) {
1942             return new Object [] {"UTF-8", null};
1943         }
1944 
1945         // other encodings
1946         int b3 = b4[3] & 0xFF;
1947         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
1948             // UCS-4, big endian (1234)
1949             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
1950         }
1951         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
1952             // UCS-4, little endian (4321)
1953             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
1954         }
1955         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
1956             // UCS-4, unusual octet order (2143)
1957             // REVISIT: What should this be?
1958             return new Object [] {"ISO-10646-UCS-4", null};
1959         }
1960         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
1961             // UCS-4, unusual octect order (3412)
1962             // REVISIT: What should this be?
1963             return new Object [] {"ISO-10646-UCS-4", null};
1964         }
1965         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
1966             // UTF-16, big-endian, no BOM
1967             // (or could turn out to be UCS-2...
1968             // REVISIT: What should this be?
1969             return new Object [] {"UTF-16BE", new Boolean(true)};
1970         }
1971         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
1972             // UTF-16, little-endian, no BOM
1973             // (or could turn out to be UCS-2...
1974             return new Object [] {"UTF-16LE", new Boolean(false)};
1975         }
1976         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
1977             // EBCDIC
1978             // a la xerces1, return CP037 instead of EBCDIC here
1979             return new Object [] {"CP037", null};
1980         }
1981 
1982         // default encoding
1983         return new Object [] {"UTF-8", null};
1984 
1985     } // getEncodingName(byte[],int):Object[]
1986 
1987     /**
     * TODO: endEntity() still needs to be implemented; this comment is kept as a reminder.
1989      * Ends an entity.
1990      *
1991      * @throws XNIException Thrown by entity handler to signal an error.
1992      */
1993     //
1994     /** Prints the contents of the buffer. */
1995     final void print() {
1996         if (DEBUG_BUFFER) {
1997             if (fCurrentEntity != null) {
1998                 System.out.print('[');
1999                 System.out.print(fCurrentEntity.count);
2000                 System.out.print(' ');
2001                 System.out.print(fCurrentEntity.position);
2002                 if (fCurrentEntity.count > 0) {
2003                     System.out.print(" \"");
2004                     for (int i = 0; i < fCurrentEntity.count; i++) {
2005                         if (i == fCurrentEntity.position) {
2006                             System.out.print('^');
2007                         }
2008                         char c = fCurrentEntity.ch[i];
2009                         switch (c) {
2010                             case '\n': {
2011                                 System.out.print("\\n");
2012                                 break;
2013                             }
2014                             case '\r': {
2015                                 System.out.print("\\r");
2016                                 break;
2017                             }
2018                             case '\t': {
2019                                 System.out.print("\\t");
2020                                 break;
2021                             }
2022                             case '\\': {
2023                                 System.out.print("\\\\");
2024                                 break;
2025                             }
2026                             default: {
2027                                 System.out.print(c);
2028                             }
2029                         }
2030                     }
2031                     if (fCurrentEntity.position == fCurrentEntity.count) {
2032                         System.out.print('^');
2033                     }
2034                     System.out.print('"');
2035                 }
2036                 System.out.print(']');
2037                 System.out.print(" @ ");
2038                 System.out.print(fCurrentEntity.lineNumber);
2039                 System.out.print(',');
2040                 System.out.print(fCurrentEntity.columnNumber);
2041             } else {
2042                 System.out.print("*NO CURRENT ENTITY*");
2043             }
2044         }
2045     }
2046 
2047     /**
2048      * Registers the listener object and provides callback.
2049      * @param listener listener to which call back should be provided when scanner buffer
2050      * is being changed.
2051      */
2052     public void registerListener(XMLBufferListener listener) {
2053         if(!listeners.contains(listener))
2054             listeners.add(listener);
2055     }
2056 
2057     /**
2058      *
2059      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2060      */
2061     public void invokeListeners(int loadPos){
2062         for(int i=0;i<listeners.size();i++){
2063             XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
2064             listener.refresh(loadPos);
2065         }
2066     }
2067 
    /**
     * Skips space characters appearing immediately on the input that would
     * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
     * normalization is performed. This is useful when scanning structures
     * such as the XMLDecl and TextDecl that can only contain US-ASCII
     * characters.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they would
     * match non-terminal S before end of line normalization is performed.
     * <p>
     * Line and column numbers of the current entity are updated while
     * skipping. All loads performed here pass <code>notify = false</code>,
     * so buffer listeners are not invoked by them.
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public final boolean skipDeclSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }

        // load more characters, if needed (buffer fully consumed)
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, false);
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            // read once; '\r' only counts as a newline in external entities
            boolean external = fCurrentEntity.isExternal();
            do {
                // set when the load below reports the end of the entity
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (external && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: move it to
                    // slot 0 and refill the buffer from offset 1, so that the
                    // character following it is available below
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged)
                            // the load changed the position to 1; point it
                            // back at the newline that now sits in slot 0
                            fCurrentEntity.position = 0;
                    }
                    if (c == '\r' && external) {
                        // peek at the character after '\r': a following '\n'
                        // is consumed together with it, anything else is
                        // left in place
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * else {
                     * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * }
                     * }
                     * /***/
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // step past the character just handled, unless the load
                // above reported an entity change
                if (!entityChanged)
                    fCurrentEntity.position++;
                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, false);
                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipDeclSpaces: ");
                //  XMLEntityManager.print(fCurrentEntity);
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> false");
        }
        return false;

    } // skipDeclSpaces():boolean
2157 
2158 
2159 } // class XMLEntityScanner