1 /*
   2  * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 import java.io.EOFException;
  24 import java.io.IOException;
  25 import java.util.Locale;
  26 import java.util.Vector;
  27 
  28 import com.sun.xml.internal.stream.Entity;
  29 import com.sun.xml.internal.stream.XMLBufferListener;
  30 import java.io.InputStream;
  31 import java.io.InputStreamReader;
  32 import java.io.Reader;
  33 
  34 
  35 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  36 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  37 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  38 
  39 
  40 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  41 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  42 
  43 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  44 import com.sun.org.apache.xerces.internal.util.XMLChar;
  45 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  46 import com.sun.org.apache.xerces.internal.xni.QName;
  47 import com.sun.org.apache.xerces.internal.xni.XMLString;
  48 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  49 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  50 import com.sun.org.apache.xerces.internal.xni.*;
  51 
  52 /**
  53  * Implements the entity scanner methods.
  54  *
  55  * @author Neeraj Bajaj, Sun Microsystems
  56  * @author Andy Clark, IBM
  57  * @author Arnaud  Le Hors, IBM
  58  * @author K.Venugopal Sun Microsystems
  59  *
  60  */
  61 public class XMLEntityScanner implements XMLLocator  {
  62 
  63 
    /**
     * Entity currently being scanned. Null until {@link #setCurrentEntity}
     * supplies one; every reset method sets it back to null.
     */
    protected Entity.ScannedEntity fCurrentEntity = null ;
    /** Buffer size hint; starts at the entity manager default and is replaced by setBufferSize(int). */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Owning entity manager, supplied through the two-argument constructor or reset(SymbolTable, ...). */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;
    /**
     * Listeners which should know when load is being called.
     * The list is emptied by reset(PropertyManager) and reset(XMLComponentManager).
     */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code for characters below 127; filled in
     * by the static initializer and consulted by the name scanning loops before
     * falling back to XMLChar.isName for larger code points.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    /** Debug flag; not referenced in the portion of the class reviewed here. */
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * A single shared instance is used; fillInStackTrace() is overridden to
     * return the instance itself, so no stack trace is captured for it.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names; replaced by each reset method. */
    protected SymbolTable fSymbolTable = null;
    /** Error reporter; replaced by each reset method. */
    protected XMLErrorReporter fErrorReporter = null;
    // White space bookkeeping. The length and the "needed" flag are restored to
    // 0 / true by reset(PropertyManager) and reset(XMLComponentManager).
    // NOTE(review): the code that fills whiteSpaceLookup is not visible here —
    // presumably it records buffer offsets of white space; confirm before relying on it.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     * Assigned in reset(XMLComponentManager).
     */
    protected boolean fAllowJavaEncodings;

    // NOTE(review): the two comment lines below are not followed by the member
    // they describe; they look like a leftover from a removed field.
    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Property manager reference; no assignment to it appears in the portion of the class reviewed here. */
    protected PropertyManager fPropertyManager = null ;

    /**
     * Cached copy of fCurrentEntity.isExternal(), refreshed by setCurrentEntity
     * and read by peekChar/scanChar when normalizing carriage returns.
     */
    boolean isExternal = false;
 123     static {
 124 
 125         for(int i=0x0041;i<=0x005A ; i++){
 126             VALID_NAMES[i]=true;
 127         }
 128         for(int i=0x0061;i<=0x007A; i++){
 129             VALID_NAMES[i]=true;
 130         }
 131         for(int i=0x0030;i<=0x0039; i++){
 132             VALID_NAMES[i]=true;
 133         }
 134         VALID_NAMES[45]=true;
 135         VALID_NAMES[46]=true;
 136         VALID_NAMES[58]=true;
 137         VALID_NAMES[95]=true;
 138     }
    // SAPJVM: Records that the XML version was set explicitly through
    // setXMLVersion(String), so that XMLStreamReader.getVersion() can tell an
    // explicit version apart from a defaulted one.
    boolean xmlVersionSetExplicitly = false;
 142     //
 143     // Constructors
 144     //
 145 
    /**
     * Default constructor. Leaves every field at its declared initial value;
     * one of the reset methods has to supply the symbol table and error
     * reporter before the scanner is used.
     */
    public XMLEntityScanner() {
    } // <init>()
 149 
 150 
    /**
     * Creates a scanner bound to the given entity manager and initializes it
     * through {@link #reset(PropertyManager)}.
     * <p>
     * NOTE(review): the previous comment described this constructor as private
     * and pointed to getEntityScanner() factory methods; the constructor is
     * public and no such factory is visible in this class.
     *
     * @param propertyManager source of the symbol table and error reporter
     * @param entityManager   entity manager that owns this scanner
     */
    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
        fEntityManager = entityManager ;
        reset(propertyManager);
    } // <init>()
 160 
 161 
    /**
     * Stores the buffer size for this scanner. Only the fBufferSize field is
     * updated; buffers of entities that already exist are not touched here.
     *
     * @param size the new buffer size
     */
    public final void setBufferSize(int size) {
        // REVISIT: Buffer size passed to entity scanner
        // was not being kept in synch with the actual size
        // of the buffers in each scanned entity. If any
        // of the buffers were actually resized, it was possible
        // that the parser would throw an ArrayIndexOutOfBoundsException
        // for documents which contained names which are longer than
        // the current buffer size. Conceivably the buffer size passed
        // to entity scanner could be used to determine a minimum size
        // for resizing, if doubling its size is smaller than this
        // minimum. -- mrglavas
        fBufferSize = size;
    }
 176 
 177     /**
 178      * Resets the components.
 179      */
 180     public void reset(PropertyManager propertyManager){
 181         fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
 182         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
 183         fCurrentEntity = null;
 184         whiteSpaceLen = 0;
 185         whiteSpaceInfoNeeded = true;
 186         listeners.clear();
 187     }
 188 
 189     /**
 190      * Resets the component. The component can query the component manager
 191      * about any features and properties that affect the operation of the
 192      * component.
 193      *
 194      * @param componentManager The component manager.
 195      *
 196      * @throws SAXException Thrown by component on initialization error.
 197      *                      For example, if a feature or property is
 198      *                      required for the operation of the component, the
 199      *                      component manager may throw a
 200      *                      SAXNotRecognizedException or a
 201      *                      SAXNotSupportedException.
 202      */
 203     public void reset(XMLComponentManager componentManager)
 204     throws XMLConfigurationException {
 205 
 206         //System.out.println(" this is being called");
 207         // xerces features
 208         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
 209 
 210         //xerces properties
 211         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 212         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 213         fCurrentEntity = null;
 214         whiteSpaceLen = 0;
 215         whiteSpaceInfoNeeded = true;
 216         listeners.clear();
 217     } // reset(XMLComponentManager)
 218 
 219 
 220     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 221             XMLErrorReporter reporter) {
 222         fCurrentEntity = null;
 223         fSymbolTable = symbolTable;
 224         fEntityManager = entityManager;
 225         fErrorReporter = reporter;
 226     }
 227 
 228     /**
 229      * Returns the XML version of the current entity. This will normally be the
 230      * value from the XML or text declaration or defaulted by the parser. Note that
 231      * that this value may be different than the version of the processing rules
 232      * applied to the current entity. For instance, an XML 1.1 document may refer to
 233      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 234      * document. Also note that, for a given entity, this value can only be considered
 235      * final once the XML or text declaration has been read or once it has been
 236      * determined that there is no such declaration.
 237      */
 238     public final String getXMLVersion() {
 239         if (fCurrentEntity != null) {
 240             return fCurrentEntity.xmlVersion;
 241         }
 242         return null;
 243     } // getXMLVersion():String
 244 
    /**
     * Sets the XML version. This method is used by the
     * scanners to report the value of the version pseudo-attribute
     * in an XML or text declaration.
     * <p>
     * The current entity is dereferenced without a null check, so an entity
     * must have been set before this method is called.
     *
     * @param xmlVersion the XML version of the current entity
     */
    public final void setXMLVersion(String xmlVersion) {
        // remember that the version did not come from a default
        xmlVersionSetExplicitly = true; // SAPJVM
        fCurrentEntity.xmlVersion = xmlVersion;
    } // setXMLVersion(String)
 256 
 257 
 258     /** set the instance of current scanned entity.
 259      *   @param ScannedEntity
 260      */
 261 
 262     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 263         fCurrentEntity = scannedEntity ;
 264         if(fCurrentEntity != null){
 265             isExternal = fCurrentEntity.isExternal();
 266             if(DEBUG_BUFFER)
 267                 System.out.println("Current Entity is "+scannedEntity.name);
 268         }
 269     }
 270 
    /**
     * Returns the entity currently being scanned, or null if none is set.
     */
    public  Entity.ScannedEntity getCurrentEntity(){
        return fCurrentEntity ;
    }
 274     //
 275     // XMLEntityReader methods
 276     //
 277 
 278     /**
 279      * Returns the base system identifier of the currently scanned
 280      * entity, or null if none is available.
 281      */
 282     public final String getBaseSystemId() {
 283         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 284     } // getBaseSystemId():String
 285 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the identifier taken from the current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
     */
    public void setBaseSystemId(String systemId) {
        //no-op
    }
 292 
 293     ///////////// Locator methods start.
 294     public final int getLineNumber(){
 295         //if the entity is closed, we should return -1
 296         //xxx at first place why such call should be there...
 297         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 298     }
 299 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the line number of the current entity.
     *
     * @param line ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
     */
    public void setLineNumber(int line) {
        //no-op
    }
 306 
 307 
 308     public final int getColumnNumber(){
 309         //if the entity is closed, we should return -1
 310         //xxx at first place why such call should be there...
 311         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 312     }
 313 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the column number of the current entity.
     *
     * @param col ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
     */
    public void setColumnNumber(int col) {
        // no-op
    }
 320 
 321 
 322     public final int getCharacterOffset(){
 323         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 324     }
 325 
 326     /** Returns the expanded system identifier.  */
 327     public final String getExpandedSystemId() {
 328         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 329     }
 330 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the identifier taken from the current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
     */
    public void setExpandedSystemId(String systemId) {
        //no-op
    }
 337 
 338     /** Returns the literal system identifier.  */
 339     public final String getLiteralSystemId() {
 340         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 341     }
 342 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the identifier taken from the current entity.
     *
     * @param systemId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
     */
    public void setLiteralSystemId(String systemId) {
        //no-op
    }
 349 
 350     /** Returns the public identifier.  */
 351     public final String getPublicId() {
 352         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 353     }
 354 
    /**
     * Does nothing: the value passed in is ignored and the getter keeps
     * reporting the identifier taken from the current entity.
     *
     * @param publicId ignored
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
     */
    public void setPublicId(String publicId) {
        //no-op
    }
 361 
 362     ///////////////// Locator methods finished.
 363 
    /**
     * Stores the version on the entity currently being scanned.
     * <p>
     * The current entity is dereferenced without a null check, so an entity
     * must have been set before this method is called.
     *
     * @param version the version to record on the current entity
     */
    public void setVersion(String version){
        fCurrentEntity.version = version;
    }
 368 
 369     public String getVersion(){
 370         if (fCurrentEntity != null)
 371             return fCurrentEntity.version ;
 372         return null;
 373     }
 374 
 375     /**
 376      * Returns the encoding of the current entity.
 377      * Note that, for a given entity, this value can only be
 378      * considered final once the encoding declaration has been read (or once it
 379      * has been determined that there is no such declaration) since, no encoding
 380      * having been specified on the XMLInputSource, the parser
 381      * will make an initial "guess" which could be in error.
 382      */
 383     public final String getEncoding() {
 384         if (fCurrentEntity != null) {
 385             return fCurrentEntity.encoding;
 386         }
 387         return null;
 388     } // getEncoding():String
 389 
 390     /**
 391      * Sets the encoding of the scanner. This method is used by the
 392      * scanners if the XMLDecl or TextDecl line contains an encoding
 393      * pseudo-attribute.
 394      * <p>
 395      * <strong>Note:</strong> The underlying character reader on the
 396      * current entity will be changed to accomodate the new encoding.
 397      * However, the new encoding is ignored if the current reader was
 398      * not constructed from an input stream (e.g. an external entity
 399      * that is resolved directly to the appropriate java.io.Reader
 400      * object).
 401      *
 402      * @param encoding The IANA encoding name of the new encoding.
 403      *
 404      * @throws IOException Thrown if the new encoding is not supported.
 405      *
 406      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 407      */
 408     public final void setEncoding(String encoding) throws IOException {
 409 
 410         if (DEBUG_ENCODINGS) {
 411             System.out.println("$$$ setEncoding: "+encoding);
 412         }
 413 
 414         if (fCurrentEntity.stream != null) {
 415             // if the encoding is the same, don't change the reader and
 416             // re-use the original reader used by the OneCharReader
 417             // NOTE: Besides saving an object, this overcomes deficiencies
 418             //       in the UTF-16 reader supplied with the standard Java
 419             //       distribution (up to and including 1.3). The UTF-16
 420             //       decoder buffers 8K blocks even when only asked to read
 421             //       a single char! -Ac
 422             if (fCurrentEntity.encoding == null ||
 423                     !fCurrentEntity.encoding.equals(encoding)) {
 424                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 425                 // and we know the endian-ness, we shouldn't change readers.
 426                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 427                 // the endian-ness from the encoding we presently have.
 428                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 429                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 430                     if(ENCODING.equals("UTF-16")) return;
 431                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 432                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 433                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 434                         } else {
 435                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 436                         }
 437                         return;
 438                     }
 439                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 440                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 441                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 442                         } else {
 443                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 444                         }
 445                         return;
 446                     }
 447                 }
 448                 // wrap a new reader around the input stream, changing
 449                 // the encoding
 450                 if (DEBUG_ENCODINGS) {
 451                     System.out.println("$$$ creating new reader from stream: "+
 452                             fCurrentEntity.stream);
 453                 }
 454                 //fCurrentEntity.stream.reset();
 455                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 456                 fCurrentEntity.encoding = encoding;
 457 
 458             } else {
 459                 if (DEBUG_ENCODINGS)
 460                     System.out.println("$$$ reusing old reader on stream");
 461             }
 462         }
 463 
 464     } // setEncoding(String)
 465 
    /**
     * Returns true if the current entity being scanned is external.
     * <p>
     * The entity itself is queried rather than the cached isExternal field,
     * and it is dereferenced without a null check, so an entity must have
     * been set before this method is called.
     */
    public final boolean isExternal() {
        return fCurrentEntity.isExternal();
    } // isExternal():boolean
 470 
 471     public int getChar(int relative) throws IOException{
 472         if(arrangeCapacity(relative + 1, false)){
 473             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 474         }else{
 475             return -1;
 476         }
 477     }//getChar()
 478 
 479     /**
 480      * Returns the next character on the input.
 481      * <p>
 482      * <strong>Note:</strong> The character is <em>not</em> consumed.
 483      *
 484      * @throws IOException  Thrown if i/o error occurs.
 485      * @throws EOFException Thrown on end of file.
 486      */
 487     public int peekChar() throws IOException {
 488         if (DEBUG_BUFFER) {
 489             System.out.print("(peekChar: ");
 490             print();
 491             System.out.println();
 492         }
 493 
 494         // load more characters, if needed
 495         if (fCurrentEntity.position == fCurrentEntity.count) {
 496             invokeListeners(0);
 497             load(0, true);
 498         }
 499 
 500         // peek at character
 501         int c = fCurrentEntity.ch[fCurrentEntity.position];
 502 
 503         // return peeked character
 504         if (DEBUG_BUFFER) {
 505             System.out.print(")peekChar: ");
 506             print();
 507             if (isExternal) {
 508                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 509             } else {
 510                 System.out.println(" -> '"+(char)c+"'");
 511             }
 512         }
 513         if (isExternal) {
 514             return c != '\r' ? c : '\n';
 515         } else {
 516             return c;
 517         }
 518 
 519     } // peekChar():int
 520 
    /**
     * Returns the next character on the input and advances past it. Line
     * breaks update the line and column counters of the current entity; for
     * an external entity a carriage return, optionally followed by a line
     * feed, is consumed as a unit and reported as a single line feed.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        // A line feed always ends a line; a carriage return does so only in
        // external entities.
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // The line break was the last buffered character: park it in slot 0
            // and refill after it, so the character following a carriage
            // return can be inspected below.
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false);
            }
            if (c == '\r' && isExternal) {
                // Swallow the line feed of a CR LF pair; step back if the
                // following character is anything else.
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        // NOTE(review): this increment also runs after a line break, so the
        // column ends up at 2 rather than 1 in that case — confirm whether
        // that is intended.
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int
 571 
    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the scanned token as added to the symbol table, or null when
     *         the next character is not a name character
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    public String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan nmtoken
        // offset marks where the token starts inside the buffer
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            // Classify the character: table lookup below 127, XMLChar.isName otherwise.
            c = fCurrentEntity.ch[fCurrentEntity.position];
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // The buffer ran out in the middle of the token: move what has
            // been scanned so far to the front and read more behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // NOTE(review): load is defined outside this block; a true
                // result presumably means no further data for this entity —
                // confirm. The token ends here in that case.
                if (load(length, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String
 650 
    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     * <p>
     * The first character must satisfy XMLChar.isNameStart; the following
     * characters are accepted through the VALID_NAMES table (below 127) or
     * XMLChar.isName.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the scanned name as added to the symbol table, or null when the
     *         next character cannot start a name
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public String scanName() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan name
        // offset marks where the name starts inside the buffer
        int offset = fCurrentEntity.position;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // The start character was the last one buffered: park it in slot 0
            // and read more behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                // NOTE(review): load is defined outside this block; a true
                // result presumably means no further data for this entity —
                // confirm. The name is then just this single character.
                if (load(1, false)) {
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                // Classify the character: table lookup below 127, XMLChar.isName otherwise.
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // The buffer ran out in the middle of the name: move what has
                // been scanned so far to the front and read more behind it.
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    offset = 0;
                    if (load(length, false)) {
                        break;
                    }
                }
            }
        }
        // length stays 0 when the first character could not start a name
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String
 747 
 748     /**
 749      * Scans a qualified name from the input, setting the fields of the
 750      * QName structure appropriately.
 751      * <p>
 752      * <strong>Note:</strong> The qualified name characters are consumed.
 753      * <p>
 754      * <strong>Note:</strong> The strings used to set the values of the
 755      * QName structure must be symbols. The SymbolTable can be used for
 756      * this purpose.
 757      *
 758      * @param qname The qualified name structure to fill.
 759      *
 760      * @return Returns true if a qualified name appeared immediately on
 761      *         the input and was scanned, false otherwise.
 762      *
 763      * @throws IOException  Thrown if i/o error occurs.
 764      * @throws EOFException Thrown on end of file.
 765      *
 766      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 767      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 768      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 769      */
 770     public boolean scanQName(QName qname) throws IOException {
 771         if (DEBUG_BUFFER) {
 772             System.out.print("(scanQName, "+qname+": ");
 773             print();
 774             System.out.println();
 775         }
 776 
 777         // load more characters, if needed
 778         if (fCurrentEntity.position == fCurrentEntity.count) {
 779             invokeListeners(0);
 780             load(0, true);
 781         }
 782 
 783         // scan qualified name
 784         int offset = fCurrentEntity.position;
 785 
 786         //making a check if if the specified character is a valid name start character
 787         //as defined by production [5] in the XML 1.0 specification.
 788         // Name ::= (Letter | '_' | ':') (NameChar)*
 789 
 790         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 791             if (++fCurrentEntity.position == fCurrentEntity.count) {
 792                 invokeListeners(1);
 793                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 794                 offset = 0;
 795 
 796                 if (load(1, false)) {
 797                     fCurrentEntity.columnNumber++;
 798                     //adding into symbol table.
 799                     //XXX We are trying to add single character in SymbolTable??????
 800                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 801                     qname.setValues(null, name, name, null);
 802                     if (DEBUG_BUFFER) {
 803                         System.out.print(")scanQName, "+qname+": ");
 804                         print();
 805                         System.out.println(" -> true");
 806                     }
 807                     return true;
 808                 }
 809             }
 810             int index = -1;
 811             boolean vc = false;
 812             while ( true){
 813 
 814                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 815                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 816                 if(c < 127){
 817                     vc = VALID_NAMES[c];
 818                 }else{
 819                     vc = XMLChar.isName(c);
 820                 }
 821                 if(!vc)break;
 822                 if (c == ':') {
 823                     if (index != -1) {
 824                         break;
 825                     }
 826                     index = fCurrentEntity.position;
 827                 }
 828                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 829                     int length = fCurrentEntity.position - offset;
 830                     invokeListeners(length);
 831                     if (length == fCurrentEntity.fBufferSize) {
 832                         // bad luck we have to resize our buffer
 833                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 834                         System.arraycopy(fCurrentEntity.ch, offset,
 835                                 tmp, 0, length);
 836                         fCurrentEntity.ch = tmp;
 837                         fCurrentEntity.fBufferSize *= 2;
 838                     } else {
 839                         System.arraycopy(fCurrentEntity.ch, offset,
 840                                 fCurrentEntity.ch, 0, length);
 841                     }
 842                     if (index != -1) {
 843                         index = index - offset;
 844                     }
 845                     offset = 0;
 846                     if (load(length, false)) {
 847                         break;
 848                     }
 849                 }
 850             }
 851             int length = fCurrentEntity.position - offset;
 852             fCurrentEntity.columnNumber += length;
 853             if (length > 0) {
 854                 String prefix = null;
 855                 String localpart = null;
 856                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 857                         offset, length);
 858 
 859                 if (index != -1) {
 860                     int prefixLength = index - offset;
 861                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 862                             offset, prefixLength);
 863                     int len = length - prefixLength - 1;
 864                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 865                             index + 1, len);
 866 
 867                 } else {
 868                     localpart = rawname;
 869                 }
 870                 qname.setValues(prefix, localpart, rawname, null);
 871                 if (DEBUG_BUFFER) {
 872                     System.out.print(")scanQName, "+qname+": ");
 873                     print();
 874                     System.out.println(" -> true");
 875                 }
 876                 return true;
 877             }
 878         }
 879 
 880         // no qualified name found
 881         if (DEBUG_BUFFER) {
 882             System.out.print(")scanQName, "+qname+": ");
 883             print();
 884             System.out.println(" -> false");
 885         }
 886         return false;
 887 
 888     } // scanQName(QName):boolean
 889 
    /**
     * Scans a range of parsed character data and hands it to the supplied
     * XMLString.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> The scanned range is reported through
     * <code>content.setValues</code> with the entity's character buffer, an
     * offset and a length. An append-based variant exists only as
     * commented-out code, so nothing is accumulated across calls here.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * <strong>Note:</strong> A leading run of line breaks is rewritten to
     * '\n' characters in the buffer before it is reported; '\r' is only
     * treated as a line break when <code>isExternal</code> is set.
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one unread character left: move it to the front of the
            // buffer and load after it, so that one character of lookahead is
            // available below
            // NOTE(review): assumes load(1, false) fills the buffer starting at
            // index 1 -- confirm against load()
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // subtracted from the scanned length when the column number is updated
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // consume the leading run of line breaks
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: restart at the front, leaving room for
                        // the line breaks counted so far, and load after them
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    // "\r\n" is one line break: skip the '\n' and advance the
                    // start of the reported range along with it
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    // NOTE(review): a '\r' that is not followed by '\n' bumps
                    // newlines a second time -- intent is not evident from this
                    // method; confirm
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same buffer-exhausted handling as in the '\r' case
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-read it and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the consumed range with '\n' so the caller sees
            // normalized line breaks
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one unread character remains: report just the line
                // breaks and return -1 (next character not reported)
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until the first one XMLChar.isContent rejects,
        // or until the buffered data runs out
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;

        // report the scanned range (leading line breaks included) to the caller
        content.setValues(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer exhausted: the next character is not known
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int
1035 
1036     /**
1037      * Scans a range of attribute value data, setting the fields of the
1038      * XMLString structure, appropriately.
1039      * <p>
1040      * <strong>Note:</strong> The characters are consumed.
1041      * <p>
1042      * <strong>Note:</strong> This method does not guarantee to return
1043      * the longest run of attribute value data. This method may return
1044      * before the quote character due to reaching the end of the input
1045      * buffer or any other reason.
1046      * <p>
1047      * <strong>Note:</strong> The fields contained in the XMLString
1048      * structure are not guaranteed to remain valid upon subsequent calls
1049      * to the entity scanner. Therefore, the caller is responsible for
1050      * immediately using the returned character data or making a copy of
1051      * the character data.
1052      *
1053      * @param quote   The quote character that signifies the end of the
1054      *                attribute value data.
1055      * @param content The content structure to fill.
1056      *
1057      * @return Returns the next character on the input, if known. This
1058      *         value may be -1 but this does <em>note</em> designate
1059      *         end of file.
1060      *
1061      * @throws IOException  Thrown if i/o error occurs.
1062      * @throws EOFException Thrown on end of file.
1063      */
1064     public int scanLiteral(int quote, XMLString content)
1065     throws IOException {
1066         if (DEBUG_BUFFER) {
1067             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1068             print();
1069             System.out.println();
1070         }
1071         // load more characters, if needed
1072         if (fCurrentEntity.position == fCurrentEntity.count) {
1073             invokeListeners(0);
1074             load(0, true);
1075         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1076             invokeListeners(0);
1077             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1078 
1079             load(1, false);
1080             fCurrentEntity.position = 0;
1081         }
1082 
1083         // normalize newlines
1084         int offset = fCurrentEntity.position;
1085         int c = fCurrentEntity.ch[offset];
1086         int newlines = 0;
1087         if(whiteSpaceInfoNeeded)
1088             whiteSpaceLen=0;
1089         if (c == '\n' || (c == '\r' && isExternal)) {
1090             if (DEBUG_BUFFER) {
1091                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1092                 print();
1093                 System.out.println();
1094             }
1095             do {
1096                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1097                 if (c == '\r' && isExternal) {
1098                     newlines++;
1099                     fCurrentEntity.lineNumber++;
1100                     fCurrentEntity.columnNumber = 1;
1101                     if (fCurrentEntity.position == fCurrentEntity.count) {
1102                         invokeListeners(newlines);
1103                         offset = 0;
1104                         fCurrentEntity.position = newlines;
1105                         if (load(newlines, false)) {
1106                             break;
1107                         }
1108                     }
1109                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1110                         fCurrentEntity.position++;
1111                         offset++;
1112                     }
1113                     /*** NEWLINE NORMALIZATION ***/
1114                     else {
1115                         newlines++;
1116                     }
1117                     /***/
1118                 } else if (c == '\n') {
1119                     newlines++;
1120                     fCurrentEntity.lineNumber++;
1121                     fCurrentEntity.columnNumber = 1;
1122                     if (fCurrentEntity.position == fCurrentEntity.count) {
1123                         offset = 0;
1124                         invokeListeners(newlines);
1125                         fCurrentEntity.position = newlines;
1126                         if (load(newlines, false)) {
1127                             break;
1128                         }
1129                     }
1130                     /*** NEWLINE NORMALIZATION ***
1131                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1132                      * && external) {
1133                      * fCurrentEntity.position++;
1134                      * offset++;
1135                      * }
1136                      * /***/
1137                 } else {
1138                     fCurrentEntity.position--;
1139                     break;
1140                 }
1141             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1142             int i=0;
1143             for ( i = offset; i < fCurrentEntity.position; i++) {
1144                 fCurrentEntity.ch[i] = '\n';
1145                 whiteSpaceLookup[whiteSpaceLen++]=i;
1146             }
1147 
1148             int length = fCurrentEntity.position - offset;
1149             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1150                 content.setValues(fCurrentEntity.ch, offset, length);
1151                 if (DEBUG_BUFFER) {
1152                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1153                     print();
1154                     System.out.println();
1155                 }
1156                 return -1;
1157             }
1158             if (DEBUG_BUFFER) {
1159                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1160                 print();
1161                 System.out.println();
1162             }
1163         }
1164 
1165         // scan literal value
1166         while (fCurrentEntity.position < fCurrentEntity.count) {
1167             c = fCurrentEntity.ch[fCurrentEntity.position++];
1168             if ((c == quote &&
1169                  (!fCurrentEntity.literal || isExternal))
1170                 || c == '%' || !XMLChar.isContent(c)) {
1171                 fCurrentEntity.position--;
1172                 break;
1173             }
1174             if(whiteSpaceInfoNeeded){
1175                 if(c == 0x20 || c == 0x9){
1176                     if(whiteSpaceLen < whiteSpaceLookup.length){
1177                         whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position-1;
1178                     }else{
1179                         int [] tmp = new int[whiteSpaceLookup.length*2];
1180                         System.arraycopy(whiteSpaceLookup,0,tmp,0,whiteSpaceLookup.length);
1181                         whiteSpaceLookup = tmp;
1182                         whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position - 1;
1183                     }
1184                 }
1185             }
1186         }
1187         int length = fCurrentEntity.position - offset;
1188         fCurrentEntity.columnNumber += length - newlines;
1189         content.setValues(fCurrentEntity.ch, offset, length);
1190 
1191         // return next character
1192         if (fCurrentEntity.position != fCurrentEntity.count) {
1193             c = fCurrentEntity.ch[fCurrentEntity.position];
1194             // NOTE: We don't want to accidentally signal the
1195             //       end of the literal if we're expanding an
1196             //       entity appearing in the literal. -Ac
1197             if (c == quote && fCurrentEntity.literal) {
1198                 c = -1;
1199             }
1200         } else {
1201             c = -1;
1202         }
1203         if (DEBUG_BUFFER) {
1204             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1205             print();
1206             System.out.println(" -> '"+(char)c+"'");
1207         }
1208         return c;
1209 
1210     } // scanLiteral(int,XMLString):int
1211 
    //CHANGED:
    /**
     * Scans a range of character data up to the specified delimiter,
     * appending the scanned characters to the supplied XMLStringBuffer.
     * <p>
     * <strong>Note:</strong> The characters are consumed. When the delimiter
     * is found it is consumed as well but is not appended to the buffer.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character (its first character is read unconditionally).
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true when the method stops before the delimiter was
     *         found (an invalid character was encountered, or the buffered
     *         data ran out right after a leading run of line breaks);
     *         returns false once the delimiter has been consumed, or when
     *         fewer characters than the delimiter length could be made
     *         available.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        // each pass scans one stretch of buffered data; repeat until the
        // delimiter is found or one of the early returns below is taken
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true);
            }

            boolean bNextEntity = false;

            // fewer than delimLen unread characters: move the unread tail to the
            // front of the buffer and load more input after it, until enough is
            // available or load() returns true
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // hand over whatever is left, mark the buffer as fully consumed
                // and report that there is nothing more to scan
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                // NOTE(review): the column is advanced by count, not by length -- confirm
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            // subtracted from the scanned length when the column number is updated
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                // consume the leading run of line breaks
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted: restart at the front, leaving room
                            // for the line breaks counted so far
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                        // "\r\n" is one line break: skip the '\n' and advance the
                        // start of the appended range along with it
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is overwritten here but not in the
                            // '\r' branch above -- confirm the asymmetry is intended
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                    } else {
                        // not a line break: un-read it and stop
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // overwrite the consumed range with '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one unread character remains: append the line breaks
                    // and let the caller call again
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // ran out of buffered data in the middle of a possible
                            // delimiter: step back to its first character and
                            // leave the scan loop
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: step back to just after the first character
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // the position only lands here when every delimiter
                    // character matched
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // un-read the line break and end this pass; the next pass
                    // starts at it and runs the normalization above
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // un-read the invalid character, append what was scanned
                    // before it and return
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter was consumed but is not part of the data
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // trace the outcome of this pass
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop above only exits once done is true, so this returns false
        return !done;

    } // scanData(String,XMLStringBuffer):boolean
1402 
1403     /**
1404      * Skips a character appearing immediately on the input.
1405      * <p>
1406      * <strong>Note:</strong> The character is consumed only if it matches
1407      * the specified character.
1408      *
1409      * @param c The character to skip.
1410      *
1411      * @return Returns true if the character was skipped.
1412      *
1413      * @throws IOException  Thrown if i/o error occurs.
1414      * @throws EOFException Thrown on end of file.
1415      */
1416     public boolean skipChar(int c) throws IOException {
1417         if (DEBUG_BUFFER) {
1418             System.out.print("(skipChar, '"+(char)c+"': ");
1419             print();
1420             System.out.println();
1421         }
1422 
1423         // load more characters, if needed
1424         if (fCurrentEntity.position == fCurrentEntity.count) {
1425             invokeListeners(0);
1426             load(0, true);
1427         }
1428 
1429         // skip character
1430         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1431         if (cc == c) {
1432             fCurrentEntity.position++;
1433             if (c == '\n') {
1434                 fCurrentEntity.lineNumber++;
1435                 fCurrentEntity.columnNumber = 1;
1436             } else {
1437                 fCurrentEntity.columnNumber++;
1438             }
1439             if (DEBUG_BUFFER) {
1440                 System.out.print(")skipChar, '"+(char)c+"': ");
1441                 print();
1442                 System.out.println(" -> true");
1443             }
1444             return true;
1445         } else if (c == '\n' && cc == '\r' && isExternal) {
1446             // handle newlines
1447             if (fCurrentEntity.position == fCurrentEntity.count) {
1448                 invokeListeners(1);
1449                 fCurrentEntity.ch[0] = (char)cc;
1450                 load(1, false);
1451             }
1452             fCurrentEntity.position++;
1453             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1454                 fCurrentEntity.position++;
1455             }
1456             fCurrentEntity.lineNumber++;
1457             fCurrentEntity.columnNumber = 1;
1458             if (DEBUG_BUFFER) {
1459                 System.out.print(")skipChar, '"+(char)c+"': ");
1460                 print();
1461                 System.out.println(" -> true");
1462             }
1463             return true;
1464         }
1465 
1466         // character was not skipped
1467         if (DEBUG_BUFFER) {
1468             System.out.print(")skipChar, '"+(char)c+"': ");
1469             print();
1470             System.out.println(" -> false");
1471         }
1472         return false;
1473 
1474     } // skipChar(int):boolean
1475 
1476     public boolean isSpace(char ch){
1477         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1478     }
1479     /**
1480      * Skips space characters appearing immediately on the input.
1481      * <p>
1482      * <strong>Note:</strong> The characters are consumed only if they are
1483      * space characters.
1484      *
1485      * @return Returns true if at least one space character was skipped.
1486      *
1487      * @throws IOException  Thrown if i/o error occurs.
1488      * @throws EOFException Thrown on end of file.
1489      *
1490      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1491      */
1492     public boolean skipSpaces() throws IOException {
1493         if (DEBUG_BUFFER) {
1494             System.out.print("(skipSpaces: ");
1495             print();
1496             System.out.println();
1497         }
1498         //boolean entityChanged = false;
1499         // load more characters, if needed
1500         if (fCurrentEntity.position == fCurrentEntity.count) {
1501             invokeListeners(0);
1502             load(0, true);
1503         }
1504 
1505         //we are doing this check only in skipSpace() because it is called by
1506         //fMiscDispatcher and we want the parser to exit gracefully when document
1507         //is well-formed.
1508         //it is possible that end of document is reached and
1509         //fCurrentEntity becomes null
1510         //nothing was read so entity changed  'false' should be returned.
1511         if(fCurrentEntity == null){
1512             return false ;
1513         }
1514 
1515         // skip spaces
1516         int c = fCurrentEntity.ch[fCurrentEntity.position];
1517         if (XMLChar.isSpace(c)) {
1518             do {
1519                 boolean entityChanged = false;
1520                 // handle newlines
1521                 if (c == '\n' || (isExternal && c == '\r')) {
1522                     fCurrentEntity.lineNumber++;
1523                     fCurrentEntity.columnNumber = 1;
1524                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1525                         invokeListeners(0);
1526                         fCurrentEntity.ch[0] = (char)c;
1527                         entityChanged = load(1, true);
1528                         if (!entityChanged){
1529                             // the load change the position to be 1,
1530                             // need to restore it when entity not changed
1531                             fCurrentEntity.position = 0;
1532                         }else if(fCurrentEntity == null){
1533                             return true ;
1534                         }
1535                     }
1536                     if (c == '\r' && isExternal) {
1537                         // REVISIT: Does this need to be updated to fix the
1538                         //          #x0D ^#x0A newline normalization problem? -Ac
1539                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1540                             fCurrentEntity.position--;
1541                         }
1542                     }
1543                 } else {
1544                     fCurrentEntity.columnNumber++;
1545                 }
1546                 // load more characters, if needed
1547                 if (!entityChanged){
1548                     fCurrentEntity.position++;
1549                 }
1550 
1551                 if (fCurrentEntity.position == fCurrentEntity.count) {
1552                     invokeListeners(0);
1553                     load(0, true);
1554 
1555                     //we are doing this check only in skipSpace() because it is called by
1556                     //fMiscDispatcher and we want the parser to exit gracefully when document
1557                     //is well-formed.
1558 
1559                     //it is possible that end of document is reached and
1560                     //fCurrentEntity becomes null
1561                     //nothing was read so entity changed  'false' should be returned.
1562                     if(fCurrentEntity == null){
1563                         return true ;
1564                     }
1565 
1566                 }
1567             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1568             if (DEBUG_BUFFER) {
1569                 System.out.print(")skipSpaces: ");
1570                 print();
1571                 System.out.println(" -> true");
1572             }
1573             return true;
1574         }
1575 
1576         // no spaces were found
1577         if (DEBUG_BUFFER) {
1578             System.out.print(")skipSpaces: ");
1579             print();
1580             System.out.println(" -> false");
1581         }
1582         return false;
1583 
1584     } // skipSpaces():boolean
1585 
1586 
1587     /**
1588      * @param legnth This function checks that following number of characters are available.
1589      * to the underlying buffer.
1590      * @return This function returns true if capacity asked is available.
1591      */
1592     public boolean arrangeCapacity(int length) throws IOException{
1593         return arrangeCapacity(length, false);
1594     }
1595 
1596     /**
1597      * @param legnth This function checks that following number of characters are available.
1598      * to the underlying buffer.
1599      * @param if the underlying function should change the entity
1600      * @return This function returns true if capacity asked is available.
1601      *
1602      */
1603     public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1604         //check if the capacity is availble in the current buffer
1605         //count is no. of characters in the buffer   [x][m][l]
1606         //position is '0' based
1607         //System.out.println("fCurrent Entity " + fCurrentEntity);
1608         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1609             return true;
1610         }
1611         if(DEBUG_SKIP_STRING){
1612             System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1613             System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1614             System.out.println("length = " + length);
1615         }
1616         boolean entityChanged = false;
1617         //load more characters -- this function shouldn't change the entity
1618         while((fCurrentEntity.count - fCurrentEntity.position) < length){
1619             if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1620                 invokeListeners(0);
1621                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1622                 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1623                 fCurrentEntity.position = 0;
1624             }
1625 
1626             if((fCurrentEntity.count - fCurrentEntity.position) < length){
1627                 int pos = fCurrentEntity.position;
1628                 invokeListeners(pos);
1629                 entityChanged = load(fCurrentEntity.count, changeEntity);
1630                 fCurrentEntity.position = pos;
1631                 if(entityChanged)break;
1632             }
1633             if(DEBUG_SKIP_STRING){
1634                 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1635                 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1636                 System.out.println("length = " + length);
1637             }
1638         }
1639         //load changes the position.. set it back to the point where we started.
1640 
1641         //after loading check again.
1642         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1643             return true;
1644         } else {
1645             return false;
1646         }
1647     }
1648 
1649     /**
1650      * Skips the specified string appearing immediately on the input.
1651      * <p>
1652      * <strong>Note:</strong> The characters are consumed only if all
1653      * the characters are skipped.
1654      *
1655      * @param s The string to skip.
1656      *
1657      * @return Returns true if the string was skipped.
1658      *
1659      * @throws IOException  Thrown if i/o error occurs.
1660      * @throws EOFException Thrown on end of file.
1661      */
1662     public boolean skipString(String s) throws IOException {
1663 
1664         final int length = s.length();
1665 
1666         //first make sure that required capacity is avaible
1667         if(arrangeCapacity(length, false)){
1668             final int beforeSkip = fCurrentEntity.position ;
1669             int afterSkip = fCurrentEntity.position + length - 1 ;
1670             if(DEBUG_SKIP_STRING){
1671                 System.out.println("skipString,length = " + s + "," + length);
1672                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1673             }
1674 
1675             //s.charAt() indexes are 0 to 'Length -1' based.
1676             int i = length - 1 ;
1677             //check from reverse
1678             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1679                 if(afterSkip-- == beforeSkip){
1680                     fCurrentEntity.position = fCurrentEntity.position + length ;
1681                     fCurrentEntity.columnNumber += length;
1682                     return true;
1683                 }
1684             }
1685         }
1686 
1687         return false;
1688     } // skipString(String):boolean
1689 
1690     public boolean skipString(char [] s) throws IOException {
1691 
1692         final int length = s.length;
1693         //first make sure that required capacity is avaible
1694         if(arrangeCapacity(length, false)){
1695             int beforeSkip = fCurrentEntity.position ;
1696             int afterSkip = fCurrentEntity.position + length  ;
1697 
1698             if(DEBUG_SKIP_STRING){
1699                 System.out.println("skipString,length = " + new String(s) + "," + length);
1700                 System.out.println("skipString,length = " + new String(s) + "," + length);
1701             }
1702 
1703             for(int i=0;i<length;i++){
1704                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1705                    return false;
1706                 }
1707             }
1708             fCurrentEntity.position = fCurrentEntity.position + length ;
1709             fCurrentEntity.columnNumber += length;
1710             return true;
1711 
1712         }
1713 
1714         return false;
1715     }
1716 
1717     //
1718     // Locator methods
1719     //
1720     //
1721     // Private methods
1722     //
1723 
1724     /**
1725      * Loads a chunk of text.
1726      *
1727      * @param offset       The offset into the character buffer to
1728      *                     read the next batch of characters.
1729      * @param changeEntity True if the load should change entities
1730      *                     at the end of the entity, otherwise leave
1731      *                     the current entity in place and the entity
1732      *                     boundary will be signaled by the return
1733      *                     value.
1734      *
1735      * @returns Returns true if the entity changed as a result of this
1736      *          load operation.
1737      */
1738     final boolean load(int offset, boolean changeEntity)
1739     throws IOException {
1740         if (DEBUG_BUFFER) {
1741             System.out.print("(load, "+offset+": ");
1742             print();
1743             System.out.println();
1744         }
1745         //maintaing the count till last load
1746         fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
1747         // read characters
1748         int length = fCurrentEntity.ch.length - offset;
1749         if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
1750             length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
1751         }
1752         if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
1753         int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1754         if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
1755 
1756         // reset count and position
1757         boolean entityChanged = false;
1758         if (count != -1) {
1759             if (count != 0) {
1760                 // record the last count
1761                 fCurrentEntity.fLastCount = count;
1762                 fCurrentEntity.count = count + offset;
1763                 fCurrentEntity.position = offset;
1764             }
1765         }
1766         // end of this entity
1767         else {
1768             fCurrentEntity.count = offset;
1769             fCurrentEntity.position = offset;
1770             entityChanged = true;
1771 
1772             if (changeEntity) {
1773                 //notify the entity manager about the end of entity
1774                 fEntityManager.endEntity();
1775                 //return if the current entity becomes null
1776                 if(fCurrentEntity == null){
1777                     throw END_OF_DOCUMENT_ENTITY;
1778                 }
1779                 // handle the trailing edges
1780                 if (fCurrentEntity.position == fCurrentEntity.count) {
1781                     load(0, true);
1782                 }
1783             }
1784 
1785         }
1786         if (DEBUG_BUFFER) {
1787             System.out.print(")load, "+offset+": ");
1788             print();
1789             System.out.println();
1790         }
1791 
1792         return entityChanged;
1793 
1794     } // load(int, boolean):boolean
1795 
1796     /**
1797      * Creates a reader capable of reading the given input stream in
1798      * the specified encoding.
1799      *
1800      * @param inputStream  The input stream.
1801      * @param encoding     The encoding name that the input stream is
1802      *                     encoded using. If the user has specified that
1803      *                     Java encoding names are allowed, then the
1804      *                     encoding name may be a Java encoding name;
1805      *                     otherwise, it is an ianaEncoding name.
1806      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1807      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1808      *                      unknown or not relevant.
1809      *
1810      * @return Returns a reader.
1811      */
1812     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1813     throws IOException {
1814 
1815         // normalize encoding name
1816         if (encoding == null) {
1817             encoding = "UTF-8";
1818         }
1819 
1820         // try to use an optimized reader
1821         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1822         if (ENCODING.equals("UTF-8")) {
1823             if (DEBUG_ENCODINGS) {
1824                 System.out.println("$$$ creating UTF8Reader");
1825             }
1826             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1827         }
1828         if (ENCODING.equals("US-ASCII")) {
1829             if (DEBUG_ENCODINGS) {
1830                 System.out.println("$$$ creating ASCIIReader");
1831             }
1832             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1833         }
1834         if(ENCODING.equals("ISO-10646-UCS-4")) {
1835             if(isBigEndian != null) {
1836                 boolean isBE = isBigEndian.booleanValue();
1837                 if(isBE) {
1838                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1839                 } else {
1840                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1841                 }
1842             } else {
1843                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1844                         "EncodingByteOrderUnsupported",
1845                         new Object[] { encoding },
1846                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1847             }
1848         }
1849         if(ENCODING.equals("ISO-10646-UCS-2")) {
1850             if(isBigEndian != null) { // sould never happen with this encoding...
1851                 boolean isBE = isBigEndian.booleanValue();
1852                 if(isBE) {
1853                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1854                 } else {
1855                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1856                 }
1857             } else {
1858                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1859                         "EncodingByteOrderUnsupported",
1860                         new Object[] { encoding },
1861                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1862             }
1863         }
1864 
1865         // check for valid name
1866         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1867         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1868         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1869             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1870                     "EncodingDeclInvalid",
1871                     new Object[] { encoding },
1872                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1873                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1874                     //       because every byte is a valid ISO Latin 1 character.
1875                     //       It may not translate correctly but if we failed on
1876                     //       the encoding anyway, then we're expecting the content
1877                     //       of the document to be bad. This will just prevent an
1878                     //       invalid UTF-8 sequence to be detected. This is only
1879                     //       important when continue-after-fatal-error is turned
1880                     //       on. -Ac
1881                     encoding = "ISO-8859-1";
1882         }
1883 
1884         // try to use a Java reader
1885         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1886         if (javaEncoding == null) {
1887             if(fAllowJavaEncodings) {
1888                 javaEncoding = encoding;
1889             } else {
1890                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1891                         "EncodingDeclInvalid",
1892                         new Object[] { encoding },
1893                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1894                         // see comment above.
1895                         javaEncoding = "ISO8859_1";
1896             }
1897         }
1898         else if (javaEncoding.equals("ASCII")) {
1899             if (DEBUG_ENCODINGS) {
1900                 System.out.println("$$$ creating ASCIIReader");
1901             }
1902             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1903         }
1904 
1905         if (DEBUG_ENCODINGS) {
1906             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
1907             if (javaEncoding == encoding) {
1908                 System.out.print(" (IANA encoding)");
1909             }
1910             System.out.println();
1911         }
1912         return new InputStreamReader(inputStream, javaEncoding);
1913 
1914     } // createReader(InputStream,String, Boolean): Reader
1915 
1916     /**
1917      * Returns the IANA encoding name that is auto-detected from
1918      * the bytes specified, with the endian-ness of that encoding where appropriate.
1919      *
1920      * @param b4    The first four bytes of the input.
1921      * @param count The number of bytes actually read.
1922      * @return a 2-element array:  the first element, an IANA-encoding string,
1923      *  the second element a Boolean which is true iff the document is big endian, false
1924      *  if it's little-endian, and null if the distinction isn't relevant.
1925      */
1926     protected Object[] getEncodingName(byte[] b4, int count) {
1927 
1928         if (count < 2) {
1929             return new Object[]{"UTF-8", null};
1930         }
1931 
1932         // UTF-16, with BOM
1933         int b0 = b4[0] & 0xFF;
1934         int b1 = b4[1] & 0xFF;
1935         if (b0 == 0xFE && b1 == 0xFF) {
1936             // UTF-16, big-endian
1937             return new Object [] {"UTF-16BE", new Boolean(true)};
1938         }
1939         if (b0 == 0xFF && b1 == 0xFE) {
1940             // UTF-16, little-endian
1941             return new Object [] {"UTF-16LE", new Boolean(false)};
1942         }
1943 
1944         // default to UTF-8 if we don't have enough bytes to make a
1945         // good determination of the encoding
1946         if (count < 3) {
1947             return new Object [] {"UTF-8", null};
1948         }
1949 
1950         // UTF-8 with a BOM
1951         int b2 = b4[2] & 0xFF;
1952         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1953             return new Object [] {"UTF-8", null};
1954         }
1955 
1956         // default to UTF-8 if we don't have enough bytes to make a
1957         // good determination of the encoding
1958         if (count < 4) {
1959             return new Object [] {"UTF-8", null};
1960         }
1961 
1962         // other encodings
1963         int b3 = b4[3] & 0xFF;
1964         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
1965             // UCS-4, big endian (1234)
1966             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
1967         }
1968         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
1969             // UCS-4, little endian (4321)
1970             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
1971         }
1972         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
1973             // UCS-4, unusual octet order (2143)
1974             // REVISIT: What should this be?
1975             return new Object [] {"ISO-10646-UCS-4", null};
1976         }
1977         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
1978             // UCS-4, unusual octect order (3412)
1979             // REVISIT: What should this be?
1980             return new Object [] {"ISO-10646-UCS-4", null};
1981         }
1982         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
1983             // UTF-16, big-endian, no BOM
1984             // (or could turn out to be UCS-2...
1985             // REVISIT: What should this be?
1986             return new Object [] {"UTF-16BE", new Boolean(true)};
1987         }
1988         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
1989             // UTF-16, little-endian, no BOM
1990             // (or could turn out to be UCS-2...
1991             return new Object [] {"UTF-16LE", new Boolean(false)};
1992         }
1993         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
1994             // EBCDIC
1995             // a la xerces1, return CP037 instead of EBCDIC here
1996             return new Object [] {"CP037", null};
1997         }
1998 
1999         // default encoding
2000         return new Object [] {"UTF-8", null};
2001 
2002     } // getEncodingName(byte[],int):Object[]
2003 
2004     /**
2005      * xxx not removing endEntity() so that i remember that we need to implement it.
2006      * Ends an entity.
2007      *
2008      * @throws XNIException Thrown by entity handler to signal an error.
2009      */
2010     //
2011     /** Prints the contents of the buffer. */
2012     final void print() {
2013         if (DEBUG_BUFFER) {
2014             if (fCurrentEntity != null) {
2015                 System.out.print('[');
2016                 System.out.print(fCurrentEntity.count);
2017                 System.out.print(' ');
2018                 System.out.print(fCurrentEntity.position);
2019                 if (fCurrentEntity.count > 0) {
2020                     System.out.print(" \"");
2021                     for (int i = 0; i < fCurrentEntity.count; i++) {
2022                         if (i == fCurrentEntity.position) {
2023                             System.out.print('^');
2024                         }
2025                         char c = fCurrentEntity.ch[i];
2026                         switch (c) {
2027                             case '\n': {
2028                                 System.out.print("\\n");
2029                                 break;
2030                             }
2031                             case '\r': {
2032                                 System.out.print("\\r");
2033                                 break;
2034                             }
2035                             case '\t': {
2036                                 System.out.print("\\t");
2037                                 break;
2038                             }
2039                             case '\\': {
2040                                 System.out.print("\\\\");
2041                                 break;
2042                             }
2043                             default: {
2044                                 System.out.print(c);
2045                             }
2046                         }
2047                     }
2048                     if (fCurrentEntity.position == fCurrentEntity.count) {
2049                         System.out.print('^');
2050                     }
2051                     System.out.print('"');
2052                 }
2053                 System.out.print(']');
2054                 System.out.print(" @ ");
2055                 System.out.print(fCurrentEntity.lineNumber);
2056                 System.out.print(',');
2057                 System.out.print(fCurrentEntity.columnNumber);
2058             } else {
2059                 System.out.print("*NO CURRENT ENTITY*");
2060             }
2061         }
2062     }
2063 
2064     /**
2065      * Registers the listener object and provides callback.
2066      * @param listener listener to which call back should be provided when scanner buffer
2067      * is being changed.
2068      */
2069     public void registerListener(XMLBufferListener listener) {
2070         if(!listeners.contains(listener))
2071             listeners.add(listener);
2072     }
2073 
2074     /**
2075      *
2076      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2077      */
2078     private void invokeListeners(int loadPos){
2079         for(int i=0;i<listeners.size();i++){
2080             XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
2081             listener.refresh(loadPos);
2082         }
2083     }
2084 
2085     /**
2086      * Skips space characters appearing immediately on the input that would
2087      * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
2088      * normalization is performed. This is useful when scanning structures
2089      * such as the XMLDecl and TextDecl that can only contain US-ASCII
2090      * characters.
2091      * <p>
2092      * <strong>Note:</strong> The characters are consumed only if they would
2093      * match non-terminal S before end of line normalization is performed.
2094      *
2095      * @return Returns true if at least one space character was skipped.
2096      *
2097      * @throws IOException  Thrown if i/o error occurs.
2098      * @throws EOFException Thrown on end of file.
2099      *
2100      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
2101      */
2102     public final boolean skipDeclSpaces() throws IOException {
2103         if (DEBUG_BUFFER) {
2104             System.out.print("(skipDeclSpaces: ");
2105             //XMLEntityManager.print(fCurrentEntity);
2106             System.out.println();
2107         }
2108 
2109         // load more characters, if needed
2110         if (fCurrentEntity.position == fCurrentEntity.count) {
2111             load(0, true);
2112         }
2113 
2114         // skip spaces
2115         int c = fCurrentEntity.ch[fCurrentEntity.position];
2116         if (XMLChar.isSpace(c)) {
2117             boolean external = fCurrentEntity.isExternal();
2118             do {
2119                 boolean entityChanged = false;
2120                 // handle newlines
2121                 if (c == '\n' || (external && c == '\r')) {
2122                     fCurrentEntity.lineNumber++;
2123                     fCurrentEntity.columnNumber = 1;
2124                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
2125                         fCurrentEntity.ch[0] = (char)c;
2126                         entityChanged = load(1, true);
2127                         if (!entityChanged)
2128                             // the load change the position to be 1,
2129                             // need to restore it when entity not changed
2130                             fCurrentEntity.position = 0;
2131                     }
2132                     if (c == '\r' && external) {
2133                         // REVISIT: Does this need to be updated to fix the
2134                         //          #x0D ^#x0A newline normalization problem? -Ac
2135                         if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
2136                             fCurrentEntity.position--;
2137                         }
2138                     }
2139                     /*** NEWLINE NORMALIZATION ***
2140                      * else {
2141                      * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
2142                      * && external) {
2143                      * fCurrentEntity.position++;
2144                      * }
2145                      * }
2146                      * /***/
2147                 } else {
2148                     fCurrentEntity.columnNumber++;
2149                 }
2150                 // load more characters, if needed
2151                 if (!entityChanged)
2152                     fCurrentEntity.position++;
2153                 if (fCurrentEntity.position == fCurrentEntity.count) {
2154                     load(0, true);
2155                 }
2156             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
2157             if (DEBUG_BUFFER) {
2158                 System.out.print(")skipDeclSpaces: ");
2159                 //  XMLEntityManager.print(fCurrentEntity);
2160                 System.out.println(" -> true");
2161             }
2162             return true;
2163         }
2164 
2165         // no spaces were found
2166         if (DEBUG_BUFFER) {
2167             System.out.print(")skipDeclSpaces: ");
2168             //XMLEntityManager.print(fCurrentEntity);
2169             System.out.println(" -> false");
2170         }
2171         return false;
2172 
2173     } // skipDeclSpaces():boolean
2174 
2175 
2176 } // class XMLEntityScanner