1 /*
   2  * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 
  24 
  25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  29 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  30 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  31 import com.sun.org.apache.xerces.internal.util.XMLChar;
  32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
  34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
  35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
  36 import com.sun.org.apache.xerces.internal.xni.*;
  37 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  38 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  39 import com.sun.xml.internal.stream.Entity;
  40 import com.sun.xml.internal.stream.Entity.ScannedEntity;
  41 import com.sun.xml.internal.stream.XMLBufferListener;
  42 import java.io.EOFException;
  43 import java.io.IOException;
  44 import java.io.InputStream;
  45 import java.io.InputStreamReader;
  46 import java.io.Reader;
  47 import java.util.Locale;
  48 import java.util.Vector;
  49 
  50 /**
  51  * Implements the entity scanner methods.
  52  *
  53  * @author Neeraj Bajaj, Sun Microsystems
  54  * @author Andy Clark, IBM
  55  * @author Arnaud  Le Hors, IBM
  56  * @author K.Venugopal Sun Microsystems
  57  *
  58  */
  59 public class XMLEntityScanner implements XMLLocator  {
  60 
  61 
  62     protected Entity.ScannedEntity fCurrentEntity = null ;
  63     protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
  64 
  65     protected XMLEntityManager fEntityManager ;
  66 
  67     /** Security manager. */
  68     protected XMLSecurityManager fSecurityManager = null;
  69 
  70     /** Limit analyzer. */
  71     protected XMLLimitAnalyzer fLimitAnalyzer = null;
  72 
  73     /** Debug switching readers for encodings. */
  74     private static final boolean DEBUG_ENCODINGS = false;
  75     /** Listeners which should know when load is being called */
  76     private Vector listeners = new Vector();
  77 
  78     private static final boolean [] VALID_NAMES = new boolean[127];
  79 
  80     /**
  81      * Debug printing of buffer. This debugging flag works best when you
  82      * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
  83      * 64 characters.
  84      */
  85     private static final boolean DEBUG_BUFFER = false;
  86     private static final boolean DEBUG_SKIP_STRING = false;
  87     /**
  88      * To signal the end of the document entity, this exception will be thrown.
  89      */
  90     private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
  91         private static final long serialVersionUID = 980337771224675268L;
  92         public Throwable fillInStackTrace() {
  93             return this;
  94         }
  95     };
  96 
  97     protected SymbolTable fSymbolTable = null;
  98     protected XMLErrorReporter fErrorReporter = null;
  99     int [] whiteSpaceLookup = new int[100];
 100     int whiteSpaceLen = 0;
 101     boolean whiteSpaceInfoNeeded = true;
 102 
 103     /**
 104      * Allow Java encoding names. This feature identifier is:
 105      * http://apache.org/xml/features/allow-java-encodings
 106      */
 107     protected boolean fAllowJavaEncodings;
 108 
 109     //Will be used only during internal subsets.
 110     //for appending data.
 111 
 112     /** Property identifier: symbol table. */
 113     protected static final String SYMBOL_TABLE =
 114             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
 115 
 116     /** Property identifier: error reporter. */
 117     protected static final String ERROR_REPORTER =
 118             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
 119 
 120     /** Feature identifier: allow Java encodings. */
 121     protected static final String ALLOW_JAVA_ENCODINGS =
 122             Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
 123 
 124     protected PropertyManager fPropertyManager = null ;
 125 
 126     boolean isExternal = false;
 127     static {
 128 
 129         for(int i=0x0041;i<=0x005A ; i++){
 130             VALID_NAMES[i]=true;
 131         }
 132         for(int i=0x0061;i<=0x007A; i++){
 133             VALID_NAMES[i]=true;
 134         }
 135         for(int i=0x0030;i<=0x0039; i++){
 136             VALID_NAMES[i]=true;
 137         }
 138         VALID_NAMES[45]=true;
 139         VALID_NAMES[46]=true;
 140         VALID_NAMES[58]=true;
 141         VALID_NAMES[95]=true;
 142     }
    // SAPJVM: Remember whether the XML version has been set explicitly,
    // so that XMLStreamReader.getVersion() can find that out.
 145     boolean xmlVersionSetExplicitly = false;
 146     //
 147     // Constructors
 148     //
 149 
 150     /** Default constructor. */
 151     public XMLEntityScanner() {
 152     } // <init>()
 153 
 154 
 155     /**  private constructor, this class can only be instantiated within this class. Instance of this class should
 156      *    be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
 157      *    @see getEntityScanner()
 158      *    @see getEntityScanner(ScannedEntity)
 159      */
 160     public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
 161         fEntityManager = entityManager ;
 162         reset(propertyManager);
 163     } // <init>()
 164 
 165 
 166     // set buffer size:
 167     public final void setBufferSize(int size) {
 168         // REVISIT: Buffer size passed to entity scanner
 169         // was not being kept in synch with the actual size
 170         // of the buffers in each scanned entity. If any
 171         // of the buffers were actually resized, it was possible
 172         // that the parser would throw an ArrayIndexOutOfBoundsException
 173         // for documents which contained names which are longer than
 174         // the current buffer size. Conceivably the buffer size passed
 175         // to entity scanner could be used to determine a minimum size
 176         // for resizing, if doubling its size is smaller than this
 177         // minimum. -- mrglavas
 178         fBufferSize = size;
 179     }
 180 
 181     /**
 182      * Resets the components.
 183      */
 184     public void reset(PropertyManager propertyManager){
 185         fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
 186         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
 187         resetCommon();
 188     }
 189 
 190     /**
 191      * Resets the component. The component can query the component manager
 192      * about any features and properties that affect the operation of the
 193      * component.
 194      *
 195      * @param componentManager The component manager.
 196      *
 197      * @throws SAXException Thrown by component on initialization error.
 198      *                      For example, if a feature or property is
 199      *                      required for the operation of the component, the
 200      *                      component manager may throw a
 201      *                      SAXNotRecognizedException or a
 202      *                      SAXNotSupportedException.
 203      */
 204     public void reset(XMLComponentManager componentManager)
 205     throws XMLConfigurationException {
 206         // xerces features
 207         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
 208 
 209         //xerces properties
 210         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 211         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 212         resetCommon();
 213     } // reset(XMLComponentManager)
 214 
 215 
 216     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 217             XMLErrorReporter reporter) {
 218         fCurrentEntity = null;
 219         fSymbolTable = symbolTable;
 220         fEntityManager = entityManager;
 221         fErrorReporter = reporter;
 222         fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
 223         fSecurityManager = fEntityManager.fSecurityManager;
 224     }
 225 
 226     private void resetCommon() {
 227         fCurrentEntity = null;
 228         whiteSpaceLen = 0;
 229         whiteSpaceInfoNeeded = true;
 230         listeners.clear();
 231         fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
 232         fSecurityManager = fEntityManager.fSecurityManager;
 233     }
 234 
 235     /**
 236      * Returns the XML version of the current entity. This will normally be the
 237      * value from the XML or text declaration or defaulted by the parser. Note that
 238      * that this value may be different than the version of the processing rules
 239      * applied to the current entity. For instance, an XML 1.1 document may refer to
 240      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 241      * document. Also note that, for a given entity, this value can only be considered
 242      * final once the XML or text declaration has been read or once it has been
 243      * determined that there is no such declaration.
 244      */
 245     public final String getXMLVersion() {
 246         if (fCurrentEntity != null) {
 247             return fCurrentEntity.xmlVersion;
 248         }
 249         return null;
 250     } // getXMLVersion():String
 251 
 252     /**
 253      * Sets the XML version. This method is used by the
 254      * scanners to report the value of the version pseudo-attribute
 255      * in an XML or text declaration.
 256      *
 257      * @param xmlVersion the XML version of the current entity
 258      */
 259     public final void setXMLVersion(String xmlVersion) {
 260         xmlVersionSetExplicitly = true; // SAPJVM
 261         fCurrentEntity.xmlVersion = xmlVersion;
 262     } // setXMLVersion(String)
 263 
 264 
 265     /** set the instance of current scanned entity.
 266      *   @param ScannedEntity
 267      */
 268 
 269     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 270         fCurrentEntity = scannedEntity ;
 271         if(fCurrentEntity != null){
 272             isExternal = fCurrentEntity.isExternal();
 273             if(DEBUG_BUFFER)
 274                 System.out.println("Current Entity is "+scannedEntity.name);
 275         }
 276     }
 277 
 278     public  Entity.ScannedEntity getCurrentEntity(){
 279         return fCurrentEntity ;
 280     }
 281     //
 282     // XMLEntityReader methods
 283     //
 284 
 285     /**
 286      * Returns the base system identifier of the currently scanned
 287      * entity, or null if none is available.
 288      */
 289     public final String getBaseSystemId() {
 290         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 291     } // getBaseSystemId():String
 292 
 293     /**
 294      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
 295      */
 296     public void setBaseSystemId(String systemId) {
 297         //no-op
 298     }
 299 
 300     ///////////// Locator methods start.
 301     public final int getLineNumber(){
 302         //if the entity is closed, we should return -1
 303         //xxx at first place why such call should be there...
 304         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 305     }
 306 
 307     /**
 308      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
 309      */
 310     public void setLineNumber(int line) {
 311         //no-op
 312     }
 313 
 314 
 315     public final int getColumnNumber(){
 316         //if the entity is closed, we should return -1
 317         //xxx at first place why such call should be there...
 318         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 319     }
 320 
 321     /**
 322      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
 323      */
 324     public void setColumnNumber(int col) {
 325         // no-op
 326     }
 327 
 328 
 329     public final int getCharacterOffset(){
 330         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 331     }
 332 
 333     /** Returns the expanded system identifier.  */
 334     public final String getExpandedSystemId() {
 335         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 336     }
 337 
 338     /**
 339      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
 340      */
 341     public void setExpandedSystemId(String systemId) {
 342         //no-op
 343     }
 344 
 345     /** Returns the literal system identifier.  */
 346     public final String getLiteralSystemId() {
 347         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 348     }
 349 
 350     /**
 351      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
 352      */
 353     public void setLiteralSystemId(String systemId) {
 354         //no-op
 355     }
 356 
 357     /** Returns the public identifier.  */
 358     public final String getPublicId() {
 359         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 360     }
 361 
 362     /**
 363      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
 364      */
 365     public void setPublicId(String publicId) {
 366         //no-op
 367     }
 368 
 369     ///////////////// Locator methods finished.
 370 
 371     /** the version of the current entity being scanned */
 372     public void setVersion(String version){
 373         fCurrentEntity.version = version;
 374     }
 375 
 376     public String getVersion(){
 377         if (fCurrentEntity != null)
 378             return fCurrentEntity.version ;
 379         return null;
 380     }
 381 
 382     /**
 383      * Returns the encoding of the current entity.
 384      * Note that, for a given entity, this value can only be
 385      * considered final once the encoding declaration has been read (or once it
 386      * has been determined that there is no such declaration) since, no encoding
 387      * having been specified on the XMLInputSource, the parser
 388      * will make an initial "guess" which could be in error.
 389      */
 390     public final String getEncoding() {
 391         if (fCurrentEntity != null) {
 392             return fCurrentEntity.encoding;
 393         }
 394         return null;
 395     } // getEncoding():String
 396 
 397     /**
 398      * Sets the encoding of the scanner. This method is used by the
 399      * scanners if the XMLDecl or TextDecl line contains an encoding
 400      * pseudo-attribute.
 401      * <p>
 402      * <strong>Note:</strong> The underlying character reader on the
 403      * current entity will be changed to accomodate the new encoding.
 404      * However, the new encoding is ignored if the current reader was
 405      * not constructed from an input stream (e.g. an external entity
 406      * that is resolved directly to the appropriate java.io.Reader
 407      * object).
 408      *
 409      * @param encoding The IANA encoding name of the new encoding.
 410      *
 411      * @throws IOException Thrown if the new encoding is not supported.
 412      *
 413      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 414      */
 415     public final void setEncoding(String encoding) throws IOException {
 416 
 417         if (DEBUG_ENCODINGS) {
 418             System.out.println("$$$ setEncoding: "+encoding);
 419         }
 420 
 421         if (fCurrentEntity.stream != null) {
 422             // if the encoding is the same, don't change the reader and
 423             // re-use the original reader used by the OneCharReader
 424             // NOTE: Besides saving an object, this overcomes deficiencies
 425             //       in the UTF-16 reader supplied with the standard Java
 426             //       distribution (up to and including 1.3). The UTF-16
 427             //       decoder buffers 8K blocks even when only asked to read
 428             //       a single char! -Ac
 429             if (fCurrentEntity.encoding == null ||
 430                     !fCurrentEntity.encoding.equals(encoding)) {
 431                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 432                 // and we know the endian-ness, we shouldn't change readers.
 433                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 434                 // the endian-ness from the encoding we presently have.
 435                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 436                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 437                     if(ENCODING.equals("UTF-16")) return;
 438                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 439                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 440                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 441                         } else {
 442                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 443                         }
 444                         return;
 445                     }
 446                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 447                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 448                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 449                         } else {
 450                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 451                         }
 452                         return;
 453                     }
 454                 }
 455                 // wrap a new reader around the input stream, changing
 456                 // the encoding
 457                 if (DEBUG_ENCODINGS) {
 458                     System.out.println("$$$ creating new reader from stream: "+
 459                             fCurrentEntity.stream);
 460                 }
 461                 //fCurrentEntity.stream.reset();
 462                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 463                 fCurrentEntity.encoding = encoding;
 464 
 465             } else {
 466                 if (DEBUG_ENCODINGS)
 467                     System.out.println("$$$ reusing old reader on stream");
 468             }
 469         }
 470 
 471     } // setEncoding(String)
 472 
 473     /** Returns true if the current entity being scanned is external. */
 474     public final boolean isExternal() {
 475         return fCurrentEntity.isExternal();
 476     } // isExternal():boolean
 477 
 478     public int getChar(int relative) throws IOException{
 479         if(arrangeCapacity(relative + 1, false)){
 480             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 481         }else{
 482             return -1;
 483         }
 484     }//getChar()
 485 
 486     /**
 487      * Returns the next character on the input.
 488      * <p>
 489      * <strong>Note:</strong> The character is <em>not</em> consumed.
 490      *
 491      * @throws IOException  Thrown if i/o error occurs.
 492      * @throws EOFException Thrown on end of file.
 493      */
 494     public int peekChar() throws IOException {
 495         if (DEBUG_BUFFER) {
 496             System.out.print("(peekChar: ");
 497             print();
 498             System.out.println();
 499         }
 500 
 501         // load more characters, if needed
 502         if (fCurrentEntity.position == fCurrentEntity.count) {
 503             load(0, true, true);
 504         }
 505 
 506         // peek at character
 507         int c = fCurrentEntity.ch[fCurrentEntity.position];
 508 
 509         // return peeked character
 510         if (DEBUG_BUFFER) {
 511             System.out.print(")peekChar: ");
 512             print();
 513             if (isExternal) {
 514                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 515             } else {
 516                 System.out.println(" -> '"+(char)c+"'");
 517             }
 518         }
 519         if (isExternal) {
 520             return c != '\r' ? c : '\n';
 521         } else {
 522             return c;
 523         }
 524 
 525     } // peekChar():int
 526 
 527     /**
 528      * Returns the next character on the input.
 529      * <p>
 530      * <strong>Note:</strong> The character is consumed.
 531      *
 532      * @throws IOException  Thrown if i/o error occurs.
 533      * @throws EOFException Thrown on end of file.
 534      */
 535     public int scanChar() throws IOException {
 536         if (DEBUG_BUFFER) {
 537             System.out.print("(scanChar: ");
 538             print();
 539             System.out.println();
 540         }
 541 
 542         // load more characters, if needed
 543         if (fCurrentEntity.position == fCurrentEntity.count) {
 544             load(0, true, true);
 545         }
 546 
 547         // scan character
 548         int c = fCurrentEntity.ch[fCurrentEntity.position++];
 549         if (c == '\n' ||
 550                 (c == '\r' && isExternal)) {
 551             fCurrentEntity.lineNumber++;
 552             fCurrentEntity.columnNumber = 1;
 553             if (fCurrentEntity.position == fCurrentEntity.count) {
 554                 invokeListeners(1);
 555                 fCurrentEntity.ch[0] = (char)c;
 556                 load(1, false, false);
 557             }
 558             if (c == '\r' && isExternal) {
 559                 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
 560                     fCurrentEntity.position--;
 561                 }
 562                 c = '\n';
 563             }
 564         }
 565 
 566         // return character that was scanned
 567         if (DEBUG_BUFFER) {
 568             System.out.print(")scanChar: ");
 569             print();
 570             System.out.println(" -> '"+(char)c+"'");
 571         }
 572         fCurrentEntity.columnNumber++;
 573         return c;
 574 
 575     } // scanChar():int
 576 
 577     /**
 578      * Returns a string matching the NMTOKEN production appearing immediately
 579      * on the input as a symbol, or null if NMTOKEN Name string is present.
 580      * <p>
 581      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 582      * <p>
 583      * <strong>Note:</strong> The string returned must be a symbol. The
 584      * SymbolTable can be used for this purpose.
 585      *
 586      * @throws IOException  Thrown if i/o error occurs.
 587      * @throws EOFException Thrown on end of file.
 588      *
 589      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 590      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 591      */
 592     public String scanNmtoken() throws IOException {
 593         if (DEBUG_BUFFER) {
 594             System.out.print("(scanNmtoken: ");
 595             print();
 596             System.out.println();
 597         }
 598 
 599         // load more characters, if needed
 600         if (fCurrentEntity.position == fCurrentEntity.count) {
 601             load(0, true, true);
 602         }
 603 
 604         // scan nmtoken
 605         int offset = fCurrentEntity.position;
 606         boolean vc = false;
 607         char c;
 608         while (true){
 609             //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
 610             c = fCurrentEntity.ch[fCurrentEntity.position];
 611             if(c < 127){
 612                 vc = VALID_NAMES[c];
 613             }else{
 614                 vc = XMLChar.isName(c);
 615             }
 616             if(!vc)break;
 617 
 618             if (++fCurrentEntity.position == fCurrentEntity.count) {
 619                 int length = fCurrentEntity.position - offset;
 620                 invokeListeners(length);
 621                 if (length == fCurrentEntity.fBufferSize) {
 622                     // bad luck we have to resize our buffer
 623                     char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 624                     System.arraycopy(fCurrentEntity.ch, offset,
 625                             tmp, 0, length);
 626                     fCurrentEntity.ch = tmp;
 627                     fCurrentEntity.fBufferSize *= 2;
 628                 } else {
 629                     System.arraycopy(fCurrentEntity.ch, offset,
 630                             fCurrentEntity.ch, 0, length);
 631                 }
 632                 offset = 0;
 633                 if (load(length, false, false)) {
 634                     break;
 635                 }
 636             }
 637         }
 638         int length = fCurrentEntity.position - offset;
 639         fCurrentEntity.columnNumber += length;
 640 
 641         // return nmtoken
 642         String symbol = null;
 643         if (length > 0) {
 644             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 645         }
 646         if (DEBUG_BUFFER) {
 647             System.out.print(")scanNmtoken: ");
 648             print();
 649             System.out.println(" -> "+String.valueOf(symbol));
 650         }
 651         return symbol;
 652 
 653     } // scanNmtoken():String
 654 
 655     /**
 656      * Returns a string matching the Name production appearing immediately
 657      * on the input as a symbol, or null if no Name string is present.
 658      * <p>
 659      * <strong>Note:</strong> The Name characters are consumed.
 660      * <p>
 661      * <strong>Note:</strong> The string returned must be a symbol. The
 662      * SymbolTable can be used for this purpose.
 663      *
 664      * @throws IOException  Thrown if i/o error occurs.
 665      * @throws EOFException Thrown on end of file.
 666      *
 667      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 668      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 669      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 670      */
 671     public String scanName() throws IOException {
 672         if (DEBUG_BUFFER) {
 673             System.out.print("(scanName: ");
 674             print();
 675             System.out.println();
 676         }
 677 
 678         // load more characters, if needed
 679         if (fCurrentEntity.position == fCurrentEntity.count) {
 680             load(0, true, true);
 681         }
 682 
 683         // scan name
 684         int offset = fCurrentEntity.position;
 685         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 686             if (++fCurrentEntity.position == fCurrentEntity.count) {
 687                 invokeListeners(1);
 688                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 689                 offset = 0;
 690                 if (load(1, false, false)) {
 691                     fCurrentEntity.columnNumber++;
 692                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 693 
 694                     if (DEBUG_BUFFER) {
 695                         System.out.print(")scanName: ");
 696                         print();
 697                         System.out.println(" -> "+String.valueOf(symbol));
 698                     }
 699                     return symbol;
 700                 }
 701             }
 702             boolean vc =false;
 703             while (true ){
 704                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 705                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 706                 if(c < 127){
 707                     vc = VALID_NAMES[c];
 708                 }else{
 709                     vc = XMLChar.isName(c);
 710                 }
 711                 if(!vc)break;
 712                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 713                     int length = fCurrentEntity.position - offset;
 714                     invokeListeners(length);
 715                     if (length == fCurrentEntity.fBufferSize) {
 716                         // bad luck we have to resize our buffer
 717                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 718                         System.arraycopy(fCurrentEntity.ch, offset,
 719                                 tmp, 0, length);
 720                         fCurrentEntity.ch = tmp;
 721                         fCurrentEntity.fBufferSize *= 2;
 722                     } else {
 723                         System.arraycopy(fCurrentEntity.ch, offset,
 724                                 fCurrentEntity.ch, 0, length);
 725                     }
 726                     offset = 0;
 727                     if (load(length, false, false)) {
 728                         break;
 729                     }
 730                 }
 731             }
 732         }
 733         int length = fCurrentEntity.position - offset;
 734         fCurrentEntity.columnNumber += length;
 735 
 736         // return name
 737         String symbol;
 738         if (length > 0) {
 739             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 740         } else
 741             symbol = null;
 742         if (DEBUG_BUFFER) {
 743             System.out.print(")scanName: ");
 744             print();
 745             System.out.println(" -> "+String.valueOf(symbol));
 746         }
 747         return symbol;
 748 
 749     } // scanName():String
 750 
 751     /**
 752      * Scans a qualified name from the input, setting the fields of the
 753      * QName structure appropriately.
 754      * <p>
 755      * <strong>Note:</strong> The qualified name characters are consumed.
 756      * <p>
 757      * <strong>Note:</strong> The strings used to set the values of the
 758      * QName structure must be symbols. The SymbolTable can be used for
 759      * this purpose.
 760      *
 761      * @param qname The qualified name structure to fill.
 762      *
 763      * @return Returns true if a qualified name appeared immediately on
 764      *         the input and was scanned, false otherwise.
 765      *
 766      * @throws IOException  Thrown if i/o error occurs.
 767      * @throws EOFException Thrown on end of file.
 768      *
 769      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 770      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 771      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 772      */
 773     public boolean scanQName(QName qname) throws IOException {
 774         if (DEBUG_BUFFER) {
 775             System.out.print("(scanQName, "+qname+": ");
 776             print();
 777             System.out.println();
 778         }
 779 
 780         // load more characters, if needed
 781         if (fCurrentEntity.position == fCurrentEntity.count) {
 782             load(0, true, true);
 783         }
 784 
 785         // scan qualified name
 786         int offset = fCurrentEntity.position;
 787 
 788         //making a check if if the specified character is a valid name start character
 789         //as defined by production [5] in the XML 1.0 specification.
 790         // Name ::= (Letter | '_' | ':') (NameChar)*
 791 
 792         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 793             if (++fCurrentEntity.position == fCurrentEntity.count) {
 794                 invokeListeners(1);
 795                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 796                 offset = 0;
 797 
 798                 if (load(1, false, false)) {
 799                     fCurrentEntity.columnNumber++;
 800                     //adding into symbol table.
 801                     //XXX We are trying to add single character in SymbolTable??????
 802                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 803                     qname.setValues(null, name, name, null);
 804                     if (DEBUG_BUFFER) {
 805                         System.out.print(")scanQName, "+qname+": ");
 806                         print();
 807                         System.out.println(" -> true");
 808                     }
 809                     return true;
 810                 }
 811             }
 812             int index = -1;
 813             boolean vc = false;
 814             while ( true){
 815 
 816                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 817                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 818                 if(c < 127){
 819                     vc = VALID_NAMES[c];
 820                 }else{
 821                     vc = XMLChar.isName(c);
 822                 }
 823                 if(!vc)break;
 824                 if (c == ':') {
 825                     if (index != -1) {
 826                         break;
 827                     }
 828                     index = fCurrentEntity.position;
 829                     //check prefix before further read
 830                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
 831                 }
 832                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 833                     int length = fCurrentEntity.position - offset;
 834                     //check localpart before loading more data
 835                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length - index - 1);
 836                     invokeListeners(length);
 837                     if (length == fCurrentEntity.fBufferSize) {
 838                         // bad luck we have to resize our buffer
 839                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 840                         System.arraycopy(fCurrentEntity.ch, offset,
 841                                 tmp, 0, length);
 842                         fCurrentEntity.ch = tmp;
 843                         fCurrentEntity.fBufferSize *= 2;
 844                     } else {
 845                         System.arraycopy(fCurrentEntity.ch, offset,
 846                                 fCurrentEntity.ch, 0, length);
 847                     }
 848                     if (index != -1) {
 849                         index = index - offset;
 850                     }
 851                     offset = 0;
 852                     if (load(length, false, false)) {
 853                         break;
 854                     }
 855                 }
 856             }
 857             int length = fCurrentEntity.position - offset;
 858             fCurrentEntity.columnNumber += length;
 859             if (length > 0) {
 860                 String prefix = null;
 861                 String localpart = null;
 862                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 863                         offset, length);
 864 
 865                 if (index != -1) {
 866                     int prefixLength = index - offset;
 867                     //check the result: prefix
 868                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
 869                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 870                             offset, prefixLength);
 871                     int len = length - prefixLength - 1;
 872                     //check the result: localpart
 873                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
 874                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 875                             index + 1, len);
 876 
 877                 } else {
 878                     localpart = rawname;
 879                     //check the result: localpart
 880                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 881                 }
 882                 qname.setValues(prefix, localpart, rawname, null);
 883                 if (DEBUG_BUFFER) {
 884                     System.out.print(")scanQName, "+qname+": ");
 885                     print();
 886                     System.out.println(" -> true");
 887                 }
 888                 return true;
 889             }
 890         }
 891 
 892         // no qualified name found
 893         if (DEBUG_BUFFER) {
 894             System.out.print(")scanQName, "+qname+": ");
 895             print();
 896             System.out.println(" -> false");
 897         }
 898         return false;
 899 
 900     } // scanQName(QName):boolean
 901 
 902     /**
 903      * Checks whether the value of the specified Limit exceeds its limit
 904      *
 905      * @param limit The Limit to be checked.
 906      * @param entity The current entity.
     * @param offset The index of the first character of the scanned text in the entity's buffer.
     * @param length The number of characters scanned; this is the value recorded against the limit.
 909      */
 910     protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) {
 911         fLimitAnalyzer.addValue(limit, null, length);
 912         if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) {
 913             fSecurityManager.debugPrint(fLimitAnalyzer);
 914             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(),
 915                     new Object[]{new String(entity.ch, offset, length),
 916                 fLimitAnalyzer.getTotalValue(limit),
 917                 fSecurityManager.getLimit(limit),
 918                 fSecurityManager.getStateLiteral(limit)},
 919                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
 920         }
 921     }
 922 
 923     /**
     * CHANGED:
     * Scans a range of parsed character data, setting the fields of the
     * supplied XMLString so that it refers to the scanned characters.
 927      * <p>
 928      * <strong>Note:</strong> The characters are consumed.
 929      * <p>
 930      * <strong>Note:</strong> This method does not guarantee to return
 931      * the longest run of parsed character data. This method may return
 932      * before markup due to reaching the end of the input buffer or any
 933      * other reason.
 934      * <p>
 935      *
 936      * @param content The content structure to fill.
 937      *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
 941      *
 942      * @throws IOException  Thrown if i/o error occurs.
 943      * @throws EOFException Thrown on end of file.
 944      */
 945     public int scanContent(XMLString content) throws IOException {
 946         if (DEBUG_BUFFER) {
 947             System.out.print("(scanContent: ");
 948             print();
 949             System.out.println();
 950         }
 951 
 952         // load more characters, if needed
 953         if (fCurrentEntity.position == fCurrentEntity.count) {
 954             load(0, true, true);
 955         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 956             invokeListeners(0);
 957             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 958             load(1, false, false);
 959             fCurrentEntity.position = 0;
 960         }
 961 
 962         // normalize newlines
 963         int offset = fCurrentEntity.position;
 964         int c = fCurrentEntity.ch[offset];
 965         int newlines = 0;
 966         if (c == '\n' || (c == '\r' && isExternal)) {
 967             if (DEBUG_BUFFER) {
 968                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
 969                 print();
 970                 System.out.println();
 971             }
 972             do {
 973                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 974                 if (c == '\r' && isExternal) {
 975                     newlines++;
 976                     fCurrentEntity.lineNumber++;
 977                     fCurrentEntity.columnNumber = 1;
 978                     if (fCurrentEntity.position == fCurrentEntity.count) {
 979                         offset = 0;
 980                         fCurrentEntity.position = newlines;
 981                         if (load(newlines, false, true)) {
 982                             break;
 983                         }
 984                     }
 985                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
 986                         fCurrentEntity.position++;
 987                         offset++;
 988                     }
 989                     /*** NEWLINE NORMALIZATION ***/
 990                     else {
 991                         newlines++;
 992                     }
 993                 } else if (c == '\n') {
 994                     newlines++;
 995                     fCurrentEntity.lineNumber++;
 996                     fCurrentEntity.columnNumber = 1;
 997                     if (fCurrentEntity.position == fCurrentEntity.count) {
 998                         offset = 0;
 999                         fCurrentEntity.position = newlines;
1000                         if (load(newlines, false, true)) {
1001                             break;
1002                         }
1003                     }
1004                 } else {
1005                     fCurrentEntity.position--;
1006                     break;
1007                 }
1008             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1009             for (int i = offset; i < fCurrentEntity.position; i++) {
1010                 fCurrentEntity.ch[i] = '\n';
1011             }
1012             int length = fCurrentEntity.position - offset;
1013             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1014                 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
1015                 //on buffering the data..
1016                 content.setValues(fCurrentEntity.ch, offset, length);
1017                 //content.append(fCurrentEntity.ch, offset, length);
1018                 if (DEBUG_BUFFER) {
1019                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1020                     print();
1021                     System.out.println();
1022                 }
1023                 return -1;
1024             }
1025             if (DEBUG_BUFFER) {
1026                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1027                 print();
1028                 System.out.println();
1029             }
1030         }
1031 
1032         while (fCurrentEntity.position < fCurrentEntity.count) {
1033             c = fCurrentEntity.ch[fCurrentEntity.position++];
1034             if (!XMLChar.isContent(c)) {
1035                 fCurrentEntity.position--;
1036                 break;
1037             }
1038         }
1039         int length = fCurrentEntity.position - offset;
1040         fCurrentEntity.columnNumber += length - newlines;
1041         if (fCurrentEntity.isGE) {
1042             checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
1043         }
1044 
1045         //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
1046         //on buffering the data..
1047         content.setValues(fCurrentEntity.ch, offset, length);
1048         //content.append(fCurrentEntity.ch, offset, length);
1049         // return next character
1050         if (fCurrentEntity.position != fCurrentEntity.count) {
1051             c = fCurrentEntity.ch[fCurrentEntity.position];
1052             // REVISIT: Does this need to be updated to fix the
1053             //          #x0D ^#x0A newline normalization problem? -Ac
1054             if (c == '\r' && isExternal) {
1055                 c = '\n';
1056             }
1057         } else {
1058             c = -1;
1059         }
1060         if (DEBUG_BUFFER) {
1061             System.out.print(")scanContent: ");
1062             print();
1063             System.out.println(" -> '"+(char)c+"'");
1064         }
1065         return c;
1066 
1067     } // scanContent(XMLString):int
1068 
1069     /**
1070      * Scans a range of attribute value data, setting the fields of the
1071      * XMLString structure, appropriately.
1072      * <p>
1073      * <strong>Note:</strong> The characters are consumed.
1074      * <p>
1075      * <strong>Note:</strong> This method does not guarantee to return
1076      * the longest run of attribute value data. This method may return
1077      * before the quote character due to reaching the end of the input
1078      * buffer or any other reason.
1079      * <p>
1080      * <strong>Note:</strong> The fields contained in the XMLString
1081      * structure are not guaranteed to remain valid upon subsequent calls
1082      * to the entity scanner. Therefore, the caller is responsible for
1083      * immediately using the returned character data or making a copy of
1084      * the character data.
1085      *
1086      * @param quote   The quote character that signifies the end of the
1087      *                attribute value data.
1088      * @param content The content structure to fill.
1089      *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
1093      *
1094      * @throws IOException  Thrown if i/o error occurs.
1095      * @throws EOFException Thrown on end of file.
1096      */
1097     public int scanLiteral(int quote, XMLString content)
1098     throws IOException {
1099         if (DEBUG_BUFFER) {
1100             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1101             print();
1102             System.out.println();
1103         }
1104         // load more characters, if needed
1105         if (fCurrentEntity.position == fCurrentEntity.count) {
1106             load(0, true, true);
1107         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1108             invokeListeners(0);
1109             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1110             load(1, false, false);
1111             fCurrentEntity.position = 0;
1112         }
1113 
1114         // normalize newlines
1115         int offset = fCurrentEntity.position;
1116         int c = fCurrentEntity.ch[offset];
1117         int newlines = 0;
1118         if(whiteSpaceInfoNeeded)
1119             whiteSpaceLen=0;
1120         if (c == '\n' || (c == '\r' && isExternal)) {
1121             if (DEBUG_BUFFER) {
1122                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1123                 print();
1124                 System.out.println();
1125             }
1126             do {
1127                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1128                 if (c == '\r' && isExternal) {
1129                     newlines++;
1130                     fCurrentEntity.lineNumber++;
1131                     fCurrentEntity.columnNumber = 1;
1132                     if (fCurrentEntity.position == fCurrentEntity.count) {
1133                         offset = 0;
1134                         fCurrentEntity.position = newlines;
1135                         if (load(newlines, false, true)) {
1136                             break;
1137                         }
1138                     }
1139                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1140                         fCurrentEntity.position++;
1141                         offset++;
1142                     }
1143                     /*** NEWLINE NORMALIZATION ***/
1144                     else {
1145                         newlines++;
1146                     }
1147                     /***/
1148                 } else if (c == '\n') {
1149                     newlines++;
1150                     fCurrentEntity.lineNumber++;
1151                     fCurrentEntity.columnNumber = 1;
1152                     if (fCurrentEntity.position == fCurrentEntity.count) {
1153                         offset = 0;
1154                         fCurrentEntity.position = newlines;
1155                         if (load(newlines, false, true)) {
1156                             break;
1157                         }
1158                     }
1159                     /*** NEWLINE NORMALIZATION ***
1160                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1161                      * && external) {
1162                      * fCurrentEntity.position++;
1163                      * offset++;
1164                      * }
1165                      * /***/
1166                 } else {
1167                     fCurrentEntity.position--;
1168                     break;
1169                 }
1170             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1171             int i=0;
1172             for ( i = offset; i < fCurrentEntity.position; i++) {
1173                 fCurrentEntity.ch[i] = '\n';
1174                 storeWhiteSpace(i);
1175             }
1176 
1177             int length = fCurrentEntity.position - offset;
1178             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1179                 content.setValues(fCurrentEntity.ch, offset, length);
1180                 if (DEBUG_BUFFER) {
1181                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1182                     print();
1183                     System.out.println();
1184                 }
1185                 return -1;
1186             }
1187             if (DEBUG_BUFFER) {
1188                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1189                 print();
1190                 System.out.println();
1191             }
1192         }
1193 
1194         // scan literal value
1195         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1196             c = fCurrentEntity.ch[fCurrentEntity.position];
1197             if ((c == quote &&
1198                     (!fCurrentEntity.literal || isExternal)) ||
1199                     c == '%' || !XMLChar.isContent(c)) {
1200                 break;
1201             }
1202             if (whiteSpaceInfoNeeded && c == '\t') {
1203                 storeWhiteSpace(fCurrentEntity.position);
1204             }
1205         }
1206         int length = fCurrentEntity.position - offset;
1207         fCurrentEntity.columnNumber += length - newlines;
1208         if (fCurrentEntity.isGE) {
1209             checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
1210         }
1211         content.setValues(fCurrentEntity.ch, offset, length);
1212 
1213         // return next character
1214         if (fCurrentEntity.position != fCurrentEntity.count) {
1215             c = fCurrentEntity.ch[fCurrentEntity.position];
1216             // NOTE: We don't want to accidentally signal the
1217             //       end of the literal if we're expanding an
1218             //       entity appearing in the literal. -Ac
1219             if (c == quote && fCurrentEntity.literal) {
1220                 c = -1;
1221             }
1222         } else {
1223             c = -1;
1224         }
1225         if (DEBUG_BUFFER) {
1226             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1227             print();
1228             System.out.println(" -> '"+(char)c+"'");
1229         }
1230         return c;
1231 
1232     } // scanLiteral(int,XMLString):int
1233 
1234     /**
1235      * Save whitespace information. Increase the whitespace buffer by 100
1236      * when needed.
1237      *
1238      * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1239      *
1240      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1241      */
1242     private void storeWhiteSpace(int whiteSpacePos) {
1243         if (whiteSpaceLen >= whiteSpaceLookup.length) {
1244             int [] tmp = new int[whiteSpaceLookup.length + 100];
1245             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1246             whiteSpaceLookup = tmp;
1247         }
1248 
1249         whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1250     }
1251 
1252     //CHANGED:
1253     /**
1254      * Scans a range of character data up to the specified delimiter,
1255      * setting the fields of the XMLString structure, appropriately.
1256      * <p>
1257      * <strong>Note:</strong> The characters are consumed.
1258      * <p>
     * <strong>Note:</strong> This assumes that the internal buffer is at
     * least as large as the delimiter and that the delimiter contains at
     * least one character.
1261      * <p>
1262      * <strong>Note:</strong> This method does not guarantee to return
1263      * the longest run of character data. This method may return before
1264      * the delimiter due to reaching the end of the input buffer or any
1265      * other reason.
1266      * <p>
1267      * @param delimiter The string that signifies the end of the character
1268      *                  data to be scanned.
1269      * @param buffer    The XMLStringBuffer to fill.
1270      *
1271      * @return Returns true if there is more data to scan, false otherwise.
1272      *
1273      * @throws IOException  Thrown if i/o error occurs.
1274      * @throws EOFException Thrown on end of file.
1275      */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        // Set once the complete delimiter has been matched in the buffer.
        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        // Each pass scans one stretch of the entity buffer and appends it to
        // the caller's buffer; passes repeat until the delimiter is matched
        // or one of the early returns below is taken.
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }

            boolean bNextEntity = false;

            // Fewer than delimLen characters remain in the buffer: shift them
            // to the front and load more data behind them, until enough are
            // available or load() returns true.
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            // Still too few characters for a full delimiter: hand back what
            // is left, mark the buffer as consumed and report "no more data".
            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true, false);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer exhausted: restart at the front, keeping
                            // room for the newlines counted so far.
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: consume the '\n' and advance offset
                            // so the pair contributes a single '\n'.
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is also reset here, unlike
                            // the '\r' branch above -- confirm this is intended.
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                    } else {
                        // Not a newline: un-read it and stop normalizing.
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // Overwrite the scanned newline run with '\n' characters.
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // Only one character remains after the newlines: append
                    // the newlines and let the caller call again.
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer ends inside a possible delimiter: rewind
                            // to its first character and leave it for the
                            // next pass of the outer do-loop.
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // Mismatch: rewind to just after the first
                            // delimiter character and keep scanning.
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // The position advanced by exactly delimLen only when
                    // every delimiter character matched.
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // Leave the newline for the normalization step of the
                    // next pass.
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // Stop in front of the invalid character, append what was
                    // scanned so far and return to the caller.
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            // The delimiter itself is consumed but not appended to the buffer.
            if (done) {
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // The loop above is only left this way once done is true, so this
        // returns false.
        return !done;

    } // scanData(String,XMLString)
1440 
    /**
     * Skips a character appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed only if it matches
     * the specified character. The one exception is a request to skip
     * {@code '\n'} while {@code isExternal} is set: a carriage return at
     * the current position, together with a line feed directly after it
     * (if there is one), is then consumed as the newline.
     * <p>
     * Line and column numbers of the current entity are updated for every
     * character that is consumed.
     *
     * @param c The character to skip.
     *
     * @return Returns true if the character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean skipChar(int c) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipChar, '"+(char)c+"': ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // skip character
        int cc = fCurrentEntity.ch[fCurrentEntity.position];
        if (cc == c) {
            // exact match: consume it and keep the line/column bookkeeping in step
            fCurrentEntity.position++;
            if (c == '\n') {
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
            } else {
                fCurrentEntity.columnNumber++;
            }
            if (DEBUG_BUFFER) {
                System.out.print(")skipChar, '"+(char)c+"': ");
                print();
                System.out.println(" -> true");
            }
            return true;
        } else if (c == '\n' && cc == '\r' && isExternal) {
            // handle newlines: the caller asked for '\n' but the buffer holds '\r'
            // NOTE(review): position has not been advanced since the refill check
            // at the top of the method, so this condition looks like it can only
            // hold if that load left the buffer empty -- confirm whether
            // "count - 1" was intended here.
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                // keep the '\r' in slot 0 and read the following characters behind it
                fCurrentEntity.ch[0] = (char)cc;
                load(1, false, false);
            }
            // step over the '\r' ...
            fCurrentEntity.position++;
            // ... and over a directly following '\n', so "\r\n" counts as one newline
            // NOTE(review): no refill precedes this read, so position may equal
            // count here -- confirm that the slot read is always valid.
            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                fCurrentEntity.position++;
            }
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (DEBUG_BUFFER) {
                System.out.print(")skipChar, '"+(char)c+"': ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // character was not skipped
        if (DEBUG_BUFFER) {
            System.out.print(")skipChar, '"+(char)c+"': ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipChar(int):boolean
1512 
1513     public boolean isSpace(char ch){
1514         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1515     }
    /**
     * Skips space characters appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they are
     * space characters.
     * <p>
     * Line and column numbers of the current entity are updated while
     * skipping. When {@code isExternal} is set, a carriage return is
     * counted as a line break and a line feed directly following it is
     * consumed together with it.
     *
     * @return Returns true if at least one space character was skipped.
     *         Returns false if no space was found, or if the current
     *         entity is {@code null} after the initial load.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // This check is done only in skipSpaces() because it is called by
        // fMiscDispatcher and the parser should exit gracefully when the
        // document is well-formed.
        // It is possible that the end of the document was reached and
        // fCurrentEntity became null. Nothing was read in that case, so
        // 'false' is returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                // set when the refill below reports an entity change; position
                // is then left untouched by the increment further down
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: move it to
                    // slot 0 and refill behind it so the character after it
                    // can be inspected
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        invokeListeners(0);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged){
                            // the load changed the position to 1; restore it
                            // to the newline at slot 0 when the entity did
                            // not change
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            // no entity left; the newline already counted as
                            // a skipped space
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek at the next character: swallow a '\n' that follows
                        // the '\r', otherwise step back onto the '\r'
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // consume the space character itself
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    // This check is done only in skipSpaces() because it is
                    // called by fMiscDispatcher and the parser should exit
                    // gracefully when the document is well-formed.

                    // It is possible that the end of the document was reached
                    // and fCurrentEntity became null. At least one space has
                    // been consumed by now, so 'true' is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean
1620 
1621 
    /**
     * Checks that the given number of characters is available in the
     * underlying buffer. Delegates to
     * {@link #arrangeCapacity(int, boolean)} with {@code changeEntity}
     * set to {@code false}.
     *
     * @param length The number of characters that must be available.
     *
     * @return Returns true if the requested capacity is available.
     *
     * @throws IOException Thrown if i/o error occurs.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }
1630 
    /**
     * Checks that the given number of characters is available in the
     * underlying buffer starting at the current position, loading more
     * characters if they are not buffered yet. The current position is
     * put back after each load, so the characters are made available
     * without being consumed.
     *
     * @param length       The number of characters that must be available.
     * @param changeEntity Passed on to {@code load}: true if the load may
     *                     change entities at the end of the entity.
     *
     * @return Returns true if the requested capacity is available.
     *
     * @throws IOException Thrown if i/o error occurs.
     */
    public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
        //check if the capacity is available in the current buffer
        //count is no. of characters in the buffer   [x][m][l]
        //position is '0' based
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        }
        if(DEBUG_SKIP_STRING){
            System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
            System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
            System.out.println("length = " + length);
        }
        boolean entityChanged = false;
        //load more characters -- this function shouldn't change the entity
        // NOTE(review): if length is larger than the buffer, or a load adds no
        // characters without reporting an entity change, this loop does not
        // appear to make progress -- confirm callers never request that much.
        while((fCurrentEntity.count - fCurrentEntity.position) < length){
            // not enough room between position and the end of the array:
            // shift the unread characters to the front of the buffer
            if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
                invokeListeners(0);
                System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
                fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
                fCurrentEntity.position = 0;
            }

            // still short: append new characters behind the ones already buffered
            if((fCurrentEntity.count - fCurrentEntity.position) < length){
                int pos = fCurrentEntity.position;
                invokeListeners(pos);
                entityChanged = load(fCurrentEntity.count, changeEntity, false);
                //load changes the position.. set it back to the point where we started.
                fCurrentEntity.position = pos;
                // end of the entity reached: no more characters will arrive
                if(entityChanged)break;
            }
            if(DEBUG_SKIP_STRING){
                System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
                System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
                System.out.println("length = " + length);
            }
        }

        //after loading check again.
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        } else {
            return false;
        }
    }
1683 
1684     /**
1685      * Skips the specified string appearing immediately on the input.
1686      * <p>
1687      * <strong>Note:</strong> The characters are consumed only if all
1688      * the characters are skipped.
1689      *
1690      * @param s The string to skip.
1691      *
1692      * @return Returns true if the string was skipped.
1693      *
1694      * @throws IOException  Thrown if i/o error occurs.
1695      * @throws EOFException Thrown on end of file.
1696      */
1697     public boolean skipString(String s) throws IOException {
1698 
1699         final int length = s.length();
1700 
1701         //first make sure that required capacity is avaible
1702         if(arrangeCapacity(length, false)){
1703             final int beforeSkip = fCurrentEntity.position ;
1704             int afterSkip = fCurrentEntity.position + length - 1 ;
1705             if(DEBUG_SKIP_STRING){
1706                 System.out.println("skipString,length = " + s + "," + length);
1707                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1708             }
1709 
1710             //s.charAt() indexes are 0 to 'Length -1' based.
1711             int i = length - 1 ;
1712             //check from reverse
1713             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1714                 if(afterSkip-- == beforeSkip){
1715                     fCurrentEntity.position = fCurrentEntity.position + length ;
1716                     fCurrentEntity.columnNumber += length;
1717                     return true;
1718                 }
1719             }
1720         }
1721 
1722         return false;
1723     } // skipString(String):boolean
1724 
1725     public boolean skipString(char [] s) throws IOException {
1726 
1727         final int length = s.length;
1728         //first make sure that required capacity is avaible
1729         if(arrangeCapacity(length, false)){
1730             int beforeSkip = fCurrentEntity.position ;
1731             int afterSkip = fCurrentEntity.position + length  ;
1732 
1733             if(DEBUG_SKIP_STRING){
1734                 System.out.println("skipString,length = " + new String(s) + "," + length);
1735                 System.out.println("skipString,length = " + new String(s) + "," + length);
1736             }
1737 
1738             for(int i=0;i<length;i++){
1739                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1740                    return false;
1741                 }
1742             }
1743             fCurrentEntity.position = fCurrentEntity.position + length ;
1744             fCurrentEntity.columnNumber += length;
1745             return true;
1746 
1747         }
1748 
1749         return false;
1750     }
1751 
1752     //
1753     // Locator methods
1754     //
1755     //
1756     // Private methods
1757     //
1758 
    /**
     * Loads a chunk of text.
     * <p>
     * Characters are read from the current entity's reader into its
     * buffer starting at {@code offset}. If characters were read, the
     * entity's count and position are reset so that position points at
     * {@code offset}. A read result of -1 is treated as the end of the
     * entity.
     *
     * @param offset       The offset into the character buffer to
     *                     read the next batch of characters.
     * @param changeEntity True if the load should change entities
     *                     at the end of the entity, otherwise leave
     *                     the current entity in place and the entity
     *                     boundary will be signaled by the return
     *                     value.
     * @param notify       Determine whether to notify listeners of
     *                     the event
     *
     * @return Returns true if the entity changed as a result of this
     *         load operation.
     *
     * @throws IOException Thrown if i/o error occurs; also thrown (as
     *                     {@code END_OF_DOCUMENT_ENTITY}) when no current
     *                     entity is left after ending the entity.
     */
    final boolean load(int offset, boolean changeEntity, boolean notify)
    throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(load, "+offset+": ");
            print();
            System.out.println();
        }
        if (notify) {
            invokeListeners(offset);
        }
        // maintain the running total of characters counted up to the last load
        fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
        // read characters
        int length = fCurrentEntity.ch.length - offset;
        // while the entity may not be read in chunks, cap the read at the
        // XMLDecl buffer size
        if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
            length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
        }
        if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
        int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
        if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);

        // reset count and position
        boolean entityChanged = false;
        if (count != -1) {
            // a read of zero characters leaves count and position untouched
            if (count != 0) {
                // record the last count
                fCurrentEntity.fLastCount = count;
                fCurrentEntity.count = count + offset;
                fCurrentEntity.position = offset;
            }
        }
        // end of this entity
        else {
            fCurrentEntity.count = offset;
            fCurrentEntity.position = offset;
            entityChanged = true;

            if (changeEntity) {
                //notify the entity manager about the end of entity
                fEntityManager.endEntity();
                // no current entity is left: signal the end of the document
                if(fCurrentEntity == null){
                    throw END_OF_DOCUMENT_ENTITY;
                }
                // handle the trailing edges: the entity that is current now has
                // no unread characters buffered either, so load for it as well
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, false);
                }
            }

        }
        if (DEBUG_BUFFER) {
            System.out.print(")load, "+offset+": ");
            print();
            System.out.println();
        }

        return entityChanged;

    } // load(int, boolean, boolean):boolean
1835 
1836     /**
1837      * Creates a reader capable of reading the given input stream in
1838      * the specified encoding.
1839      *
1840      * @param inputStream  The input stream.
1841      * @param encoding     The encoding name that the input stream is
1842      *                     encoded using. If the user has specified that
1843      *                     Java encoding names are allowed, then the
1844      *                     encoding name may be a Java encoding name;
1845      *                     otherwise, it is an ianaEncoding name.
1846      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1847      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1848      *                      unknown or not relevant.
1849      *
1850      * @return Returns a reader.
1851      */
1852     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1853     throws IOException {
1854 
1855         // normalize encoding name
1856         if (encoding == null) {
1857             encoding = "UTF-8";
1858         }
1859 
1860         // try to use an optimized reader
1861         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1862         if (ENCODING.equals("UTF-8")) {
1863             if (DEBUG_ENCODINGS) {
1864                 System.out.println("$$$ creating UTF8Reader");
1865             }
1866             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1867         }
1868         if (ENCODING.equals("US-ASCII")) {
1869             if (DEBUG_ENCODINGS) {
1870                 System.out.println("$$$ creating ASCIIReader");
1871             }
1872             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1873         }
1874         if(ENCODING.equals("ISO-10646-UCS-4")) {
1875             if(isBigEndian != null) {
1876                 boolean isBE = isBigEndian.booleanValue();
1877                 if(isBE) {
1878                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1879                 } else {
1880                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1881                 }
1882             } else {
1883                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1884                         "EncodingByteOrderUnsupported",
1885                         new Object[] { encoding },
1886                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1887             }
1888         }
1889         if(ENCODING.equals("ISO-10646-UCS-2")) {
1890             if(isBigEndian != null) { // sould never happen with this encoding...
1891                 boolean isBE = isBigEndian.booleanValue();
1892                 if(isBE) {
1893                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1894                 } else {
1895                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1896                 }
1897             } else {
1898                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1899                         "EncodingByteOrderUnsupported",
1900                         new Object[] { encoding },
1901                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1902             }
1903         }
1904 
1905         // check for valid name
1906         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1907         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1908         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1909             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1910                     "EncodingDeclInvalid",
1911                     new Object[] { encoding },
1912                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1913                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1914                     //       because every byte is a valid ISO Latin 1 character.
1915                     //       It may not translate correctly but if we failed on
1916                     //       the encoding anyway, then we're expecting the content
1917                     //       of the document to be bad. This will just prevent an
1918                     //       invalid UTF-8 sequence to be detected. This is only
1919                     //       important when continue-after-fatal-error is turned
1920                     //       on. -Ac
1921                     encoding = "ISO-8859-1";
1922         }
1923 
1924         // try to use a Java reader
1925         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1926         if (javaEncoding == null) {
1927             if(fAllowJavaEncodings) {
1928                 javaEncoding = encoding;
1929             } else {
1930                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1931                         "EncodingDeclInvalid",
1932                         new Object[] { encoding },
1933                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1934                         // see comment above.
1935                         javaEncoding = "ISO8859_1";
1936             }
1937         }
1938         else if (javaEncoding.equals("ASCII")) {
1939             if (DEBUG_ENCODINGS) {
1940                 System.out.println("$$$ creating ASCIIReader");
1941             }
1942             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1943         }
1944 
1945         if (DEBUG_ENCODINGS) {
1946             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
1947             if (javaEncoding == encoding) {
1948                 System.out.print(" (IANA encoding)");
1949             }
1950             System.out.println();
1951         }
1952         return new InputStreamReader(inputStream, javaEncoding);
1953 
1954     } // createReader(InputStream,String, Boolean): Reader
1955 
1956     /**
1957      * Returns the IANA encoding name that is auto-detected from
1958      * the bytes specified, with the endian-ness of that encoding where appropriate.
1959      *
1960      * @param b4    The first four bytes of the input.
1961      * @param count The number of bytes actually read.
1962      * @return a 2-element array:  the first element, an IANA-encoding string,
1963      *  the second element a Boolean which is true iff the document is big endian, false
1964      *  if it's little-endian, and null if the distinction isn't relevant.
1965      */
1966     protected Object[] getEncodingName(byte[] b4, int count) {
1967 
1968         if (count < 2) {
1969             return new Object[]{"UTF-8", null};
1970         }
1971 
1972         // UTF-16, with BOM
1973         int b0 = b4[0] & 0xFF;
1974         int b1 = b4[1] & 0xFF;
1975         if (b0 == 0xFE && b1 == 0xFF) {
1976             // UTF-16, big-endian
1977             return new Object [] {"UTF-16BE", new Boolean(true)};
1978         }
1979         if (b0 == 0xFF && b1 == 0xFE) {
1980             // UTF-16, little-endian
1981             return new Object [] {"UTF-16LE", new Boolean(false)};
1982         }
1983 
1984         // default to UTF-8 if we don't have enough bytes to make a
1985         // good determination of the encoding
1986         if (count < 3) {
1987             return new Object [] {"UTF-8", null};
1988         }
1989 
1990         // UTF-8 with a BOM
1991         int b2 = b4[2] & 0xFF;
1992         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1993             return new Object [] {"UTF-8", null};
1994         }
1995 
1996         // default to UTF-8 if we don't have enough bytes to make a
1997         // good determination of the encoding
1998         if (count < 4) {
1999             return new Object [] {"UTF-8", null};
2000         }
2001 
2002         // other encodings
2003         int b3 = b4[3] & 0xFF;
2004         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2005             // UCS-4, big endian (1234)
2006             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
2007         }
2008         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2009             // UCS-4, little endian (4321)
2010             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
2011         }
2012         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2013             // UCS-4, unusual octet order (2143)
2014             // REVISIT: What should this be?
2015             return new Object [] {"ISO-10646-UCS-4", null};
2016         }
2017         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2018             // UCS-4, unusual octect order (3412)
2019             // REVISIT: What should this be?
2020             return new Object [] {"ISO-10646-UCS-4", null};
2021         }
2022         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2023             // UTF-16, big-endian, no BOM
2024             // (or could turn out to be UCS-2...
2025             // REVISIT: What should this be?
2026             return new Object [] {"UTF-16BE", new Boolean(true)};
2027         }
2028         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2029             // UTF-16, little-endian, no BOM
2030             // (or could turn out to be UCS-2...
2031             return new Object [] {"UTF-16LE", new Boolean(false)};
2032         }
2033         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2034             // EBCDIC
2035             // a la xerces1, return CP037 instead of EBCDIC here
2036             return new Object [] {"CP037", null};
2037         }
2038 
2039         // default encoding
2040         return new Object [] {"UTF-8", null};
2041 
2042     } // getEncodingName(byte[],int):Object[]
2043 
2044     /**
     * TODO: This comment for endEntity() is kept as a reminder that the method still needs to be implemented.
2046      * Ends an entity.
2047      *
2048      * @throws XNIException Thrown by entity handler to signal an error.
2049      */
2050     //
2051     /** Prints the contents of the buffer. */
2052     final void print() {
2053         if (DEBUG_BUFFER) {
2054             if (fCurrentEntity != null) {
2055                 System.out.print('[');
2056                 System.out.print(fCurrentEntity.count);
2057                 System.out.print(' ');
2058                 System.out.print(fCurrentEntity.position);
2059                 if (fCurrentEntity.count > 0) {
2060                     System.out.print(" \"");
2061                     for (int i = 0; i < fCurrentEntity.count; i++) {
2062                         if (i == fCurrentEntity.position) {
2063                             System.out.print('^');
2064                         }
2065                         char c = fCurrentEntity.ch[i];
2066                         switch (c) {
2067                             case '\n': {
2068                                 System.out.print("\\n");
2069                                 break;
2070                             }
2071                             case '\r': {
2072                                 System.out.print("\\r");
2073                                 break;
2074                             }
2075                             case '\t': {
2076                                 System.out.print("\\t");
2077                                 break;
2078                             }
2079                             case '\\': {
2080                                 System.out.print("\\\\");
2081                                 break;
2082                             }
2083                             default: {
2084                                 System.out.print(c);
2085                             }
2086                         }
2087                     }
2088                     if (fCurrentEntity.position == fCurrentEntity.count) {
2089                         System.out.print('^');
2090                     }
2091                     System.out.print('"');
2092                 }
2093                 System.out.print(']');
2094                 System.out.print(" @ ");
2095                 System.out.print(fCurrentEntity.lineNumber);
2096                 System.out.print(',');
2097                 System.out.print(fCurrentEntity.columnNumber);
2098             } else {
2099                 System.out.print("*NO CURRENT ENTITY*");
2100             }
2101         }
2102     }
2103 
2104     /**
2105      * Registers the listener object and provides callback.
2106      * @param listener listener to which call back should be provided when scanner buffer
2107      * is being changed.
2108      */
2109     public void registerListener(XMLBufferListener listener) {
2110         if(!listeners.contains(listener))
2111             listeners.add(listener);
2112     }
2113 
2114     /**
2115      *
2116      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2117      */
2118     public void invokeListeners(int loadPos){
2119         for(int i=0;i<listeners.size();i++){
2120             XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
2121             listener.refresh(loadPos);
2122         }
2123     }
2124 
    /**
     * Skips space characters appearing immediately on the input that would
     * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
     * normalization is performed. This is useful when scanning structures
     * such as the XMLDecl and TextDecl that can only contain US-ASCII
     * characters.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they would
     * match non-terminal S before end of line normalization is performed.
     * <p>
     * Every load in this method is requested with {@code notify} set to
     * false and the method makes no call to {@code invokeListeners}.
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public final boolean skipDeclSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, false);
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            // read once from the entity that is current when skipping starts
            boolean external = fCurrentEntity.isExternal();
            do {
                // set when the refill below reports an entity change; position
                // is then left untouched by the increment further down
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (external && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: move it to
                    // slot 0 and refill behind it so the character after it
                    // can be inspected
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged)
                            // the load changed the position to 1; restore it
                            // to the newline at slot 0 when the entity did
                            // not change
                            fCurrentEntity.position = 0;
                    }
                    if (c == '\r' && external) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek at the next character: swallow a '\n' that follows
                        // the '\r', otherwise step back onto the '\r'
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * else {
                     * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * }
                     * }
                     * /***/
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // consume the space character itself
                if (!entityChanged)
                    fCurrentEntity.position++;
                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, false);
                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipDeclSpaces: ");
                //  XMLEntityManager.print(fCurrentEntity);
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> false");
        }
        return false;

    } // skipDeclSpaces():boolean
2214 
2215 
2216 } // class XMLEntityScanner