1 /*
   2  * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *     http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xerces.internal.impl;
  23 
  24 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  25 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
  26 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  27 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  28 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  29 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  30 import com.sun.org.apache.xerces.internal.util.XMLChar;
  31 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  32 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
  33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
  34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
  35 import com.sun.org.apache.xerces.internal.xni.*;
  36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  38 import com.sun.xml.internal.stream.Entity;
  39 import com.sun.xml.internal.stream.Entity.ScannedEntity;
  40 import com.sun.xml.internal.stream.XMLBufferListener;
  41 import java.io.EOFException;
  42 import java.io.IOException;
  43 import java.io.InputStream;
  44 import java.io.InputStreamReader;
  45 import java.io.Reader;
  46 import java.util.ArrayList;
  47 import java.util.Locale;
  48 
  49 /**
  50  * Implements the entity scanner methods.
  51  *
  52  * @author Neeraj Bajaj, Sun Microsystems
  53  * @author Andy Clark, IBM
  54  * @author Arnaud  Le Hors, IBM
  55  * @author K.Venugopal Sun Microsystems
  56  *
  57  */
  58 public class XMLEntityScanner implements XMLLocator  {
  59 
  60     protected Entity.ScannedEntity fCurrentEntity = null;
  61     protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
  62 
  63     protected XMLEntityManager fEntityManager;
  64 
  65     /** Security manager. */
  66     protected XMLSecurityManager fSecurityManager = null;
  67 
  68     /** Limit analyzer. */
  69     protected XMLLimitAnalyzer fLimitAnalyzer = null;
  70 
  71     /** Debug switching readers for encodings. */
  72     private static final boolean DEBUG_ENCODINGS = false;
  73 
  74     /** Listeners which should know when load is being called */
  75     private ArrayList<XMLBufferListener> listeners = new ArrayList<>();
  76 
  77     private static final boolean [] VALID_NAMES = new boolean[127];
  78 
  79     /**
  80      * Debug printing of buffer. This debugging flag works best when you
  81      * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
  82      * 64 characters.
  83      */
  84     private static final boolean DEBUG_BUFFER = false;
  85     private static final boolean DEBUG_SKIP_STRING = false;
  86     /**
  87      * To signal the end of the document entity, this exception will be thrown.
  88      */
  89     private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
  90         private static final long serialVersionUID = 980337771224675268L;
  91         public Throwable fillInStackTrace() {
  92             return this;
  93         }
  94     };
  95 
  96     protected SymbolTable fSymbolTable = null;
  97     protected XMLErrorReporter fErrorReporter = null;
  98     int [] whiteSpaceLookup = new int[100];
  99     int whiteSpaceLen = 0;
 100     boolean whiteSpaceInfoNeeded = true;
 101 
 102     /**
 103      * Allow Java encoding names. This feature identifier is:
 104      * http://apache.org/xml/features/allow-java-encodings
 105      */
 106     protected boolean fAllowJavaEncodings;
 107 
 108     //Will be used only during internal subsets.
 109     //for appending data.
 110 
 111     /** Property identifier: symbol table. */
 112     protected static final String SYMBOL_TABLE =
 113             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
 114 
 115     /** Property identifier: error reporter. */
 116     protected static final String ERROR_REPORTER =
 117             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
 118 
 119     /** Feature identifier: allow Java encodings. */
 120     protected static final String ALLOW_JAVA_ENCODINGS =
 121             Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
 122 
 123     protected PropertyManager fPropertyManager = null ;
 124 
 125     boolean isExternal = false;
 126     static {
 127 
 128         for(int i=0x0041;i<=0x005A ; i++){
 129             VALID_NAMES[i]=true;
 130         }
 131         for(int i=0x0061;i<=0x007A; i++){
 132             VALID_NAMES[i]=true;
 133         }
 134         for(int i=0x0030;i<=0x0039; i++){
 135             VALID_NAMES[i]=true;
 136         }
 137         VALID_NAMES[45]=true;
 138         VALID_NAMES[46]=true;
 139         VALID_NAMES[58]=true;
 140         VALID_NAMES[95]=true;
 141     }
 142 
 143     // Remember, that the XML version has explicitly been set,
 144     // so that XMLStreamReader.getVersion() can find that out.
 145     protected boolean xmlVersionSetExplicitly = false;
 146 
 147     //
 148     // Constructors
 149     //
 150 
 151     /** Default constructor. */
 152     public XMLEntityScanner() {
 153     } // <init>()
 154 
 155 
 156     /**  private constructor, this class can only be instantiated within this class. Instance of this class should
 157      *    be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
 158      *    @see getEntityScanner()
 159      *    @see getEntityScanner(ScannedEntity)
 160      */
 161     public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
 162         fEntityManager = entityManager ;
 163         reset(propertyManager);
 164     } // <init>()
 165 
 166 
 167     // set buffer size:
 168     public final void setBufferSize(int size) {
 169         // REVISIT: Buffer size passed to entity scanner
 170         // was not being kept in synch with the actual size
 171         // of the buffers in each scanned entity. If any
 172         // of the buffers were actually resized, it was possible
 173         // that the parser would throw an ArrayIndexOutOfBoundsException
 174         // for documents which contained names which are longer than
 175         // the current buffer size. Conceivably the buffer size passed
 176         // to entity scanner could be used to determine a minimum size
 177         // for resizing, if doubling its size is smaller than this
 178         // minimum. -- mrglavas
 179         fBufferSize = size;
 180     }
 181 
 182     /**
 183      * Resets the components.
 184      */
 185     public void reset(PropertyManager propertyManager){
 186         fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
 187         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
 188         resetCommon();
 189     }
 190 
 191     /**
 192      * Resets the component. The component can query the component manager
 193      * about any features and properties that affect the operation of the
 194      * component.
 195      *
 196      * @param componentManager The component manager.
 197      *
 198      * @throws SAXException Thrown by component on initialization error.
 199      *                      For example, if a feature or property is
 200      *                      required for the operation of the component, the
 201      *                      component manager may throw a
 202      *                      SAXNotRecognizedException or a
 203      *                      SAXNotSupportedException.
 204      */
 205     public void reset(XMLComponentManager componentManager)
 206     throws XMLConfigurationException {
 207         // xerces features
 208         fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
 209 
 210         //xerces properties
 211         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 212         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 213         resetCommon();
 214     } // reset(XMLComponentManager)
 215 
 216 
 217     public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 218             XMLErrorReporter reporter) {
 219         fCurrentEntity = null;
 220         fSymbolTable = symbolTable;
 221         fEntityManager = entityManager;
 222         fErrorReporter = reporter;
 223         fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
 224         fSecurityManager = fEntityManager.fSecurityManager;
 225     }
 226 
 227     private void resetCommon() {
 228         fCurrentEntity = null;
 229         whiteSpaceLen = 0;
 230         whiteSpaceInfoNeeded = true;
 231         listeners.clear();
 232         fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
 233         fSecurityManager = fEntityManager.fSecurityManager;
 234     }
 235 
 236     /**
 237      * Returns the XML version of the current entity. This will normally be the
 238      * value from the XML or text declaration or defaulted by the parser. Note that
 239      * that this value may be different than the version of the processing rules
 240      * applied to the current entity. For instance, an XML 1.1 document may refer to
 241      * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
 242      * document. Also note that, for a given entity, this value can only be considered
 243      * final once the XML or text declaration has been read or once it has been
 244      * determined that there is no such declaration.
 245      */
 246     public final String getXMLVersion() {
 247         if (fCurrentEntity != null) {
 248             return fCurrentEntity.xmlVersion;
 249         }
 250         return null;
 251     } // getXMLVersion():String
 252 
 253     /**
 254      * Sets the XML version. This method is used by the
 255      * scanners to report the value of the version pseudo-attribute
 256      * in an XML or text declaration.
 257      *
 258      * @param xmlVersion the XML version of the current entity
 259      */
 260     public final void setXMLVersion(String xmlVersion) {
 261         xmlVersionSetExplicitly = true;
 262         fCurrentEntity.xmlVersion = xmlVersion;
 263     } // setXMLVersion(String)
 264 
 265 
 266     /** set the instance of current scanned entity.
 267      *   @param ScannedEntity
 268      */
 269 
 270     public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
 271         fCurrentEntity = scannedEntity ;
 272         if(fCurrentEntity != null){
 273             isExternal = fCurrentEntity.isExternal();
 274             if(DEBUG_BUFFER)
 275                 System.out.println("Current Entity is "+scannedEntity.name);
 276         }
 277     }
 278 
 279     public  Entity.ScannedEntity getCurrentEntity(){
 280         return fCurrentEntity ;
 281     }
 282     //
 283     // XMLEntityReader methods
 284     //
 285 
 286     /**
 287      * Returns the base system identifier of the currently scanned
 288      * entity, or null if none is available.
 289      */
 290     public final String getBaseSystemId() {
 291         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 292     } // getBaseSystemId():String
 293 
 294     /**
 295      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
 296      */
 297     public void setBaseSystemId(String systemId) {
 298         //no-op
 299     }
 300 
 301     ///////////// Locator methods start.
 302     public final int getLineNumber(){
 303         //if the entity is closed, we should return -1
 304         //xxx at first place why such call should be there...
 305         return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
 306     }
 307 
 308     /**
 309      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
 310      */
 311     public void setLineNumber(int line) {
 312         //no-op
 313     }
 314 
 315 
 316     public final int getColumnNumber(){
 317         //if the entity is closed, we should return -1
 318         //xxx at first place why such call should be there...
 319         return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
 320     }
 321 
 322     /**
 323      * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
 324      */
 325     public void setColumnNumber(int col) {
 326         // no-op
 327     }
 328 
 329 
 330     public final int getCharacterOffset(){
 331         return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
 332     }
 333 
 334     /** Returns the expanded system identifier.  */
 335     public final String getExpandedSystemId() {
 336         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
 337     }
 338 
 339     /**
 340      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
 341      */
 342     public void setExpandedSystemId(String systemId) {
 343         //no-op
 344     }
 345 
 346     /** Returns the literal system identifier.  */
 347     public final String getLiteralSystemId() {
 348         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
 349     }
 350 
 351     /**
 352      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
 353      */
 354     public void setLiteralSystemId(String systemId) {
 355         //no-op
 356     }
 357 
 358     /** Returns the public identifier.  */
 359     public final String getPublicId() {
 360         return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 361     }
 362 
 363     /**
 364      * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
 365      */
 366     public void setPublicId(String publicId) {
 367         //no-op
 368     }
 369 
 370     ///////////////// Locator methods finished.
 371 
 372     /** the version of the current entity being scanned */
 373     public void setVersion(String version){
 374         fCurrentEntity.version = version;
 375     }
 376 
 377     public String getVersion(){
 378         if (fCurrentEntity != null)
 379             return fCurrentEntity.version ;
 380         return null;
 381     }
 382 
 383     /**
 384      * Returns the encoding of the current entity.
 385      * Note that, for a given entity, this value can only be
 386      * considered final once the encoding declaration has been read (or once it
 387      * has been determined that there is no such declaration) since, no encoding
 388      * having been specified on the XMLInputSource, the parser
 389      * will make an initial "guess" which could be in error.
 390      */
 391     public final String getEncoding() {
 392         if (fCurrentEntity != null) {
 393             return fCurrentEntity.encoding;
 394         }
 395         return null;
 396     } // getEncoding():String
 397 
 398     /**
 399      * Sets the encoding of the scanner. This method is used by the
 400      * scanners if the XMLDecl or TextDecl line contains an encoding
 401      * pseudo-attribute.
 402      * <p>
 403      * <strong>Note:</strong> The underlying character reader on the
 404      * current entity will be changed to accomodate the new encoding.
 405      * However, the new encoding is ignored if the current reader was
 406      * not constructed from an input stream (e.g. an external entity
 407      * that is resolved directly to the appropriate java.io.Reader
 408      * object).
 409      *
 410      * @param encoding The IANA encoding name of the new encoding.
 411      *
 412      * @throws IOException Thrown if the new encoding is not supported.
 413      *
 414      * @see com.sun.org.apache.xerces.internal.util.EncodingMap
 415      */
 416     public final void setEncoding(String encoding) throws IOException {
 417 
 418         if (DEBUG_ENCODINGS) {
 419             System.out.println("$$$ setEncoding: "+encoding);
 420         }
 421 
 422         if (fCurrentEntity.stream != null) {
 423             // if the encoding is the same, don't change the reader and
 424             // re-use the original reader used by the OneCharReader
 425             // NOTE: Besides saving an object, this overcomes deficiencies
 426             //       in the UTF-16 reader supplied with the standard Java
 427             //       distribution (up to and including 1.3). The UTF-16
 428             //       decoder buffers 8K blocks even when only asked to read
 429             //       a single char! -Ac
 430             if (fCurrentEntity.encoding == null ||
 431                     !fCurrentEntity.encoding.equals(encoding)) {
 432                 // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
 433                 // and we know the endian-ness, we shouldn't change readers.
 434                 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
 435                 // the endian-ness from the encoding we presently have.
 436                 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
 437                     String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 438                     if(ENCODING.equals("UTF-16")) return;
 439                     if(ENCODING.equals("ISO-10646-UCS-4")) {
 440                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 441                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
 442                         } else {
 443                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
 444                         }
 445                         return;
 446                     }
 447                     if(ENCODING.equals("ISO-10646-UCS-2")) {
 448                         if(fCurrentEntity.encoding.equals("UTF-16BE")) {
 449                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
 450                         } else {
 451                             fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
 452                         }
 453                         return;
 454                     }
 455                 }
 456                 // wrap a new reader around the input stream, changing
 457                 // the encoding
 458                 if (DEBUG_ENCODINGS) {
 459                     System.out.println("$$$ creating new reader from stream: "+
 460                             fCurrentEntity.stream);
 461                 }
 462                 //fCurrentEntity.stream.reset();
 463                 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
 464                 fCurrentEntity.encoding = encoding;
 465 
 466             } else {
 467                 if (DEBUG_ENCODINGS)
 468                     System.out.println("$$$ reusing old reader on stream");
 469             }
 470         }
 471 
 472     } // setEncoding(String)
 473 
 474     /** Returns true if the current entity being scanned is external. */
 475     public final boolean isExternal() {
 476         return fCurrentEntity.isExternal();
 477     } // isExternal():boolean
 478 
 479     public int getChar(int relative) throws IOException{
 480         if(arrangeCapacity(relative + 1, false)){
 481             return fCurrentEntity.ch[fCurrentEntity.position + relative];
 482         }else{
 483             return -1;
 484         }
 485     }//getChar()
 486 
 487     /**
 488      * Returns the next character on the input.
 489      * <p>
 490      * <strong>Note:</strong> The character is <em>not</em> consumed.
 491      *
 492      * @throws IOException  Thrown if i/o error occurs.
 493      * @throws EOFException Thrown on end of file.
 494      */
 495     public int peekChar() throws IOException {
 496         if (DEBUG_BUFFER) {
 497             System.out.print("(peekChar: ");
 498             print();
 499             System.out.println();
 500         }
 501 
 502         // load more characters, if needed
 503         if (fCurrentEntity.position == fCurrentEntity.count) {
 504             load(0, true, true);
 505         }
 506 
 507         // peek at character
 508         int c = fCurrentEntity.ch[fCurrentEntity.position];
 509 
 510         // return peeked character
 511         if (DEBUG_BUFFER) {
 512             System.out.print(")peekChar: ");
 513             print();
 514             if (isExternal) {
 515                 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
 516             } else {
 517                 System.out.println(" -> '"+(char)c+"'");
 518             }
 519         }
 520         if (isExternal) {
 521             return c != '\r' ? c : '\n';
 522         } else {
 523             return c;
 524         }
 525 
 526     } // peekChar():int
 527 
 528     /**
 529      * Returns the next character on the input.
 530      * <p>
 531      * <strong>Note:</strong> The character is consumed.
 532      *
 533      * @throws IOException  Thrown if i/o error occurs.
 534      * @throws EOFException Thrown on end of file.
 535      */
 536     public int scanChar() throws IOException {
 537         if (DEBUG_BUFFER) {
 538             System.out.print("(scanChar: ");
 539             print();
 540             System.out.println();
 541         }
 542 
 543         // load more characters, if needed
 544         if (fCurrentEntity.position == fCurrentEntity.count) {
 545             load(0, true, true);
 546         }
 547 
 548         // scan character
 549         int c = fCurrentEntity.ch[fCurrentEntity.position++];
 550         if (c == '\n' || (c == '\r' && isExternal)) {
 551             fCurrentEntity.lineNumber++;
 552             fCurrentEntity.columnNumber = 1;
 553             if (fCurrentEntity.position == fCurrentEntity.count) {
 554                 invokeListeners(1);
 555                 fCurrentEntity.ch[0] = (char)c;
 556                 load(1, false, false);
 557             }
 558             if (c == '\r' && isExternal) {
 559                 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
 560                     fCurrentEntity.position--;
 561                 }
 562                 c = '\n';
 563             }
 564         }
 565 
 566         // return character that was scanned
 567         if (DEBUG_BUFFER) {
 568             System.out.print(")scanChar: ");
 569             print();
 570             System.out.println(" -> '"+(char)c+"'");
 571         }
 572         fCurrentEntity.columnNumber++;
 573         return c;
 574 
 575     } // scanChar():int
 576 
 577     /**
 578      * Returns a string matching the NMTOKEN production appearing immediately
 579      * on the input as a symbol, or null if NMTOKEN Name string is present.
 580      * <p>
 581      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 582      * <p>
 583      * <strong>Note:</strong> The string returned must be a symbol. The
 584      * SymbolTable can be used for this purpose.
 585      *
 586      * @throws IOException  Thrown if i/o error occurs.
 587      * @throws EOFException Thrown on end of file.
 588      *
 589      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 590      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 591      */
 592     public String scanNmtoken() throws IOException {
 593         if (DEBUG_BUFFER) {
 594             System.out.print("(scanNmtoken: ");
 595             print();
 596             System.out.println();
 597         }
 598 
 599         // load more characters, if needed
 600         if (fCurrentEntity.position == fCurrentEntity.count) {
 601             load(0, true, true);
 602         }
 603 
 604         // scan nmtoken
 605         int offset = fCurrentEntity.position;
 606         boolean vc = false;
 607         char c;
 608         while (true){
 609             //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
 610             c = fCurrentEntity.ch[fCurrentEntity.position];
 611             if(c < 127){
 612                 vc = VALID_NAMES[c];
 613             }else{
 614                 vc = XMLChar.isName(c);
 615             }
 616             if(!vc)break;
 617 
 618             if (++fCurrentEntity.position == fCurrentEntity.count) {
 619                 int length = fCurrentEntity.position - offset;
 620                 invokeListeners(length);
 621                 if (length == fCurrentEntity.fBufferSize) {
 622                     // bad luck we have to resize our buffer
 623                     char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 624                     System.arraycopy(fCurrentEntity.ch, offset,
 625                             tmp, 0, length);
 626                     fCurrentEntity.ch = tmp;
 627                     fCurrentEntity.fBufferSize *= 2;
 628                 } else {
 629                     System.arraycopy(fCurrentEntity.ch, offset,
 630                             fCurrentEntity.ch, 0, length);
 631                 }
 632                 offset = 0;
 633                 if (load(length, false, false)) {
 634                     break;
 635                 }
 636             }
 637         }
 638         int length = fCurrentEntity.position - offset;
 639         fCurrentEntity.columnNumber += length;
 640 
 641         // return nmtoken
 642         String symbol = null;
 643         if (length > 0) {
 644             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 645         }
 646         if (DEBUG_BUFFER) {
 647             System.out.print(")scanNmtoken: ");
 648             print();
 649             System.out.println(" -> "+String.valueOf(symbol));
 650         }
 651         return symbol;
 652 
 653     } // scanNmtoken():String
 654 
 655     /**
 656      * Returns a string matching the Name production appearing immediately
 657      * on the input as a symbol, or null if no Name string is present.
 658      * <p>
 659      * <strong>Note:</strong> The Name characters are consumed.
 660      * <p>
 661      * <strong>Note:</strong> The string returned must be a symbol. The
 662      * SymbolTable can be used for this purpose.
 663      *
 664      * @throws IOException  Thrown if i/o error occurs.
 665      * @throws EOFException Thrown on end of file.
 666      *
 667      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 668      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 669      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 670      */
 671     public String scanName() throws IOException {
 672         if (DEBUG_BUFFER) {
 673             System.out.print("(scanName: ");
 674             print();
 675             System.out.println();
 676         }
 677 
 678         // load more characters, if needed
 679         if (fCurrentEntity.position == fCurrentEntity.count) {
 680             load(0, true, true);
 681         }
 682 
 683         // scan name
 684         int offset = fCurrentEntity.position;
 685         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 686             if (++fCurrentEntity.position == fCurrentEntity.count) {
 687                 invokeListeners(1);
 688                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 689                 offset = 0;
 690                 if (load(1, false, false)) {
 691                     fCurrentEntity.columnNumber++;
 692                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 693 
 694                     if (DEBUG_BUFFER) {
 695                         System.out.print(")scanName: ");
 696                         print();
 697                         System.out.println(" -> "+String.valueOf(symbol));
 698                     }
 699                     return symbol;
 700                 }
 701             }
 702             boolean vc =false;
 703             while (true ){
 704                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 705                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 706                 if(c < 127){
 707                     vc = VALID_NAMES[c];
 708                 }else{
 709                     vc = XMLChar.isName(c);
 710                 }
 711                 if(!vc)break;
 712                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 713                     int length = fCurrentEntity.position - offset;
 714                     invokeListeners(length);
 715                     if (length == fCurrentEntity.fBufferSize) {
 716                         // bad luck we have to resize our buffer
 717                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 718                         System.arraycopy(fCurrentEntity.ch, offset,
 719                                 tmp, 0, length);
 720                         fCurrentEntity.ch = tmp;
 721                         fCurrentEntity.fBufferSize *= 2;
 722                     } else {
 723                         System.arraycopy(fCurrentEntity.ch, offset,
 724                                 fCurrentEntity.ch, 0, length);
 725                     }
 726                     offset = 0;
 727                     if (load(length, false, false)) {
 728                         break;
 729                     }
 730                 }
 731             }
 732         }
 733         int length = fCurrentEntity.position - offset;
 734         fCurrentEntity.columnNumber += length;
 735 
 736         // return name
 737         String symbol;
 738         if (length > 0) {
 739             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 740         } else
 741             symbol = null;
 742         if (DEBUG_BUFFER) {
 743             System.out.print(")scanName: ");
 744             print();
 745             System.out.println(" -> "+String.valueOf(symbol));
 746         }
 747         return symbol;
 748 
 749     } // scanName():String
 750 
 751     /**
 752      * Scans a qualified name from the input, setting the fields of the
 753      * QName structure appropriately.
 754      * <p>
 755      * <strong>Note:</strong> The qualified name characters are consumed.
 756      * <p>
 757      * <strong>Note:</strong> The strings used to set the values of the
 758      * QName structure must be symbols. The SymbolTable can be used for
 759      * this purpose.
 760      *
 761      * @param qname The qualified name structure to fill.
 762      *
 763      * @return Returns true if a qualified name appeared immediately on
 764      *         the input and was scanned, false otherwise.
 765      *
 766      * @throws IOException  Thrown if i/o error occurs.
 767      * @throws EOFException Thrown on end of file.
 768      *
 769      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 770      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
 771      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
 772      */
 773     public boolean scanQName(QName qname) throws IOException {
 774         if (DEBUG_BUFFER) {
 775             System.out.print("(scanQName, "+qname+": ");
 776             print();
 777             System.out.println();
 778         }
 779 
 780         // load more characters, if needed
 781         if (fCurrentEntity.position == fCurrentEntity.count) {
 782             load(0, true, true);
 783         }
 784 
 785         // scan qualified name
 786         int offset = fCurrentEntity.position;
 787 
 788         //making a check if if the specified character is a valid name start character
 789         //as defined by production [5] in the XML 1.0 specification.
 790         // Name ::= (Letter | '_' | ':') (NameChar)*
 791 
 792         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
 793             if (++fCurrentEntity.position == fCurrentEntity.count) {
 794                 invokeListeners(1);
 795                 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
 796                 offset = 0;
 797 
 798                 if (load(1, false, false)) {
 799                     fCurrentEntity.columnNumber++;
 800                     //adding into symbol table.
 801                     //XXX We are trying to add single character in SymbolTable??????
 802                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 803                     qname.setValues(null, name, name, null);
 804                     if (DEBUG_BUFFER) {
 805                         System.out.print(")scanQName, "+qname+": ");
 806                         print();
 807                         System.out.println(" -> true");
 808                     }
 809                     return true;
 810                 }
 811             }
 812             int index = -1;
 813             boolean vc = false;
 814             while ( true){
 815 
 816                 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
 817                 char c = fCurrentEntity.ch[fCurrentEntity.position];
 818                 if(c < 127){
 819                     vc = VALID_NAMES[c];
 820                 }else{
 821                     vc = XMLChar.isName(c);
 822                 }
 823                 if(!vc)break;
 824                 if (c == ':') {
 825                     if (index != -1) {
 826                         break;
 827                     }
 828                     index = fCurrentEntity.position;
 829                     //check prefix before further read
 830                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
 831                 }
 832                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 833                     int length = fCurrentEntity.position - offset;
 834                     //check localpart before loading more data
 835                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length - index - 1);
 836                     invokeListeners(length);
 837                     if (length == fCurrentEntity.fBufferSize) {
 838                         // bad luck we have to resize our buffer
 839                         char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
 840                         System.arraycopy(fCurrentEntity.ch, offset,
 841                                 tmp, 0, length);
 842                         fCurrentEntity.ch = tmp;
 843                         fCurrentEntity.fBufferSize *= 2;
 844                     } else {
 845                         System.arraycopy(fCurrentEntity.ch, offset,
 846                                 fCurrentEntity.ch, 0, length);
 847                     }
 848                     if (index != -1) {
 849                         index = index - offset;
 850                     }
 851                     offset = 0;
 852                     if (load(length, false, false)) {
 853                         break;
 854                     }
 855                 }
 856             }
 857             int length = fCurrentEntity.position - offset;
 858             fCurrentEntity.columnNumber += length;
 859             if (length > 0) {
 860                 String prefix = null;
 861                 String localpart = null;
 862                 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 863                         offset, length);
 864 
 865                 if (index != -1) {
 866                     int prefixLength = index - offset;
 867                     //check the result: prefix
 868                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
 869                     prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 870                             offset, prefixLength);
 871                     int len = length - prefixLength - 1;
 872                     //check the result: localpart
 873                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
 874                     localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 875                             index + 1, len);
 876 
 877                 } else {
 878                     localpart = rawname;
 879                     //check the result: localpart
 880                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 881                 }
 882                 qname.setValues(prefix, localpart, rawname, null);
 883                 if (DEBUG_BUFFER) {
 884                     System.out.print(")scanQName, "+qname+": ");
 885                     print();
 886                     System.out.println(" -> true");
 887                 }
 888                 return true;
 889             }
 890         }
 891 
 892         // no qualified name found
 893         if (DEBUG_BUFFER) {
 894             System.out.print(")scanQName, "+qname+": ");
 895             print();
 896             System.out.println(" -> false");
 897         }
 898         return false;
 899 
 900     } // scanQName(QName):boolean
 901 
 902     /**
 903      * Checks whether the value of the specified Limit exceeds its limit
 904      *
 905      * @param limit The Limit to be checked.
 906      * @param entity The current entity.
 907      * @param offset The index of the first byte
 908      * @param length The length of the entity scanned.
 909      */
 910     protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) {
 911         fLimitAnalyzer.addValue(limit, null, length);
 912         if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) {
 913             fSecurityManager.debugPrint(fLimitAnalyzer);
 914             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(),
 915                     new Object[]{new String(entity.ch, offset, length),
 916                 fLimitAnalyzer.getTotalValue(limit),
 917                 fSecurityManager.getLimit(limit),
 918                 fSecurityManager.getStateLiteral(limit)},
 919                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
 920         }
 921     }
 922 
    /**
     * Scans a range of parsed character data and points the supplied
     * XMLString at the scanned range inside the current entity's character
     * buffer (via {@code content.setValues}). The previous contents of the
     * XMLString are replaced, not appended to, and no characters are copied.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * <strong>Note:</strong> A leading run of line breaks is normalized in
     * place in the entity buffer: each '\n' and, for external entities,
     * each '\r' or '\r\n' pair is rewritten as '\n'. The entity's line and
     * column numbers are updated accordingly.
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Exactly one unread character remains: move it to index 0,
            // load more data (presumably right after it -- confirm against
            // load()), and restart scanning from the front of the buffer.
            invokeListeners(1);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // counter used to reposition after a reload and to correct the
        // column number at the end of the method
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // Consume the leading run of line breaks; any other character
            // ends the run and is pushed back.
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: restart at the front, leaving
                        // room for the newlines counted so far
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: consume the '\n' too and advance
                        // offset so the pair yields a single '\n'
                        fCurrentEntity.position++;
                        offset++;
                    }
                    // NOTE(review): a '\r' not followed by '\n' increments
                    // newlines a second time here -- confirm this is intended.
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: same restart as in the '\r' case
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-read it and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the consumed range with normalized '\n' characters
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // Only one unread character is left in the buffer: hand back
                // just the newline run and let the caller call again.
                // The value of content is replaced (setValues), not appended to.
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // scan forward until a character that XMLChar.isContent rejects,
        // or the end of the buffer
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;
        if (fCurrentEntity.isGE) {
            // entities flagged isGE count towards the total entity size limit
            checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
        }

        // The value of content is replaced (setValues), not appended to;
        // it references the entity buffer directly.
        content.setValues(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer exhausted: next character unknown
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int
1068 
1069     /**
1070      * Scans a range of attribute value data, setting the fields of the
1071      * XMLString structure, appropriately.
1072      * <p>
1073      * <strong>Note:</strong> The characters are consumed.
1074      * <p>
1075      * <strong>Note:</strong> This method does not guarantee to return
1076      * the longest run of attribute value data. This method may return
1077      * before the quote character due to reaching the end of the input
1078      * buffer or any other reason.
1079      * <p>
1080      * <strong>Note:</strong> The fields contained in the XMLString
1081      * structure are not guaranteed to remain valid upon subsequent calls
1082      * to the entity scanner. Therefore, the caller is responsible for
1083      * immediately using the returned character data or making a copy of
1084      * the character data.
1085      *
1086      * @param quote   The quote character that signifies the end of the
1087      *                attribute value data.
1088      * @param content The content structure to fill.
1089      *
1090      * @return Returns the next character on the input, if known. This
1091      *         value may be -1 but this does <em>note</em> designate
1092      *         end of file.
1093      *
1094      * @throws IOException  Thrown if i/o error occurs.
1095      * @throws EOFException Thrown on end of file.
1096      */
1097     public int scanLiteral(int quote, XMLString content)
1098     throws IOException {
1099         if (DEBUG_BUFFER) {
1100             System.out.print("(scanLiteral, '"+(char)quote+"': ");
1101             print();
1102             System.out.println();
1103         }
1104         // load more characters, if needed
1105         if (fCurrentEntity.position == fCurrentEntity.count) {
1106             load(0, true, true);
1107         } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1108             invokeListeners(1);
1109             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1110             load(1, false, false);
1111             fCurrentEntity.position = 0;
1112         }
1113 
1114         // normalize newlines
1115         int offset = fCurrentEntity.position;
1116         int c = fCurrentEntity.ch[offset];
1117         int newlines = 0;
1118         if(whiteSpaceInfoNeeded)
1119             whiteSpaceLen=0;
1120         if (c == '\n' || (c == '\r' && isExternal)) {
1121             if (DEBUG_BUFFER) {
1122                 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1123                 print();
1124                 System.out.println();
1125             }
1126             do {
1127                 c = fCurrentEntity.ch[fCurrentEntity.position++];
1128                 if (c == '\r' && isExternal) {
1129                     newlines++;
1130                     fCurrentEntity.lineNumber++;
1131                     fCurrentEntity.columnNumber = 1;
1132                     if (fCurrentEntity.position == fCurrentEntity.count) {
1133                         offset = 0;
1134                         fCurrentEntity.position = newlines;
1135                         if (load(newlines, false, true)) {
1136                             break;
1137                         }
1138                     }
1139                     if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1140                         fCurrentEntity.position++;
1141                         offset++;
1142                     }
1143                     /*** NEWLINE NORMALIZATION ***/
1144                     else {
1145                         newlines++;
1146                     }
1147                     /***/
1148                 } else if (c == '\n') {
1149                     newlines++;
1150                     fCurrentEntity.lineNumber++;
1151                     fCurrentEntity.columnNumber = 1;
1152                     if (fCurrentEntity.position == fCurrentEntity.count) {
1153                         offset = 0;
1154                         fCurrentEntity.position = newlines;
1155                         if (load(newlines, false, true)) {
1156                             break;
1157                         }
1158                     }
1159                     /*** NEWLINE NORMALIZATION ***
1160                      * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1161                      * && external) {
1162                      * fCurrentEntity.position++;
1163                      * offset++;
1164                      * }
1165                      * /***/
1166                 } else {
1167                     fCurrentEntity.position--;
1168                     break;
1169                 }
1170             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1171             int i=0;
1172             for ( i = offset; i < fCurrentEntity.position; i++) {
1173                 fCurrentEntity.ch[i] = '\n';
1174                 storeWhiteSpace(i);
1175             }
1176 
1177             int length = fCurrentEntity.position - offset;
1178             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1179                 content.setValues(fCurrentEntity.ch, offset, length);
1180                 if (DEBUG_BUFFER) {
1181                     System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1182                     print();
1183                     System.out.println();
1184                 }
1185                 return -1;
1186             }
1187             if (DEBUG_BUFFER) {
1188                 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1189                 print();
1190                 System.out.println();
1191             }
1192         }
1193 
1194         // scan literal value
1195         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1196             c = fCurrentEntity.ch[fCurrentEntity.position];
1197             if ((c == quote &&
1198                     (!fCurrentEntity.literal || isExternal)) ||
1199                     c == '%' || !XMLChar.isContent(c)) {
1200                 break;
1201             }
1202             if (whiteSpaceInfoNeeded && c == '\t') {
1203                 storeWhiteSpace(fCurrentEntity.position);
1204             }
1205         }
1206         int length = fCurrentEntity.position - offset;
1207         fCurrentEntity.columnNumber += length - newlines;
1208         if (fCurrentEntity.isGE) {
1209             checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
1210         }
1211         content.setValues(fCurrentEntity.ch, offset, length);
1212 
1213         // return next character
1214         if (fCurrentEntity.position != fCurrentEntity.count) {
1215             c = fCurrentEntity.ch[fCurrentEntity.position];
1216             // NOTE: We don't want to accidentally signal the
1217             //       end of the literal if we're expanding an
1218             //       entity appearing in the literal. -Ac
1219             if (c == quote && fCurrentEntity.literal) {
1220                 c = -1;
1221             }
1222         } else {
1223             c = -1;
1224         }
1225         if (DEBUG_BUFFER) {
1226             System.out.print(")scanLiteral, '"+(char)quote+"': ");
1227             print();
1228             System.out.println(" -> '"+(char)c+"'");
1229         }
1230         return c;
1231 
1232     } // scanLiteral(int,XMLString):int
1233 
1234     /**
1235      * Save whitespace information. Increase the whitespace buffer by 100
1236      * when needed.
1237      *
1238      * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1239      *
1240      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1241      */
1242     private void storeWhiteSpace(int whiteSpacePos) {
1243         if (whiteSpaceLen >= whiteSpaceLookup.length) {
1244             int [] tmp = new int[whiteSpaceLookup.length + 100];
1245             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1246             whiteSpaceLookup = tmp;
1247         }
1248 
1249         whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1250     }
1251 
    /**
     * Scans a range of character data up to the specified delimiter,
     * appending the scanned characters (without the delimiter) to the
     * supplied XMLStringBuffer.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * <strong>Note:</strong> A leading run of line breaks in each chunk is
     * normalized in place in the entity buffer to '\n' ('\r' and '\r\n'
     * only for external entities).
     *
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true if there is more data to scan (the scan stopped
     *         after a newline run near the end of the buffer, or at an
     *         invalid character); false if the delimiter was found or the
     *         input ended before a full delimiter could be read.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        // each iteration scans one buffer-sized chunk; loop until the
        // delimiter has been found
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }

            boolean bNextEntity = false;

            // Fewer than delimLen unread characters remain: shift them to
            // the front of the buffer and load more, until enough are
            // available or load() returns true.
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // Flush whatever is left to the caller's buffer and give up.
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                // position was reset to 0 by the loop above, so count
                // equals the number of characters just appended
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true, false);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                // Consume the leading run of line breaks; any other
                // character ends the run and is pushed back.
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted: restart at the front,
                            // leaving room for the newlines counted so far
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: consume the '\n' too and advance
                            // offset so the pair yields a single '\n'
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is reset here but not in
                            // the '\r' branch above -- confirm whether the
                            // asymmetry is intentional.
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                    } else {
                        // not a line break: un-read it and stop
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // overwrite the consumed range with normalized '\n' characters
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // Only one unread character is left: append just the
                    // newline run and report that more data remains.
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // ran out of buffer mid-delimiter: rewind to the
                            // candidate's first character and end this chunk
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: rewind to just after the candidate's
                            // first character and keep scanning
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // the whole delimiter matched only if position advanced
                    // past all delimLen characters
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // line break: end this chunk so the next iteration
                    // normalizes it
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // invalid character: return what was scanned so far and
                    // leave the offending character unread
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter was consumed but is not part of the data
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop above only exits once done is true, so this is false
        return !done;

    } // scanData(String, XMLStringBuffer)
1440 
1441     /**
1442      * Skips a character appearing immediately on the input.
1443      * <p>
1444      * <strong>Note:</strong> The character is consumed only if it matches
1445      * the specified character.
1446      *
1447      * @param c The character to skip.
1448      *
1449      * @return Returns true if the character was skipped.
1450      *
1451      * @throws IOException  Thrown if i/o error occurs.
1452      * @throws EOFException Thrown on end of file.
1453      */
1454     public boolean skipChar(int c) throws IOException {
1455         if (DEBUG_BUFFER) {
1456             System.out.print("(skipChar, '"+(char)c+"': ");
1457             print();
1458             System.out.println();
1459         }
1460 
1461         // load more characters, if needed
1462         if (fCurrentEntity.position == fCurrentEntity.count) {
1463             load(0, true, true);
1464         }
1465 
1466         // skip character
1467         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1468         if (cc == c) {
1469             fCurrentEntity.position++;
1470             if (c == '\n') {
1471                 fCurrentEntity.lineNumber++;
1472                 fCurrentEntity.columnNumber = 1;
1473             } else {
1474                 fCurrentEntity.columnNumber++;
1475             }
1476             if (DEBUG_BUFFER) {
1477                 System.out.print(")skipChar, '"+(char)c+"': ");
1478                 print();
1479                 System.out.println(" -> true");
1480             }
1481             return true;
1482         } else if (c == '\n' && cc == '\r' && isExternal) {
1483             // handle newlines
1484             if (fCurrentEntity.position == fCurrentEntity.count) {
1485                 invokeListeners(1);
1486                 fCurrentEntity.ch[0] = (char)cc;
1487                 load(1, false, false);
1488             }
1489             fCurrentEntity.position++;
1490             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1491                 fCurrentEntity.position++;
1492             }
1493             fCurrentEntity.lineNumber++;
1494             fCurrentEntity.columnNumber = 1;
1495             if (DEBUG_BUFFER) {
1496                 System.out.print(")skipChar, '"+(char)c+"': ");
1497                 print();
1498                 System.out.println(" -> true");
1499             }
1500             return true;
1501         }
1502 
1503         // character was not skipped
1504         if (DEBUG_BUFFER) {
1505             System.out.print(")skipChar, '"+(char)c+"': ");
1506             print();
1507             System.out.println(" -> false");
1508         }
1509         return false;
1510 
1511     } // skipChar(int):boolean
1512 
1513     public boolean isSpace(char ch){
1514         return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1515     }
    /**
     * Skips space characters appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they are
     * space characters. Line and column numbers of the current entity are
     * updated while skipping: a line feed, or a carriage return when the
     * scanner's <code>isExternal</code> flag is set, starts a new line, and
     * a carriage return directly followed by a line feed is counted as one
     * newline.
     *
     * @return Returns true if at least one space character was skipped;
     *         false if the next character is not a space or if there is
     *         no current entity left after the initial load.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // This check is done only in skipSpaces() because it is called by
        // fMiscDispatcher and the parser should exit gracefully when the
        // document is well-formed: the end of the document may have been
        // reached, in which case fCurrentEntity is null. Nothing was read,
        // so false is returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                // set when the load() below reports the end of the entity;
                // in that case the position is not advanced further down
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: keep it in
                    // slot 0 and read the following characters behind it so
                    // that the "\r\n" look-ahead below has data to inspect
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        invokeListeners(1);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged){
                            // the load changed the position to 1; move it
                            // back onto the saved newline in slot 0
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            // no entity left; a space was already consumed
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek at the character after '\r'; step back again
                        // unless it is the '\n' of a "\r\n" pair
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // step past the character just handled, unless the load
                // above already reported the end of the entity
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    // Same graceful-exit check as at the top of the method:
                    // the end of the document may have been reached and
                    // fCurrentEntity may be null. At least one space has
                    // been consumed by now, so true is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean
1620 
1621 
1622     /**
1623      * @param legnth This function checks that following number of characters are available.
1624      * to the underlying buffer.
1625      * @return This function returns true if capacity asked is available.
1626      */
1627     public boolean arrangeCapacity(int length) throws IOException{
1628         return arrangeCapacity(length, false);
1629     }
1630 
1631     /**
1632      * @param legnth This function checks that following number of characters are available.
1633      * to the underlying buffer.
1634      * @param if the underlying function should change the entity
1635      * @return This function returns true if capacity asked is available.
1636      *
1637      */
1638     public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1639         //check if the capacity is availble in the current buffer
1640         //count is no. of characters in the buffer   [x][m][l]
1641         //position is '0' based
1642         //System.out.println("fCurrent Entity " + fCurrentEntity);
1643         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1644             return true;
1645         }
1646         if(DEBUG_SKIP_STRING){
1647             System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1648             System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1649             System.out.println("length = " + length);
1650         }
1651         boolean entityChanged = false;
1652         //load more characters -- this function shouldn't change the entity
1653         while((fCurrentEntity.count - fCurrentEntity.position) < length){
1654             if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1655                 invokeListeners(0);
1656                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1657                 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1658                 fCurrentEntity.position = 0;
1659             }
1660 
1661             if((fCurrentEntity.count - fCurrentEntity.position) < length){
1662                 int pos = fCurrentEntity.position;
1663                 invokeListeners(pos);
1664                 entityChanged = load(fCurrentEntity.count, changeEntity, false);
1665                 fCurrentEntity.position = pos;
1666                 if(entityChanged)break;
1667             }
1668             if(DEBUG_SKIP_STRING){
1669                 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1670                 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1671                 System.out.println("length = " + length);
1672             }
1673         }
1674         //load changes the position.. set it back to the point where we started.
1675 
1676         //after loading check again.
1677         if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1678             return true;
1679         } else {
1680             return false;
1681         }
1682     }
1683 
1684     /**
1685      * Skips the specified string appearing immediately on the input.
1686      * <p>
1687      * <strong>Note:</strong> The characters are consumed only if all
1688      * the characters are skipped.
1689      *
1690      * @param s The string to skip.
1691      *
1692      * @return Returns true if the string was skipped.
1693      *
1694      * @throws IOException  Thrown if i/o error occurs.
1695      * @throws EOFException Thrown on end of file.
1696      */
1697     public boolean skipString(String s) throws IOException {
1698 
1699         final int length = s.length();
1700 
1701         //first make sure that required capacity is avaible
1702         if(arrangeCapacity(length, false)){
1703             final int beforeSkip = fCurrentEntity.position ;
1704             int afterSkip = fCurrentEntity.position + length - 1 ;
1705             if(DEBUG_SKIP_STRING){
1706                 System.out.println("skipString,length = " + s + "," + length);
1707                 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1708             }
1709 
1710             //s.charAt() indexes are 0 to 'Length -1' based.
1711             int i = length - 1 ;
1712             //check from reverse
1713             while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1714                 if(afterSkip-- == beforeSkip){
1715                     fCurrentEntity.position = fCurrentEntity.position + length ;
1716                     fCurrentEntity.columnNumber += length;
1717                     return true;
1718                 }
1719             }
1720         }
1721 
1722         return false;
1723     } // skipString(String):boolean
1724 
1725     public boolean skipString(char [] s) throws IOException {
1726 
1727         final int length = s.length;
1728         //first make sure that required capacity is avaible
1729         if(arrangeCapacity(length, false)){
1730             int beforeSkip = fCurrentEntity.position;
1731 
1732             if(DEBUG_SKIP_STRING){
1733                 System.out.println("skipString,length = " + new String(s) + "," + length);
1734                 System.out.println("skipString,length = " + new String(s) + "," + length);
1735             }
1736 
1737             for(int i=0;i<length;i++){
1738                 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1739                    return false;
1740                 }
1741             }
1742             fCurrentEntity.position = fCurrentEntity.position + length ;
1743             fCurrentEntity.columnNumber += length;
1744             return true;
1745 
1746         }
1747 
1748         return false;
1749     }
1750 
1751     //
1752     // Locator methods
1753     //
1754     //
1755     // Private methods
1756     //
1757 
1758     /**
1759      * Loads a chunk of text.
1760      *
1761      * @param offset       The offset into the character buffer to
1762      *                     read the next batch of characters.
1763      * @param changeEntity True if the load should change entities
1764      *                     at the end of the entity, otherwise leave
1765      *                     the current entity in place and the entity
1766      *                     boundary will be signaled by the return
1767      *                     value.
1768      * @param notify       Determine whether to notify listeners of
1769      *                     the event
1770      *
1771      * @returns Returns true if the entity changed as a result of this
1772      *          load operation.
1773      */
1774     final boolean load(int offset, boolean changeEntity, boolean notify)
1775     throws IOException {
1776         if (DEBUG_BUFFER) {
1777             System.out.print("(load, "+offset+": ");
1778             print();
1779             System.out.println();
1780         }
1781         if (notify) {
1782             invokeListeners(offset);
1783         }
1784         //maintaing the count till last load
1785         fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
1786         // read characters
1787         int length = fCurrentEntity.ch.length - offset;
1788         if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
1789             length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
1790         }
1791         if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
1792         int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1793         if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
1794 
1795         // reset count and position
1796         boolean entityChanged = false;
1797         if (count != -1) {
1798             if (count != 0) {
1799                 // record the last count
1800                 fCurrentEntity.fLastCount = count;
1801                 fCurrentEntity.count = count + offset;
1802                 fCurrentEntity.position = offset;
1803             }
1804         }
1805         // end of this entity
1806         else {
1807             fCurrentEntity.count = offset;
1808             fCurrentEntity.position = offset;
1809             entityChanged = true;
1810 
1811             if (changeEntity) {
1812                 //notify the entity manager about the end of entity
1813                 fEntityManager.endEntity();
1814                 //return if the current entity becomes null
1815                 if(fCurrentEntity == null){
1816                     throw END_OF_DOCUMENT_ENTITY;
1817                 }
1818                 // handle the trailing edges
1819                 if (fCurrentEntity.position == fCurrentEntity.count) {
1820                     load(0, true, false);
1821                 }
1822             }
1823 
1824         }
1825         if (DEBUG_BUFFER) {
1826             System.out.print(")load, "+offset+": ");
1827             print();
1828             System.out.println();
1829         }
1830 
1831         return entityChanged;
1832 
1833     } // load(int, boolean):boolean
1834 
1835     /**
1836      * Creates a reader capable of reading the given input stream in
1837      * the specified encoding.
1838      *
1839      * @param inputStream  The input stream.
1840      * @param encoding     The encoding name that the input stream is
1841      *                     encoded using. If the user has specified that
1842      *                     Java encoding names are allowed, then the
1843      *                     encoding name may be a Java encoding name;
1844      *                     otherwise, it is an ianaEncoding name.
1845      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1846      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1847      *                      unknown or not relevant.
1848      *
1849      * @return Returns a reader.
1850      */
1851     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1852     throws IOException {
1853 
1854         // normalize encoding name
1855         if (encoding == null) {
1856             encoding = "UTF-8";
1857         }
1858 
1859         // try to use an optimized reader
1860         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1861         if (ENCODING.equals("UTF-8")) {
1862             if (DEBUG_ENCODINGS) {
1863                 System.out.println("$$$ creating UTF8Reader");
1864             }
1865             return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1866         }
1867         if (ENCODING.equals("US-ASCII")) {
1868             if (DEBUG_ENCODINGS) {
1869                 System.out.println("$$$ creating ASCIIReader");
1870             }
1871             return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1872         }
1873         if(ENCODING.equals("ISO-10646-UCS-4")) {
1874             if(isBigEndian != null) {
1875                 boolean isBE = isBigEndian.booleanValue();
1876                 if(isBE) {
1877                     return new UCSReader(inputStream, UCSReader.UCS4BE);
1878                 } else {
1879                     return new UCSReader(inputStream, UCSReader.UCS4LE);
1880                 }
1881             } else {
1882                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1883                         "EncodingByteOrderUnsupported",
1884                         new Object[] { encoding },
1885                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1886             }
1887         }
1888         if(ENCODING.equals("ISO-10646-UCS-2")) {
1889             if(isBigEndian != null) { // sould never happen with this encoding...
1890                 boolean isBE = isBigEndian.booleanValue();
1891                 if(isBE) {
1892                     return new UCSReader(inputStream, UCSReader.UCS2BE);
1893                 } else {
1894                     return new UCSReader(inputStream, UCSReader.UCS2LE);
1895                 }
1896             } else {
1897                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1898                         "EncodingByteOrderUnsupported",
1899                         new Object[] { encoding },
1900                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1901             }
1902         }
1903 
1904         // check for valid name
1905         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1906         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1907         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1908             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1909                     "EncodingDeclInvalid",
1910                     new Object[] { encoding },
1911                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
1912                     // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1913                     //       because every byte is a valid ISO Latin 1 character.
1914                     //       It may not translate correctly but if we failed on
1915                     //       the encoding anyway, then we're expecting the content
1916                     //       of the document to be bad. This will just prevent an
1917                     //       invalid UTF-8 sequence to be detected. This is only
1918                     //       important when continue-after-fatal-error is turned
1919                     //       on. -Ac
1920                     encoding = "ISO-8859-1";
1921         }
1922 
1923         // try to use a Java reader
1924         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1925         if (javaEncoding == null) {
1926             if(fAllowJavaEncodings) {
1927                 javaEncoding = encoding;
1928             } else {
1929                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1930                         "EncodingDeclInvalid",
1931                         new Object[] { encoding },
1932                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
1933                         // see comment above.
1934                         javaEncoding = "ISO8859_1";
1935             }
1936         }
1937         else if (javaEncoding.equals("ASCII")) {
1938             if (DEBUG_ENCODINGS) {
1939                 System.out.println("$$$ creating ASCIIReader");
1940             }
1941             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1942         }
1943 
1944         if (DEBUG_ENCODINGS) {
1945             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
1946             if (javaEncoding == encoding) {
1947                 System.out.print(" (IANA encoding)");
1948             }
1949             System.out.println();
1950         }
1951         return new InputStreamReader(inputStream, javaEncoding);
1952 
1953     } // createReader(InputStream,String, Boolean): Reader
1954 
1955     /**
1956      * Returns the IANA encoding name that is auto-detected from
1957      * the bytes specified, with the endian-ness of that encoding where appropriate.
1958      *
1959      * @param b4    The first four bytes of the input.
1960      * @param count The number of bytes actually read.
1961      * @return a 2-element array:  the first element, an IANA-encoding string,
1962      *  the second element a Boolean which is true iff the document is big endian, false
1963      *  if it's little-endian, and null if the distinction isn't relevant.
1964      */
1965     protected Object[] getEncodingName(byte[] b4, int count) {
1966 
1967         if (count < 2) {
1968             return new Object[]{"UTF-8", null};
1969         }
1970 
1971         // UTF-16, with BOM
1972         int b0 = b4[0] & 0xFF;
1973         int b1 = b4[1] & 0xFF;
1974         if (b0 == 0xFE && b1 == 0xFF) {
1975             // UTF-16, big-endian
1976             return new Object [] {"UTF-16BE", new Boolean(true)};
1977         }
1978         if (b0 == 0xFF && b1 == 0xFE) {
1979             // UTF-16, little-endian
1980             return new Object [] {"UTF-16LE", new Boolean(false)};
1981         }
1982 
1983         // default to UTF-8 if we don't have enough bytes to make a
1984         // good determination of the encoding
1985         if (count < 3) {
1986             return new Object [] {"UTF-8", null};
1987         }
1988 
1989         // UTF-8 with a BOM
1990         int b2 = b4[2] & 0xFF;
1991         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1992             return new Object [] {"UTF-8", null};
1993         }
1994 
1995         // default to UTF-8 if we don't have enough bytes to make a
1996         // good determination of the encoding
1997         if (count < 4) {
1998             return new Object [] {"UTF-8", null};
1999         }
2000 
2001         // other encodings
2002         int b3 = b4[3] & 0xFF;
2003         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2004             // UCS-4, big endian (1234)
2005             return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
2006         }
2007         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2008             // UCS-4, little endian (4321)
2009             return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
2010         }
2011         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2012             // UCS-4, unusual octet order (2143)
2013             // REVISIT: What should this be?
2014             return new Object [] {"ISO-10646-UCS-4", null};
2015         }
2016         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2017             // UCS-4, unusual octect order (3412)
2018             // REVISIT: What should this be?
2019             return new Object [] {"ISO-10646-UCS-4", null};
2020         }
2021         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2022             // UTF-16, big-endian, no BOM
2023             // (or could turn out to be UCS-2...
2024             // REVISIT: What should this be?
2025             return new Object [] {"UTF-16BE", new Boolean(true)};
2026         }
2027         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2028             // UTF-16, little-endian, no BOM
2029             // (or could turn out to be UCS-2...
2030             return new Object [] {"UTF-16LE", new Boolean(false)};
2031         }
2032         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2033             // EBCDIC
2034             // a la xerces1, return CP037 instead of EBCDIC here
2035             return new Object [] {"CP037", null};
2036         }
2037 
2038         // default encoding
2039         return new Object [] {"UTF-8", null};
2040 
2041     } // getEncodingName(byte[],int):Object[]
2042 
    /*
     * TODO: endEntity() still needs to be implemented here; its
     * documentation is kept below as a reminder.
     *
     * Ends an entity.
     *
     * @throws XNIException Thrown by entity handler to signal an error.
     */
2049     //
2050     /** Prints the contents of the buffer. */
2051     final void print() {
2052         if (DEBUG_BUFFER) {
2053             if (fCurrentEntity != null) {
2054                 System.out.print('[');
2055                 System.out.print(fCurrentEntity.count);
2056                 System.out.print(' ');
2057                 System.out.print(fCurrentEntity.position);
2058                 if (fCurrentEntity.count > 0) {
2059                     System.out.print(" \"");
2060                     for (int i = 0; i < fCurrentEntity.count; i++) {
2061                         if (i == fCurrentEntity.position) {
2062                             System.out.print('^');
2063                         }
2064                         char c = fCurrentEntity.ch[i];
2065                         switch (c) {
2066                             case '\n': {
2067                                 System.out.print("\\n");
2068                                 break;
2069                             }
2070                             case '\r': {
2071                                 System.out.print("\\r");
2072                                 break;
2073                             }
2074                             case '\t': {
2075                                 System.out.print("\\t");
2076                                 break;
2077                             }
2078                             case '\\': {
2079                                 System.out.print("\\\\");
2080                                 break;
2081                             }
2082                             default: {
2083                                 System.out.print(c);
2084                             }
2085                         }
2086                     }
2087                     if (fCurrentEntity.position == fCurrentEntity.count) {
2088                         System.out.print('^');
2089                     }
2090                     System.out.print('"');
2091                 }
2092                 System.out.print(']');
2093                 System.out.print(" @ ");
2094                 System.out.print(fCurrentEntity.lineNumber);
2095                 System.out.print(',');
2096                 System.out.print(fCurrentEntity.columnNumber);
2097             } else {
2098                 System.out.print("*NO CURRENT ENTITY*");
2099             }
2100         }
2101     }
2102 
2103     /**
2104      * Registers the listener object and provides callback.
2105      * @param listener listener to which call back should be provided when scanner buffer
2106      * is being changed.
2107      */
2108     public void registerListener(XMLBufferListener listener) {
2109         if (!listeners.contains(listener)) {
2110             listeners.add(listener);
2111         }
2112     }
2113 
2114     /**
2115      *
2116      * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2117      */
2118     public void invokeListeners(int loadPos){
2119         for (int i=0; i<listeners.size(); i++) {
2120             listeners.get(i).refresh(loadPos);
2121         }
2122     }
2123 
    /**
     * Skips space characters appearing immediately on the input that would
     * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
     * normalization is performed. This is useful when scanning structures
     * such as the XMLDecl and TextDecl that can only contain US-ASCII
     * characters.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they would
     * match non-terminal S before end of line normalization is performed.
     * Line and column numbers of the current entity are updated while
     * skipping. Buffer listeners are not notified by this method: every
     * load is requested with notification switched off.
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public final boolean skipDeclSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, false);
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            // sampled once, before the loop, from the entity current at this point
            boolean external = fCurrentEntity.isExternal();
            do {
                // set when the load() below reports the end of the entity;
                // in that case the position is not advanced further down
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (external && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: keep it in
                    // slot 0 and read the following characters behind it so
                    // that the "\r\n" look-ahead below has data to inspect
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged)
                            // the load changed the position to 1; move it
                            // back onto the saved newline in slot 0
                            fCurrentEntity.position = 0;
                    }
                    if (c == '\r' && external) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek at the character after '\r'; step back again
                        // unless it is the '\n' of a "\r\n" pair
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * else {
                     * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * }
                     * }
                     * /***/
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // step past the character just handled, unless the load
                // above already reported the end of the entity
                if (!entityChanged)
                    fCurrentEntity.position++;
                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, false);
                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipDeclSpaces: ");
                //  XMLEntityManager.print(fCurrentEntity);
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> false");
        }
        return false;

    } // skipDeclSpaces():boolean
2213 
2214 
2215 } // class XMLEntityScanner