1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *     http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xerces.internal.impl;
  23 
  24 import com.sun.org.apache.xerces.internal.util.Status;
  25 import com.sun.xml.internal.stream.XMLEntityStorage;
  26 import java.io.IOException;
  27 import java.util.ArrayList;
  28 import javax.xml.stream.events.XMLEvent;
  29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  30 import com.sun.org.apache.xerces.internal.util.SymbolTable;
  31 import com.sun.org.apache.xerces.internal.util.XMLChar;
  32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl;
  33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
  35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
  36 import com.sun.org.apache.xerces.internal.xni.Augmentations;
  37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
  38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
  39 import com.sun.org.apache.xerces.internal.xni.XMLString;
  40 import com.sun.org.apache.xerces.internal.xni.XNIException;
  41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
  42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
  43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
  44 import com.sun.xml.internal.stream.Entity;
  45 
  46 //import com.sun.xml.stream.XMLEntityManager;
  47 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
  48 
  49 /**
  50  * This class is responsible for holding scanning methods common to
  51  * scanning the XML document structure and content as well as the DTD
  52  * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
  53  * from this base class.
  54  *
  55  * <p>
  56  * This component requires the following features and properties from the
  57  * component manager that uses it:
  58  * <ul>
  59  *  <li>http://xml.org/sax/features/validation</li>
  60  *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
  61  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
  62  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
  63  *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
  64  * </ul>
  65  *
  66  * @author Andy Clark, IBM
  67  * @author Arnaud  Le Hors, IBM
  68  * @author Eric Ye, IBM
  69  * @author K.Venugopal SUN Microsystems
  70  * @author Sunitha Reddy, SUN Microsystems
  71  * @LastModified: Feb 2020
  72  */
  73 public abstract class XMLScanner
  74         implements XMLComponent {
  75 
  76     //
  77     // Constants
  78     //
  79 
  80     // feature identifiers
  81 
  82     /** Feature identifier: namespaces. */
  83     protected static final String NAMESPACES =
  84             Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
  85 
  86     /** Feature identifier: validation. */
  87     protected static final String VALIDATION =
  88             Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
  89 
  90     /** Feature identifier: notify character references. */
  91     protected static final String NOTIFY_CHAR_REFS =
  92             Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
  93 
  94     // property identifiers
  95 
    /**
     * Feature identifier: internal parser-settings flag. Although it is
     * listed with the property identifiers, it is composed from the Xerces
     * <em>feature</em> prefix and is read with {@code getFeature(...)}.
     */
    protected static final String PARSER_SETTINGS =
                                Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
  98     /** Property identifier: symbol table. */
  99     protected static final String SYMBOL_TABLE =
 100             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
 101 
 102     /** Property identifier: error reporter. */
 103     protected static final String ERROR_REPORTER =
 104             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
 105 
 106     /** Property identifier: entity manager. */
 107     protected static final String ENTITY_MANAGER =
 108             Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
 109 
 110     /** Property identifier: Security manager. */
 111     private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;
 112 
 113     // debugging
 114 
 115     /** Debug attribute normalization. */
 116     protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
 117 
 118     /**
 119      * Type of names
 120      */
 121     public static enum NameType {
 122         ATTRIBUTE("attribute"),
 123         ATTRIBUTENAME("attribute name"),
 124         COMMENT("comment"),
 125         DOCTYPE("doctype"),
 126         ELEMENTSTART("startelement"),
 127         ELEMENTEND("endelement"),
 128         ENTITY("entity"),
 129         NOTATION("notation"),
 130         PI("pi"),
 131         REFERENCE("reference");
 132 
 133         final String literal;
 134         NameType(String literal) {
 135             this.literal = literal;
 136         }
 137 
 138         String literal() {
 139             return literal;
 140         }
 141     }
 142 
    // xxx: the default is false because this value does not need to be calculated;
    // there should be a feature that, when set to true, causes it to be computed.
 145     private boolean fNeedNonNormalizedValue = false;
 146 
 147     protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
 148     protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
 149     protected int fStringBufferIndex = 0;
 150     protected boolean fAttributeCacheInitDone = false;
 151     protected int fAttributeCacheUsedCount = 0;
 152 
 153     //
 154     // Data
 155     //
 156 
 157     // features
 158 
 159     /**
 160      * Validation. This feature identifier is:
 161      * http://xml.org/sax/features/validation
 162      */
 163     protected boolean fValidation = false;
 164 
 165     /** Namespaces. */
 166     protected boolean fNamespaces;
 167 
 168     /** Character references notification. */
 169     protected boolean fNotifyCharRefs = false;
 170 
 171     /** Internal parser-settings feature */
 172     protected boolean fParserSettings = true;
 173 
 174     // properties
 175 
 176     protected PropertyManager fPropertyManager = null ;
 177     /** Symbol table. */
 178     protected SymbolTable fSymbolTable;
 179 
 180     /** Error reporter. */
 181     protected XMLErrorReporter fErrorReporter;
 182 
 183     /** Entity manager. */
 184     //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
 185     protected XMLEntityManager fEntityManager = null ;
 186 
 187     /** xxx this should be available from EntityManager Entity storage */
 188     protected XMLEntityStorage fEntityStore = null ;
 189 
 190     /** Security manager. */
 191     protected XMLSecurityManager fSecurityManager = null;
 192 
 193     /** Limit analyzer. */
 194     protected XMLLimitAnalyzer fLimitAnalyzer = null;
 195 
 196     // protected data
 197 
 198     /** event type */
 199     protected XMLEvent fEvent ;
 200 
 201     /** Entity scanner, this always works on last entity that was opened. */
 202     protected XMLEntityScanner fEntityScanner = null;
 203 
 204     /** Entity depth. */
 205     protected int fEntityDepth;
 206 
 207     /** Literal value of the last character reference scanned. */
 208     protected String fCharRefLiteral = null;
 209 
 210     /** Scanning attribute. */
 211     protected boolean fScanningAttribute;
 212 
 213     /** Report entity boundary. */
 214     protected boolean fReportEntity;
 215 
 216     // symbols
 217 
 218     /** Symbol: "version". */
 219     protected final static String fVersionSymbol = "version".intern();
 220 
 221     /** Symbol: "encoding". */
 222     protected final static String fEncodingSymbol = "encoding".intern();
 223 
 224     /** Symbol: "standalone". */
 225     protected final static String fStandaloneSymbol = "standalone".intern();
 226 
 227     /** Symbol: "amp". */
 228     protected final static String fAmpSymbol = "amp".intern();
 229 
 230     /** Symbol: "lt". */
 231     protected final static String fLtSymbol = "lt".intern();
 232 
 233     /** Symbol: "gt". */
 234     protected final static String fGtSymbol = "gt".intern();
 235 
 236     /** Symbol: "quot". */
 237     protected final static String fQuotSymbol = "quot".intern();
 238 
 239     /** Symbol: "apos". */
 240     protected final static String fAposSymbol = "apos".intern();
 241 
 242     // temporary variables
 243 
    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.
 249 
 250     /** String. */
 251     private XMLString fString = new XMLString();
 252 
 253     /** String buffer. */
 254     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
 255 
 256     /** String buffer. */
 257     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
 258 
 259     /** String buffer. */
 260     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
 261 
 262     // temporary location for Resource identification information.
 263     protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
 264     int initialCacheCount = 6;
 265     //
 266     // XMLComponent methods
 267     //
 268 
 269     /**
 270      *
 271      *
 272      * @param componentManager The component manager.
 273      *
 274      * @throws SAXException Throws exception if required features and
 275      *                      properties cannot be found.
 276      */
 277     public void reset(XMLComponentManager componentManager)
 278     throws XMLConfigurationException {
 279 
 280                 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true);
 281 
 282                 if (!fParserSettings) {
 283                         // parser settings have not been changed
 284                         init();
 285                         return;
 286                 }
 287 
 288 
 289         // Xerces properties
 290         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
 291         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 292         fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
 293         fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER);
 294 
 295         //this step is extra because we have separated the storage of entity
 296         fEntityStore = fEntityManager.getEntityStore() ;
 297 
 298         // sax features
 299         fValidation = componentManager.getFeature(VALIDATION, false);
 300         fNamespaces = componentManager.getFeature(NAMESPACES, true);
 301         fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false);
 302 
 303         init();
 304     } // reset(XMLComponentManager)
 305 
 306     protected void setPropertyManager(PropertyManager propertyManager){
 307         fPropertyManager = propertyManager ;
 308     }
 309 
 310     /**
 311      * Sets the value of a property during parsing.
 312      *
 313      * @param propertyId
 314      * @param value
 315      */
 316     public void setProperty(String propertyId, Object value)
 317     throws XMLConfigurationException {
 318 
 319         // Xerces properties
 320         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
 321             String property =
 322                     propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
 323             if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) {
 324                 fSymbolTable = (SymbolTable)value;
 325             } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) {
 326                 fErrorReporter = (XMLErrorReporter)value;
 327             } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
 328                 fEntityManager = (XMLEntityManager)value;
 329             }
 330         }
 331 
 332         if (propertyId.equals(SECURITY_MANAGER)) {
 333             fSecurityManager = (XMLSecurityManager)value;
 334         }
 335                 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){
 336             fStaxProperties = (HashMap)value;
 337             //TODO::discuss with neeraj what are his thoughts on passing properties.
 338             //For now use this
 339         }*/
 340 
 341     } // setProperty(String,Object)
 342 
 343     /*
 344      * Sets the feature of the scanner.
 345      */
 346     public void setFeature(String featureId, boolean value)
 347     throws XMLConfigurationException {
 348 
 349         if (VALIDATION.equals(featureId)) {
 350             fValidation = value;
 351         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
 352             fNotifyCharRefs = value;
 353         }
 354     }
 355 
 356     /*
 357      * Gets the state of the feature of the scanner.
 358      */
 359     public boolean getFeature(String featureId)
 360     throws XMLConfigurationException {
 361 
 362         if (VALIDATION.equals(featureId)) {
 363             return fValidation;
 364         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
 365             return fNotifyCharRefs;
 366         }
 367         throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
 368     }
 369 
 370     //
 371     // Protected methods
 372     //
 373 
 374     // anybody calling this had better have set Symtoltable!
 375     protected void reset() {
 376         init();
 377 
 378         // DTD preparsing defaults:
 379         fValidation = true;
 380         fNotifyCharRefs = false;
 381 
 382     }
 383 
 384     public void reset(PropertyManager propertyManager) {
 385         init();
 386         // Xerces properties
 387         fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
 388 
 389         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
 390 
 391         fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER);
 392         fEntityStore = fEntityManager.getEntityStore() ;
 393         fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ;
 394         fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);
 395 
 396         //fEntityManager.reset();
 397         // DTD preparsing defaults:
 398         fValidation = false;
 399         fNotifyCharRefs = false;
 400 
 401     }
 402     // common scanning methods
 403 
 404     /**
 405      * Scans an XML or text declaration.
 406      * <p>
 407      * <pre>
 408      * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 409      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
 410      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
 411      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 412      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
 413      *                 | ('"' ('yes' | 'no') '"'))
 414      *
 415      * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 416      * </pre>
 417      *
 418      * @param scanningTextDecl True if a text declaration is to
 419      *                         be scanned instead of an XML
 420      *                         declaration.
 421      * @param pseudoAttributeValues An array of size 3 to return the version,
 422      *                         encoding and standalone pseudo attribute values
 423      *                         (in that order).
 424      *
 425      * <strong>Note:</strong> This method uses fString, anything in it
 426      * at the time of calling is lost.
 427      */
 428     protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
 429             String[] pseudoAttributeValues)
 430             throws IOException, XNIException {
 431 
 432         // pseudo-attribute values
 433         String version = null;
 434         String encoding = null;
 435         String standalone = null;
 436 
 437         // scan pseudo-attributes
 438         final int STATE_VERSION = 0;
 439         final int STATE_ENCODING = 1;
 440         final int STATE_STANDALONE = 2;
 441         final int STATE_DONE = 3;
 442         int state = STATE_VERSION;
 443 
 444         boolean dataFoundForTarget = false;
 445         boolean sawSpace = fEntityScanner.skipSpaces();
 446         // since pseudoattributes are *not* attributes,
 447         // their quotes don't need to be preserved in external parameter entities.
 448         // the XMLEntityScanner#scanLiteral method will continue to
 449         // emit -1 in such cases when it finds a quote; this is
 450         // fine for other methods that parse scanned entities,
 451         // but not for the scanning of pseudoattributes.  So,
 452         // temporarily, we must mark the current entity as not being "literal"
 453         Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
 454         boolean currLiteral = currEnt.literal;
 455         currEnt.literal = false;
 456         while (fEntityScanner.peekChar() != '?') {
 457             dataFoundForTarget = true;
 458             String name = scanPseudoAttribute(scanningTextDecl, fString);
 459             switch (state) {
 460                 case STATE_VERSION: {
 461                     if (name.equals(fVersionSymbol)) {
 462                         if (!sawSpace) {
 463                             reportFatalError(scanningTextDecl
 464                                     ? "SpaceRequiredBeforeVersionInTextDecl"
 465                                     : "SpaceRequiredBeforeVersionInXMLDecl",
 466                                     null);
 467                         }
 468                         version = fString.toString();
 469                         state = STATE_ENCODING;
 470                         if (!versionSupported(version)) {
 471                             reportFatalError("VersionNotSupported",
 472                                     new Object[]{version});
 473                         }
 474 
 475                         if (version.equals("1.1")) {
 476                             Entity.ScannedEntity top = fEntityManager.getTopLevelEntity();
 477                             if (top != null && (top.version == null || top.version.equals("1.0"))) {
 478                                 reportFatalError("VersionMismatch", null);
 479                             }
 480                             fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
 481                         }
 482 
 483                     } else if (name.equals(fEncodingSymbol)) {
 484                         if (!scanningTextDecl) {
 485                             reportFatalError("VersionInfoRequired", null);
 486                         }
 487                         if (!sawSpace) {
 488                             reportFatalError(scanningTextDecl
 489                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
 490                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
 491                                     null);
 492                         }
 493                         encoding = fString.toString();
 494                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
 495                     } else {
 496                         if (scanningTextDecl) {
 497                             reportFatalError("EncodingDeclRequired", null);
 498                         } else {
 499                             reportFatalError("VersionInfoRequired", null);
 500                         }
 501                     }
 502                     break;
 503                 }
 504                 case STATE_ENCODING: {
 505                     if (name.equals(fEncodingSymbol)) {
 506                         if (!sawSpace) {
 507                             reportFatalError(scanningTextDecl
 508                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
 509                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
 510                                     null);
 511                         }
 512                         encoding = fString.toString();
 513                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
 514                         // TODO: check encoding name; set encoding on
 515                         //       entity scanner
 516                     } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) {
 517                         if (!sawSpace) {
 518                             reportFatalError("SpaceRequiredBeforeStandalone",
 519                                     null);
 520                         }
 521                         standalone = fString.toString();
 522                         state = STATE_DONE;
 523                         if (!standalone.equals("yes") && !standalone.equals("no")) {
 524                             reportFatalError("SDDeclInvalid", new Object[] {standalone});
 525                         }
 526                     } else {
 527                         reportFatalError("EncodingDeclRequired", null);
 528                     }
 529                     break;
 530                 }
 531                 case STATE_STANDALONE: {
 532                     if (name.equals(fStandaloneSymbol)) {
 533                         if (!sawSpace) {
 534                             reportFatalError("SpaceRequiredBeforeStandalone",
 535                                     null);
 536                         }
 537                         standalone = fString.toString();
 538                         state = STATE_DONE;
 539                         if (!standalone.equals("yes") && !standalone.equals("no")) {
 540                             reportFatalError("SDDeclInvalid",  new Object[] {standalone});
 541                         }
 542                     } else {
 543                         reportFatalError("SDDeclNameInvalid", null);
 544                     }
 545                     break;
 546                 }
 547                 default: {
 548                     reportFatalError("NoMorePseudoAttributes", null);
 549                 }
 550             }
 551             sawSpace = fEntityScanner.skipSpaces();
 552         }
 553         // restore original literal value
 554         if(currLiteral) {
 555             currEnt.literal = true;
 556         }
 557         // REVISIT: should we remove this error reporting?
 558         if (scanningTextDecl && state != STATE_DONE) {
 559             reportFatalError("MorePseudoAttributes", null);
 560         }
 561 
 562         // If there is no data in the xml or text decl then we fail to report error
 563         // for version or encoding info above.
 564         if (scanningTextDecl) {
 565             if (!dataFoundForTarget && encoding == null) {
 566                 reportFatalError("EncodingDeclRequired", null);
 567             }
 568         } else {
 569             if (!dataFoundForTarget && version == null) {
 570                 reportFatalError("VersionInfoRequired", null);
 571             }
 572         }
 573 
 574         // end
 575         if (!fEntityScanner.skipChar('?', null)) {
 576             reportFatalError("XMLDeclUnterminated", null);
 577         }
 578         if (!fEntityScanner.skipChar('>', null)) {
 579             reportFatalError("XMLDeclUnterminated", null);
 580 
 581         }
 582 
 583         // fill in return array
 584         pseudoAttributeValues[0] = version;
 585         pseudoAttributeValues[1] = encoding;
 586         pseudoAttributeValues[2] = standalone;
 587 
 588     } // scanXMLDeclOrTextDecl(boolean)
 589 
 590     /**
 591      * Scans a pseudo attribute.
 592      *
 593      * @param scanningTextDecl True if scanning this pseudo-attribute for a
 594      *                         TextDecl; false if scanning XMLDecl. This
 595      *                         flag is needed to report the correct type of
 596      *                         error.
 597      * @param value            The string to fill in with the attribute
 598      *                         value.
 599      *
 600      * @return The name of the attribute
 601      *
 602      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
 603      * at the time of calling is lost.
 604      */
 605     protected String scanPseudoAttribute(boolean scanningTextDecl,
 606             XMLString value)
 607             throws IOException, XNIException {
 608 
 609         String name = scanPseudoAttributeName();
 610         // XMLEntityManager.print(fEntityManager.getCurrentEntity());
 611 
 612         if (name == null) {
 613             reportFatalError("PseudoAttrNameExpected", null);
 614         }
 615         fEntityScanner.skipSpaces();
 616         if (!fEntityScanner.skipChar('=', null)) {
 617             reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
 618                     : "EqRequiredInXMLDecl", new Object[]{name});
 619         }
 620         fEntityScanner.skipSpaces();
 621         int quote = fEntityScanner.peekChar();
 622         if (quote != '\'' && quote != '"') {
 623             reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
 624                     : "QuoteRequiredInXMLDecl" , new Object[]{name});
 625         }
 626         fEntityScanner.scanChar(NameType.ATTRIBUTE);
 627         int c = fEntityScanner.scanLiteral(quote, value, false);
 628         if (c != quote) {
 629             fStringBuffer2.clear();
 630             do {
 631                 fStringBuffer2.append(value);
 632                 if (c != -1) {
 633                     if (c == '&' || c == '%' || c == '<' || c == ']') {
 634                         fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE));
 635                     } else if (XMLChar.isHighSurrogate(c)) {
 636                         scanSurrogates(fStringBuffer2);
 637                     } else if (isInvalidLiteral(c)) {
 638                         String key = scanningTextDecl
 639                                 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
 640                         reportFatalError(key,
 641                                 new Object[] {Integer.toString(c, 16)});
 642                                 fEntityScanner.scanChar(null);
 643                     }
 644                 }
 645                 c = fEntityScanner.scanLiteral(quote, value, false);
 646             } while (c != quote);
 647             fStringBuffer2.append(value);
 648             value.setValues(fStringBuffer2);
 649         }
 650         if (!fEntityScanner.skipChar(quote, null)) {
 651             reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
 652                     : "CloseQuoteMissingInXMLDecl",
 653                     new Object[]{name});
 654         }
 655 
 656         // return
 657         return name;
 658 
 659     } // scanPseudoAttribute(XMLString):String
 660 
 661     /**
 662      * Scans the name of a pseudo attribute. The only legal names
 663      * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'.
 664      *
 665      * @return the name of the pseudo attribute or <code>null</code>
 666      * if a legal pseudo attribute name could not be scanned.
 667      */
 668     private String scanPseudoAttributeName() throws IOException, XNIException {
 669         final int ch = fEntityScanner.peekChar();
 670         switch (ch) {
 671             case 'v':
 672                 if (fEntityScanner.skipString(fVersionSymbol)) {
 673                     return fVersionSymbol;
 674                 }
 675                 break;
 676             case 'e':
 677                 if (fEntityScanner.skipString(fEncodingSymbol)) {
 678                     return fEncodingSymbol;
 679                 }
 680                 break;
 681             case 's':
 682                 if (fEntityScanner.skipString(fStandaloneSymbol)) {
 683                     return fStandaloneSymbol;
 684                 }
 685                 break;
 686         }
 687         return null;
 688     } // scanPseudoAttributeName()
 689 
 690     /**
 691      * Scans a processing instruction.
 692      * <p>
 693      * <pre>
 694      * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 695      * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
 696      * </pre>
 697      */
 698     //CHANGED:
 699     //EARLIER: scanPI()
 700     //NOW: scanPI(XMLStringBuffer)
 701     //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same
 702     // as that for scanContent()
 703 
 704     protected void scanPI(XMLStringBuffer data) throws IOException, XNIException {
 705 
 706         // target
 707         fReportEntity = false;
 708         String target = fEntityScanner.scanName(NameType.PI);
 709         if (target == null) {
 710             reportFatalError("PITargetRequired", null);
 711         }
 712 
 713         // scan data
 714         scanPIData(target, data);
 715         fReportEntity = true;
 716 
 717     } // scanPI(XMLStringBuffer)
 718 
 719     /**
 720      * Scans a processing data. This is needed to handle the situation
 721      * where a document starts with a processing instruction whose
 722      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
 723      *
 724      * This method would always read the whole data. We have while loop and data is buffered
 725      * until delimeter is encountered.
 726      *
 727      * @param target The PI target
 728      * @param data The string to fill in with the data
 729      */
 730 
 731     //CHANGED:
 732     //Earlier:This method uses the fStringBuffer and later buffer values are set to
 733     //the supplied XMLString....
 734     //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would
 735     //be appended to that buffer
 736 
 737     protected void scanPIData(String target, XMLStringBuffer data)
 738     throws IOException, XNIException {
 739 
 740         // check target
 741         if (target.length() == 3) {
 742             char c0 = Character.toLowerCase(target.charAt(0));
 743             char c1 = Character.toLowerCase(target.charAt(1));
 744             char c2 = Character.toLowerCase(target.charAt(2));
 745             if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
 746                 reportFatalError("ReservedPITarget", null);
 747             }
 748         }
 749 
 750         // spaces
 751         if (!fEntityScanner.skipSpaces()) {
 752             if (fEntityScanner.skipString("?>")) {
 753                 // we found the end, there is no data just return
 754                 return;
 755             } else {
 756                 // if there is data there should be some space
 757                 reportFatalError("SpaceRequiredInPI", null);
 758             }
 759         }
 760 
 761         // since scanData appends the parsed data to the buffer passed
 762         // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer)
 763         //until all of the data is buffered.
 764         if (fEntityScanner.scanData("?>", data)) {
 765             do {
 766                 int c = fEntityScanner.peekChar();
 767                 if (c != -1) {
 768                     if (XMLChar.isHighSurrogate(c)) {
 769                         scanSurrogates(data);
 770                     } else if (isInvalidLiteral(c)) {
 771                         reportFatalError("InvalidCharInPI",
 772                                 new Object[]{Integer.toHexString(c)});
 773                                 fEntityScanner.scanChar(null);
 774                     }
 775                 }
 776             } while (fEntityScanner.scanData("?>", data));
 777         }
 778 
 779     } // scanPIData(String,XMLString)
 780 
 781     /**
 782      * Scans a comment.
 783      * <p>
 784      * <pre>
     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 786      * </pre>
 787      * <p>
 788      * <strong>Note:</strong> Called after scanning past '&lt;!--'
 789      * <strong>Note:</strong> This method uses fString, anything in it
 790      * at the time of calling is lost.
 791      *
 792      * @param text The buffer to fill in with the text.
 793      */
 794     protected void scanComment(XMLStringBuffer text)
 795     throws IOException, XNIException {
 796 
 797         //System.out.println( "XMLScanner#scanComment# In Scan Comment" );
 798         // text
 799         // REVISIT: handle invalid character, eof
 800         text.clear();
 801         while (fEntityScanner.scanData("--", text)) {
 802             int c = fEntityScanner.peekChar();
 803 
 804             //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() );
 805             //System.out.println( "XMLScanner#scanComment#c == " + c );
 806 
 807             if (c != -1) {
 808                 if (XMLChar.isHighSurrogate(c)) {
 809                     scanSurrogates(text);
 810                 }
 811                 else if (isInvalidLiteral(c)) {
 812                     reportFatalError("InvalidCharInComment",
 813                             new Object[] { Integer.toHexString(c) });
 814                             fEntityScanner.scanChar(NameType.COMMENT);
 815                 }
 816             }
 817         }
 818         if (!fEntityScanner.skipChar('>', NameType.COMMENT)) {
 819             reportFatalError("DashDashInComment", null);
 820         }
 821 
 822     } // scanComment()
 823 
 824     /**
 825      * Scans an attribute value and normalizes whitespace converting all
 826      * whitespace characters to space characters.
 827      *
 828      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
 829      *
 830      * @param value The XMLString to fill in with the value.
 831      * @param nonNormalizedValue The XMLString to fill in with the
 832      *                           non-normalized value.
 833      * @param atName The name of the attribute being parsed (for error msgs).
 834      * @param attributes The attributes list for the scanned attribute.
 835      * @param attrIndex The index of the attribute to use from the list.
 836      * @param checkEntities true if undeclared entities should be reported as VC violation,
 837      *                      false if undeclared entities should be reported as WFC violation.
 838      * @param eleName The name of element to which this attribute belongs.
 839      * @param isNSURI a flag indicating whether the content is a Namespace URI
 840      *
 841      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
 842      * at the time of calling is lost.
 843      **/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // Overview: the quoted literal is scanned into 'value'. If the first scanLiteral
        // call already stops at the closing quote, 'value' is used as is. Otherwise the
        // pieces are accumulated in a scratch buffer while references and special
        // characters are handled one at a time. When fNeedNonNormalizedValue is set, the
        // text as written is collected in fStringBuffer2 and handed to nonNormalizedValue.
        // Note: 'attributes' and 'attrIndex' are not referenced in this method body.
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        // consume the opening quote
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // Remember the entity depth at the start of the value. It is compared with
        // fEntityDepth below to decide what goes into the non-normalized buffer and
        // whether a quote character may terminate the loop.
        int entityDepth = fEntityDepth;

        // 'c' is the character scanLiteral stopped on
        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // capture the raw text before whitespace is rewritten in place
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        // The literal did not end at the closing quote: continue piecewise.
        if (c != quote) {
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                // append the (already whitespace-normalized) chunk scanned so far
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // a character reference or an entity reference
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        // character reference: the referenced char(s) are appended to
                        // stringBuffer by scanCharReferenceValue
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        // named entity reference
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // resolveCharacter handles the names it recognizes by appending
                        // the replacement character directly to stringBuffer
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    // checkEntities == true: report as a (non-fatal) error,
                                    // and only when validating; otherwise it is fatal
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // reached for declared and undeclared names alike
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported and consumed; it is not added to stringBuffer
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                } else if (c == '%' || c == ']') {
                    // these characters are taken literally
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // a line break becomes a space in the normalized value and
                    // '\n' in the non-normalized one
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // surrogate pair: scanned into fStringBuffer3 first and copied
                    // only if scanSurrogates succeeded
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // invalid character: reported and consumed; it is not added to
                    // stringBuffer
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                }
                // scan the next chunk; it is appended to stringBuffer at the top of the
                // next iteration, or after the loop for the final chunk
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check  to Attributes , do conversion
                //only if attribute is being accessed. -Venu
            // stop only at a quote seen at the entity depth the value started at
            } while (c != quote || entityDepth != fEntityDepth);
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            // hand the accumulated text back through 'value'
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()
1013 
1014 
1015     /**
1016      * Resolves character entity references.
1017      * @param entityName the name of the entity
1018      * @param stringBuffer the current XMLStringBuffer to append the character to.
1019      * @return true if resolved, false otherwise
1020      */
1021     protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) {
1022         /**
1023          * entityNames (symbols) are interned. The equals method would do the same,
1024          * but I'm leaving it as comparisons by references are common in the impl
1025          * and it made it explicit to others who read this code.
1026          */
1027         if (entityName == fAmpSymbol) {
1028             stringBuffer.append('&');
1029             return true;
1030         } else if (entityName == fAposSymbol) {
1031             stringBuffer.append('\'');
1032             return true;
1033         } else if (entityName == fLtSymbol) {
1034             stringBuffer.append('<');
1035             return true;
1036         } else if (entityName == fGtSymbol) {
1037             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1038             stringBuffer.append('>');
1039             return true;
1040         } else if (entityName == fQuotSymbol) {
1041             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1042             stringBuffer.append('"');
1043             return true;
1044         }
1045         return false;
1046     }
1047 
1048     /**
1049      * Scans External ID and return the public and system IDs.
1050      *
1051      * @param identifiers An array of size 2 to return the system id,
1052      *                    and public id (in that order).
1053      * @param optionalSystemId Specifies whether the system id is optional.
1054      *
1055      * <strong>Note:</strong> This method uses fString and fStringBuffer,
1056      * anything in them at the time of calling is lost.
1057      */
1058     protected void scanExternalID(String[] identifiers,
1059             boolean optionalSystemId)
1060             throws IOException, XNIException {
1061 
1062         String systemId = null;
1063         String publicId = null;
1064         if (fEntityScanner.skipString("PUBLIC")) {
1065             if (!fEntityScanner.skipSpaces()) {
1066                 reportFatalError("SpaceRequiredAfterPUBLIC", null);
1067             }
1068             scanPubidLiteral(fString);
1069             publicId = fString.toString();
1070 
1071             if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1072                 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1073             }
1074         }
1075 
1076         if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1077             if (publicId == null && !fEntityScanner.skipSpaces()) {
1078                 reportFatalError("SpaceRequiredAfterSYSTEM", null);
1079             }
1080             int quote = fEntityScanner.peekChar();
1081             if (quote != '\'' && quote != '"') {
1082                 if (publicId != null && optionalSystemId) {
1083                     // looks like we don't have any system id
1084                     // simply return the public id
1085                     identifiers[0] = null;
1086                     identifiers[1] = publicId;
1087                     return;
1088                 }
1089                 reportFatalError("QuoteRequiredInSystemID", null);
1090             }
1091             fEntityScanner.scanChar(null);
1092             XMLString ident = fString;
1093             if (fEntityScanner.scanLiteral(quote, ident, false) != quote) {
1094                 fStringBuffer.clear();
1095                 do {
1096                     fStringBuffer.append(ident);
1097                     int c = fEntityScanner.peekChar();
1098                     if (XMLChar.isMarkup(c) || c == ']') {
1099                         fStringBuffer.append((char)fEntityScanner.scanChar(null));
1100                     } else if (c != -1 && isInvalidLiteral(c)) {
1101                         reportFatalError("InvalidCharInSystemID",
1102                             new Object[] {Integer.toString(c, 16)});
1103                     }
1104                 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote);
1105                 fStringBuffer.append(ident);
1106                 ident = fStringBuffer;
1107             }
1108             systemId = ident.toString();
1109             if (!fEntityScanner.skipChar(quote, null)) {
1110                 reportFatalError("SystemIDUnterminated", null);
1111             }
1112         }
1113 
1114         // store result in array
1115         identifiers[0] = systemId;
1116         identifiers[1] = publicId;
1117     }
1118 
1119 
1120     /**
1121      * Scans public ID literal.
1122      *
1123      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1124      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1125      *
1126      * The returned string is normalized according to the following rule,
1127      * from http://www.w3.org/TR/REC-xml#dt-pubid:
1128      *
1129      * Before a match is attempted, all strings of white space in the public
1130      * identifier must be normalized to single space characters (#x20), and
1131      * leading and trailing white space must be removed.
1132      *
1133      * @param literal The string to fill in with the public ID literal.
1134      * @return True on success.
1135      *
1136      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1137      * the time of calling is lost.
1138      */
1139     protected boolean scanPubidLiteral(XMLString literal)
1140     throws IOException, XNIException {
1141         int quote = fEntityScanner.scanChar(null);
1142         if (quote != '\'' && quote != '"') {
1143             reportFatalError("QuoteRequiredInPublicID", null);
1144             return false;
1145         }
1146 
1147         fStringBuffer.clear();
1148         // skip leading whitespace
1149         boolean skipSpace = true;
1150         boolean dataok = true;
1151         while (true) {
1152             int c = fEntityScanner.scanChar(null);
1153             if (c == ' ' || c == '\n' || c == '\r') {
1154                 if (!skipSpace) {
1155                     // take the first whitespace as a space and skip the others
1156                     fStringBuffer.append(' ');
1157                     skipSpace = true;
1158                 }
1159             } else if (c == quote) {
1160                 if (skipSpace) {
1161                     // if we finished on a space let's trim it
1162                     fStringBuffer.length--;
1163                 }
1164                 literal.setValues(fStringBuffer);
1165                 break;
1166             } else if (XMLChar.isPubid(c)) {
1167                 fStringBuffer.append((char)c);
1168                 skipSpace = false;
1169             } else if (c == -1) {
1170                 reportFatalError("PublicIDUnterminated", null);
1171                 return false;
1172             } else {
1173                 dataok = false;
1174                 reportFatalError("InvalidCharInPublicID",
1175                         new Object[]{Integer.toHexString(c)});
1176             }
1177         }
1178         return dataok;
1179     }
1180 
1181 
1182     /**
1183      * Normalize whitespace in an XMLString converting all whitespace
1184      * characters to space characters.
1185      */
1186     protected void normalizeWhitespace(XMLString value) {
1187         int i=0;
1188         int j=0;
1189         int [] buff = fEntityScanner.whiteSpaceLookup;
1190         int buffLen = fEntityScanner.whiteSpaceLen;
1191         int end = value.offset + value.length;
1192         while(i < buffLen){
1193             j = buff[i];
1194             if(j < end ){
1195                 value.ch[j] = ' ';
1196             }
1197             i++;
1198         }
1199     }
1200 
1201     //
1202     // XMLEntityHandler methods
1203     //
1204 
1205     /**
1206      * This method notifies of the start of an entity. The document entity
1207      * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1208      * parameter entity names start with '%'; and general entities are just
1209      * specified by their name.
1210      *
1211      * @param name     The name of the entity.
1212      * @param identifier The resource identifier.
1213      * @param encoding The auto-detected IANA encoding name of the entity
1214      *                 stream. This value will be null in those situations
1215      *                 where the entity encoding is not auto-detected (e.g.
1216      *                 internal entities or a document entity that is
1217      *                 parsed from a java.io.Reader).
1218      *
1219      * @throws XNIException Thrown by handler to signal an error.
1220      */
1221     public void startEntity(String name,
1222             XMLResourceIdentifier identifier,
1223             String encoding, Augmentations augs) throws XNIException {
1224 
1225         // keep track of the entity depth
1226         fEntityDepth++;
1227         // must reset entity scanner
1228         fEntityScanner = fEntityManager.getEntityScanner();
1229         fEntityStore = fEntityManager.getEntityStore() ;
1230     } // startEntity(String,XMLResourceIdentifier,String)
1231 
1232     /**
1233      * This method notifies the end of an entity. The document entity has
1234      * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1235      * parameter entity names start with '%'; and general entities are just
1236      * specified by their name.
1237      *
1238      * @param name The name of the entity.
1239      *
1240      * @throws XNIException Thrown by handler to signal an error.
1241      */
1242     public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
1243         // keep track of the entity depth
1244         if (fEntityDepth > 0) {
1245             fEntityDepth--;
1246         }
1247     } // endEntity(String)
1248 
1249     /**
1250      * Scans a character reference and append the corresponding chars to the
1251      * specified buffer.
1252      *
1253      * <p>
1254      * <pre>
1255      * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1256      * </pre>
1257      *
     * <strong>Note:</strong> This method uses fStringBuffer3, anything in it
     * at the time of calling is lost.
1260      *
1261      * @param buf the character buffer to append chars to
1262      * @param buf2 the character buffer to append non-normalized chars to
1263      *
1264      * @return the character value or (-1) on conversion failure
1265      */
1266     protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1267     throws IOException, XNIException {
1268         int initLen = buf.length;
1269         // scan hexadecimal value
1270         boolean hex = false;
1271         if (fEntityScanner.skipChar('x', NameType.REFERENCE)) {
1272             if (buf2 != null) { buf2.append('x'); }
1273             hex = true;
1274             fStringBuffer3.clear();
1275             boolean digit = true;
1276 
1277             int c = fEntityScanner.peekChar();
1278             digit = (c >= '0' && c <= '9') ||
1279                     (c >= 'a' && c <= 'f') ||
1280                     (c >= 'A' && c <= 'F');
1281             if (digit) {
1282                 if (buf2 != null) { buf2.append((char)c); }
1283                 fEntityScanner.scanChar(NameType.REFERENCE);
1284                 fStringBuffer3.append((char)c);
1285 
1286                 do {
1287                     c = fEntityScanner.peekChar();
1288                     digit = (c >= '0' && c <= '9') ||
1289                             (c >= 'a' && c <= 'f') ||
1290                             (c >= 'A' && c <= 'F');
1291                     if (digit) {
1292                         if (buf2 != null) { buf2.append((char)c); }
1293                         fEntityScanner.scanChar(NameType.REFERENCE);
1294                         fStringBuffer3.append((char)c);
1295                     }
1296                 } while (digit);
1297             } else {
1298                 reportFatalError("HexdigitRequiredInCharRef", null);
1299             }
1300         }
1301 
1302         // scan decimal value
1303         else {
1304             fStringBuffer3.clear();
1305             boolean digit = true;
1306 
1307             int c = fEntityScanner.peekChar();
1308             digit = c >= '0' && c <= '9';
1309             if (digit) {
1310                 if (buf2 != null) { buf2.append((char)c); }
1311                 fEntityScanner.scanChar(NameType.REFERENCE);
1312                 fStringBuffer3.append((char)c);
1313 
1314                 do {
1315                     c = fEntityScanner.peekChar();
1316                     digit = c >= '0' && c <= '9';
1317                     if (digit) {
1318                         if (buf2 != null) { buf2.append((char)c); }
1319                         fEntityScanner.scanChar(NameType.REFERENCE);
1320                         fStringBuffer3.append((char)c);
1321                     }
1322                 } while (digit);
1323             } else {
1324                 reportFatalError("DigitRequiredInCharRef", null);
1325             }
1326         }
1327 
1328         // end
1329         if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
1330             reportFatalError("SemicolonRequiredInCharRef", null);
1331         }
1332         if (buf2 != null) { buf2.append(';'); }
1333 
1334         // convert string to number
1335         int value = -1;
1336         try {
1337             value = Integer.parseInt(fStringBuffer3.toString(),
1338                     hex ? 16 : 10);
1339 
1340             // character reference must be a valid XML character
1341             if (isInvalid(value)) {
1342                 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1343                 if (hex) errorBuf.append('x');
1344                 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1345                 reportFatalError("InvalidCharRef",
1346                         new Object[]{errorBuf.toString()});
1347             }
1348         } catch (NumberFormatException e) {
1349             // Conversion failed, let -1 value drop through.
1350             // If we end up here, the character reference was invalid.
1351             StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1352             if (hex) errorBuf.append('x');
1353             errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1354             reportFatalError("InvalidCharRef",
1355                     new Object[]{errorBuf.toString()});
1356         }
1357 
1358         // append corresponding chars to the given buffer
1359         if (!XMLChar.isSupplemental(value)) {
1360             buf.append((char) value);
1361         } else {
1362             // character is supplemental, split it into surrogate chars
1363             buf.append(XMLChar.highSurrogate(value));
1364             buf.append(XMLChar.lowSurrogate(value));
1365         }
1366 
1367         // char refs notification code
1368         if (fNotifyCharRefs && value != -1) {
1369             String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1370             if (!fScanningAttribute) {
1371                 fCharRefLiteral = literal;
1372             }
1373         }
1374 
1375         if (fEntityScanner.fCurrentEntity.isGE) {
1376             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen);
1377         }
1378         return value;
1379     }
1380     // returns true if the given character is not
1381     // valid with respect to the version of
1382     // XML understood by this scanner.
1383     protected boolean isInvalid(int value) {
1384         return (XMLChar.isInvalid(value));
1385     } // isInvalid(int):  boolean
1386 
1387     // returns true if the given character is not
1388     // valid or may not be used outside a character reference
1389     // with respect to the version of XML understood by this scanner.
1390     protected boolean isInvalidLiteral(int value) {
1391         return (XMLChar.isInvalid(value));
1392     } // isInvalidLiteral(int):  boolean
1393 
1394     // returns true if the given character is
1395     // a valid nameChar with respect to the version of
1396     // XML understood by this scanner.
1397     protected boolean isValidNameChar(int value) {
1398         return (XMLChar.isName(value));
1399     } // isValidNameChar(int):  boolean
1400 
1401     // returns true if the given character is
1402     // a valid NCName character with respect to the version of
1403     // XML understood by this scanner.
1404     protected boolean isValidNCName(int value) {
1405         return (XMLChar.isNCName(value));
1406     } // isValidNCName(int):  boolean
1407 
1408     // returns true if the given character is
1409     // a valid nameStartChar with respect to the version of
1410     // XML understood by this scanner.
1411     protected boolean isValidNameStartChar(int value) {
1412         return (XMLChar.isNameStart(value));
1413     } // isValidNameStartChar(int):  boolean
1414 
1415     // returns true if the given character is
1416     // a valid high surrogate for a nameStartChar
1417     // with respect to the version of XML understood
1418     // by this scanner.
1419     protected boolean isValidNameStartHighSurrogate(int value) {
1420         return false;
1421     } // isValidNameStartHighSurrogate(int):  boolean
1422 
1423     protected boolean versionSupported(String version ) {
1424         return version.equals("1.0") || version.equals("1.1");
1425     } // version Supported
1426 
1427     /**
1428      * Scans surrogates and append them to the specified buffer.
1429      * <p>
1430      * <strong>Note:</strong> This assumes the current char has already been
1431      * identified as a high surrogate.
1432      *
1433      * @param buf The StringBuffer to append the read surrogates to.
1434      * @return True if it succeeded.
1435      */
1436     protected boolean scanSurrogates(XMLStringBuffer buf)
1437     throws IOException, XNIException {
1438 
1439         int high = fEntityScanner.scanChar(null);
1440         int low = fEntityScanner.peekChar();
1441         if (!XMLChar.isLowSurrogate(low)) {
1442             reportFatalError("InvalidCharInContent",
1443                     new Object[] {Integer.toString(high, 16)});
1444                     return false;
1445         }
1446         fEntityScanner.scanChar(null);
1447 
1448         // convert surrogates to supplemental character
1449         int c = XMLChar.supplemental((char)high, (char)low);
1450 
1451         // supplemental character must be a valid XML character
1452         if (isInvalid(c)) {
1453             reportFatalError("InvalidCharInContent",
1454                     new Object[]{Integer.toString(c, 16)});
1455                     return false;
1456         }
1457 
1458         // fill in the buffer
1459         buf.append((char)high);
1460         buf.append((char)low);
1461 
1462         return true;
1463 
1464     } // scanSurrogates():boolean
1465 
1466 
1467     /**
1468      * Convenience function used in all XML scanners.
1469      */
1470     protected void reportFatalError(String msgId, Object[] args)
1471     throws XNIException {
1472         fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,
1473                 msgId, args,
1474                 XMLErrorReporter.SEVERITY_FATAL_ERROR);
1475     }
1476 
1477     // private methods
1478     private void init() {
1479         // initialize scanner
1480         fEntityScanner = null;
1481         // initialize vars
1482         fEntityDepth = 0;
1483         fReportEntity = true;
1484         fResourceIdentifier.clear();
1485 
1486         if(!fAttributeCacheInitDone){
1487             for(int i = 0; i < initialCacheCount; i++){
1488                 attributeValueCache.add(new XMLString());
1489                 stringBufferCache.add(new XMLStringBuffer());
1490             }
1491             fAttributeCacheInitDone = true;
1492         }
1493         fStringBufferIndex = 0;
1494         fAttributeCacheUsedCount = 0;
1495 
1496     }
1497 
1498     XMLStringBuffer getStringBuffer(){
1499         if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){
1500             return stringBufferCache.get(fStringBufferIndex++);
1501         }else{
1502             XMLStringBuffer tmpObj = new XMLStringBuffer();
1503             fStringBufferIndex++;
1504             stringBufferCache.add(tmpObj);
1505             return tmpObj;
1506         }
1507     }
1508 
1509     /**
1510      * Add the count of the content buffer and check if the accumulated
1511      * value exceeds the limit
1512      * @param isPEDecl a flag to indicate whether the entity is parameter
1513      * @param entityName entity name
1514      * @param buffer content buffer
1515      */
1516     void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) {
1517         checkEntityLimit(isPEDecl, entityName, buffer.length);
1518     }
1519 
1520     /**
1521      * Add the count and check limit
1522      * @param isPEDecl a flag to indicate whether the entity is parameter
1523      * @param entityName entity name
1524      * @param len length of the buffer
1525      */
1526     void checkEntityLimit(boolean isPEDecl, String entityName, int len) {
1527         if (fLimitAnalyzer == null) {
1528             fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
1529         }
1530         if (isPEDecl) {
1531             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len);
1532             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1533                         fSecurityManager.debugPrint(fLimitAnalyzer);
1534                 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName,
1535                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1536                     fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1537                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)});
1538             }
1539         } else {
1540             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len);
1541             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1542                         fSecurityManager.debugPrint(fLimitAnalyzer);
1543                 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName,
1544                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1545                     fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1546                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)});
1547             }
1548         }
1549         if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1550             fSecurityManager.debugPrint(fLimitAnalyzer);
1551             reportFatalError("TotalEntitySizeLimit",
1552                 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1553                 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1554                 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)});
1555         }
1556     }
1557 } // class XMLScanner