1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.util.Status; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import javax.xml.stream.events.XMLEvent; 29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.xml.internal.stream.Entity; 45 46 //import com.sun.xml.stream.XMLEntityManager; 47 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter; 48 49 /** 50 * This class is responsible for holding scanning methods common to 51 * scanning the XML document structure and content as well as the DTD 52 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit 53 * from this base class. 54 * 55 * <p> 56 * This component requires the following features and properties from the 57 * component manager that uses it: 58 * <ul> 59 * <li>http://xml.org/sax/features/validation</li> 60 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> 61 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 62 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 63 * <li>http://apache.org/xml/properties/internal/entity-manager</li> 64 * </ul> 65 * 66 * @author Andy Clark, IBM 67 * @author Arnaud Le Hors, IBM 68 * @author Eric Ye, IBM 69 * @author K.Venugopal SUN Microsystems 70 * @author Sunitha Reddy, SUN Microsystems 71 * @LastModified: Feb 2020 72 */ 73 public abstract class XMLScanner 74 implements XMLComponent { 75 76 // 77 // Constants 78 // 79 80 // feature identifiers 81 82 /** Feature identifier: namespaces. */ 83 protected static final String NAMESPACES = 84 Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; 85 86 /** Feature identifier: validation. */ 87 protected static final String VALIDATION = 88 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 89 90 /** Feature identifier: notify character references. 
*/ 91 protected static final String NOTIFY_CHAR_REFS = 92 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE; 93 94 // property identifiers 95 96 protected static final String PARSER_SETTINGS = 97 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS; 98 /** Property identifier: symbol table. */ 99 protected static final String SYMBOL_TABLE = 100 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 101 102 /** Property identifier: error reporter. */ 103 protected static final String ERROR_REPORTER = 104 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 105 106 /** Property identifier: entity manager. */ 107 protected static final String ENTITY_MANAGER = 108 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY; 109 110 /** Property identifier: Security manager. */ 111 private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER; 112 113 // debugging 114 115 /** Debug attribute normalization. */ 116 protected static final boolean DEBUG_ATTR_NORMALIZATION = false; 117 118 /** 119 * Type of names 120 */ 121 public static enum NameType { 122 ATTRIBUTE("attribute"), 123 ATTRIBUTENAME("attribute name"), 124 COMMENT("comment"), 125 DOCTYPE("doctype"), 126 ELEMENTSTART("startelement"), 127 ELEMENTEND("endelement"), 128 ENTITY("entity"), 129 NOTATION("notation"), 130 PI("pi"), 131 REFERENCE("reference"); 132 133 final String literal; 134 NameType(String literal) { 135 this.literal = literal; 136 } 137 138 String literal() { 139 return literal; 140 } 141 } 142 143 //xxx: setting the default value as false, as we dont need to calculate this value 144 //we should have a feature when set to true computes this value 145 private boolean fNeedNonNormalizedValue = false; 146 147 protected ArrayList<XMLString> attributeValueCache = new ArrayList<>(); 148 protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>(); 149 protected int fStringBufferIndex = 0; 150 protected boolean 
fAttributeCacheInitDone = false; 151 protected int fAttributeCacheUsedCount = 0; 152 153 // 154 // Data 155 // 156 157 // features 158 159 /** 160 * Validation. This feature identifier is: 161 * http://xml.org/sax/features/validation 162 */ 163 protected boolean fValidation = false; 164 165 /** Namespaces. */ 166 protected boolean fNamespaces; 167 168 /** Character references notification. */ 169 protected boolean fNotifyCharRefs = false; 170 171 /** Internal parser-settings feature */ 172 protected boolean fParserSettings = true; 173 174 // properties 175 176 protected PropertyManager fPropertyManager = null ; 177 /** Symbol table. */ 178 protected SymbolTable fSymbolTable; 179 180 /** Error reporter. */ 181 protected XMLErrorReporter fErrorReporter; 182 183 /** Entity manager. */ 184 //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager(); 185 protected XMLEntityManager fEntityManager = null ; 186 187 /** xxx this should be available from EntityManager Entity storage */ 188 protected XMLEntityStorage fEntityStore = null ; 189 190 /** Security manager. */ 191 protected XMLSecurityManager fSecurityManager = null; 192 193 /** Limit analyzer. */ 194 protected XMLLimitAnalyzer fLimitAnalyzer = null; 195 196 // protected data 197 198 /** event type */ 199 protected XMLEvent fEvent ; 200 201 /** Entity scanner, this always works on last entity that was opened. */ 202 protected XMLEntityScanner fEntityScanner = null; 203 204 /** Entity depth. */ 205 protected int fEntityDepth; 206 207 /** Literal value of the last character reference scanned. */ 208 protected String fCharRefLiteral = null; 209 210 /** Scanning attribute. */ 211 protected boolean fScanningAttribute; 212 213 /** Report entity boundary. */ 214 protected boolean fReportEntity; 215 216 // symbols 217 218 /** Symbol: "version". */ 219 protected final static String fVersionSymbol = "version".intern(); 220 221 /** Symbol: "encoding". 
*/ 222 protected final static String fEncodingSymbol = "encoding".intern(); 223 224 /** Symbol: "standalone". */ 225 protected final static String fStandaloneSymbol = "standalone".intern(); 226 227 /** Symbol: "amp". */ 228 protected final static String fAmpSymbol = "amp".intern(); 229 230 /** Symbol: "lt". */ 231 protected final static String fLtSymbol = "lt".intern(); 232 233 /** Symbol: "gt". */ 234 protected final static String fGtSymbol = "gt".intern(); 235 236 /** Symbol: "quot". */ 237 protected final static String fQuotSymbol = "quot".intern(); 238 239 /** Symbol: "apos". */ 240 protected final static String fAposSymbol = "apos".intern(); 241 242 // temporary variables 243 244 // NOTE: These objects are private to help prevent accidental modification 245 // of values by a subclass. If there were protected *and* the sub- 246 // modified the values, it would be difficult to track down the real 247 // cause of the bug. By making these private, we avoid this 248 // possibility. 249 250 /** String. */ 251 private XMLString fString = new XMLString(); 252 253 /** String buffer. */ 254 private XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 255 256 /** String buffer. */ 257 private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 258 259 /** String buffer. */ 260 private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer(); 261 262 // temporary location for Resource identification information. 263 protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); 264 int initialCacheCount = 6; 265 // 266 // XMLComponent methods 267 // 268 269 /** 270 * 271 * 272 * @param componentManager The component manager. 273 * 274 * @throws SAXException Throws exception if required features and 275 * properties cannot be found. 
276 */ 277 public void reset(XMLComponentManager componentManager) 278 throws XMLConfigurationException { 279 280 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true); 281 282 if (!fParserSettings) { 283 // parser settings have not been changed 284 init(); 285 return; 286 } 287 288 289 // Xerces properties 290 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 291 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 292 fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER); 293 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER); 294 295 //this step is extra because we have separated the storage of entity 296 fEntityStore = fEntityManager.getEntityStore() ; 297 298 // sax features 299 fValidation = componentManager.getFeature(VALIDATION, false); 300 fNamespaces = componentManager.getFeature(NAMESPACES, true); 301 fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false); 302 303 init(); 304 } // reset(XMLComponentManager) 305 306 protected void setPropertyManager(PropertyManager propertyManager){ 307 fPropertyManager = propertyManager ; 308 } 309 310 /** 311 * Sets the value of a property during parsing. 
312 * 313 * @param propertyId 314 * @param value 315 */ 316 public void setProperty(String propertyId, Object value) 317 throws XMLConfigurationException { 318 319 // Xerces properties 320 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 321 String property = 322 propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 323 if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) { 324 fSymbolTable = (SymbolTable)value; 325 } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) { 326 fErrorReporter = (XMLErrorReporter)value; 327 } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 328 fEntityManager = (XMLEntityManager)value; 329 } 330 } 331 332 if (propertyId.equals(SECURITY_MANAGER)) { 333 fSecurityManager = (XMLSecurityManager)value; 334 } 335 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){ 336 fStaxProperties = (HashMap)value; 337 //TODO::discuss with neeraj what are his thoughts on passing properties. 338 //For now use this 339 }*/ 340 341 } // setProperty(String,Object) 342 343 /* 344 * Sets the feature of the scanner. 345 */ 346 public void setFeature(String featureId, boolean value) 347 throws XMLConfigurationException { 348 349 if (VALIDATION.equals(featureId)) { 350 fValidation = value; 351 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 352 fNotifyCharRefs = value; 353 } 354 } 355 356 /* 357 * Gets the state of the feature of the scanner. 358 */ 359 public boolean getFeature(String featureId) 360 throws XMLConfigurationException { 361 362 if (VALIDATION.equals(featureId)) { 363 return fValidation; 364 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 365 return fNotifyCharRefs; 366 } 367 throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId); 368 } 369 370 // 371 // Protected methods 372 // 373 374 // anybody calling this had better have set Symtoltable! 
375 protected void reset() { 376 init(); 377 378 // DTD preparsing defaults: 379 fValidation = true; 380 fNotifyCharRefs = false; 381 382 } 383 384 public void reset(PropertyManager propertyManager) { 385 init(); 386 // Xerces properties 387 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 388 389 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 390 391 fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER); 392 fEntityStore = fEntityManager.getEntityStore() ; 393 fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ; 394 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 395 396 //fEntityManager.reset(); 397 // DTD preparsing defaults: 398 fValidation = false; 399 fNotifyCharRefs = false; 400 401 } 402 // common scanning methods 403 404 /** 405 * Scans an XML or text declaration. 406 * <p> 407 * <pre> 408 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 409 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 410 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 411 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 412 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 413 * | ('"' ('yes' | 'no') '"')) 414 * 415 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 416 * </pre> 417 * 418 * @param scanningTextDecl True if a text declaration is to 419 * be scanned instead of an XML 420 * declaration. 421 * @param pseudoAttributeValues An array of size 3 to return the version, 422 * encoding and standalone pseudo attribute values 423 * (in that order). 424 * 425 * <strong>Note:</strong> This method uses fString, anything in it 426 * at the time of calling is lost. 
427 */ 428 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl, 429 String[] pseudoAttributeValues) 430 throws IOException, XNIException { 431 432 // pseudo-attribute values 433 String version = null; 434 String encoding = null; 435 String standalone = null; 436 437 // scan pseudo-attributes 438 final int STATE_VERSION = 0; 439 final int STATE_ENCODING = 1; 440 final int STATE_STANDALONE = 2; 441 final int STATE_DONE = 3; 442 int state = STATE_VERSION; 443 444 boolean dataFoundForTarget = false; 445 boolean sawSpace = fEntityScanner.skipSpaces(); 446 // since pseudoattributes are *not* attributes, 447 // their quotes don't need to be preserved in external parameter entities. 448 // the XMLEntityScanner#scanLiteral method will continue to 449 // emit -1 in such cases when it finds a quote; this is 450 // fine for other methods that parse scanned entities, 451 // but not for the scanning of pseudoattributes. So, 452 // temporarily, we must mark the current entity as not being "literal" 453 Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity(); 454 boolean currLiteral = currEnt.literal; 455 currEnt.literal = false; 456 while (fEntityScanner.peekChar() != '?') { 457 dataFoundForTarget = true; 458 String name = scanPseudoAttribute(scanningTextDecl, fString); 459 switch (state) { 460 case STATE_VERSION: { 461 if (name.equals(fVersionSymbol)) { 462 if (!sawSpace) { 463 reportFatalError(scanningTextDecl 464 ? 
"SpaceRequiredBeforeVersionInTextDecl" 465 : "SpaceRequiredBeforeVersionInXMLDecl", 466 null); 467 } 468 version = fString.toString(); 469 state = STATE_ENCODING; 470 if (!versionSupported(version)) { 471 reportFatalError("VersionNotSupported", 472 new Object[]{version}); 473 } 474 475 if (version.equals("1.1")) { 476 Entity.ScannedEntity top = fEntityManager.getTopLevelEntity(); 477 if (top != null && (top.version == null || top.version.equals("1.0"))) { 478 reportFatalError("VersionMismatch", null); 479 } 480 fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1); 481 } 482 483 } else if (name.equals(fEncodingSymbol)) { 484 if (!scanningTextDecl) { 485 reportFatalError("VersionInfoRequired", null); 486 } 487 if (!sawSpace) { 488 reportFatalError(scanningTextDecl 489 ? "SpaceRequiredBeforeEncodingInTextDecl" 490 : "SpaceRequiredBeforeEncodingInXMLDecl", 491 null); 492 } 493 encoding = fString.toString(); 494 state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE; 495 } else { 496 if (scanningTextDecl) { 497 reportFatalError("EncodingDeclRequired", null); 498 } else { 499 reportFatalError("VersionInfoRequired", null); 500 } 501 } 502 break; 503 } 504 case STATE_ENCODING: { 505 if (name.equals(fEncodingSymbol)) { 506 if (!sawSpace) { 507 reportFatalError(scanningTextDecl 508 ? "SpaceRequiredBeforeEncodingInTextDecl" 509 : "SpaceRequiredBeforeEncodingInXMLDecl", 510 null); 511 } 512 encoding = fString.toString(); 513 state = scanningTextDecl ? 
STATE_DONE : STATE_STANDALONE; 514 // TODO: check encoding name; set encoding on 515 // entity scanner 516 } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) { 517 if (!sawSpace) { 518 reportFatalError("SpaceRequiredBeforeStandalone", 519 null); 520 } 521 standalone = fString.toString(); 522 state = STATE_DONE; 523 if (!standalone.equals("yes") && !standalone.equals("no")) { 524 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 525 } 526 } else { 527 reportFatalError("EncodingDeclRequired", null); 528 } 529 break; 530 } 531 case STATE_STANDALONE: { 532 if (name.equals(fStandaloneSymbol)) { 533 if (!sawSpace) { 534 reportFatalError("SpaceRequiredBeforeStandalone", 535 null); 536 } 537 standalone = fString.toString(); 538 state = STATE_DONE; 539 if (!standalone.equals("yes") && !standalone.equals("no")) { 540 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 541 } 542 } else { 543 reportFatalError("SDDeclNameInvalid", null); 544 } 545 break; 546 } 547 default: { 548 reportFatalError("NoMorePseudoAttributes", null); 549 } 550 } 551 sawSpace = fEntityScanner.skipSpaces(); 552 } 553 // restore original literal value 554 if(currLiteral) { 555 currEnt.literal = true; 556 } 557 // REVISIT: should we remove this error reporting? 558 if (scanningTextDecl && state != STATE_DONE) { 559 reportFatalError("MorePseudoAttributes", null); 560 } 561 562 // If there is no data in the xml or text decl then we fail to report error 563 // for version or encoding info above. 
564 if (scanningTextDecl) { 565 if (!dataFoundForTarget && encoding == null) { 566 reportFatalError("EncodingDeclRequired", null); 567 } 568 } else { 569 if (!dataFoundForTarget && version == null) { 570 reportFatalError("VersionInfoRequired", null); 571 } 572 } 573 574 // end 575 if (!fEntityScanner.skipChar('?', null)) { 576 reportFatalError("XMLDeclUnterminated", null); 577 } 578 if (!fEntityScanner.skipChar('>', null)) { 579 reportFatalError("XMLDeclUnterminated", null); 580 581 } 582 583 // fill in return array 584 pseudoAttributeValues[0] = version; 585 pseudoAttributeValues[1] = encoding; 586 pseudoAttributeValues[2] = standalone; 587 588 } // scanXMLDeclOrTextDecl(boolean) 589 590 /** 591 * Scans a pseudo attribute. 592 * 593 * @param scanningTextDecl True if scanning this pseudo-attribute for a 594 * TextDecl; false if scanning XMLDecl. This 595 * flag is needed to report the correct type of 596 * error. 597 * @param value The string to fill in with the attribute 598 * value. 599 * 600 * @return The name of the attribute 601 * 602 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 603 * at the time of calling is lost. 604 */ 605 protected String scanPseudoAttribute(boolean scanningTextDecl, 606 XMLString value) 607 throws IOException, XNIException { 608 609 String name = scanPseudoAttributeName(); 610 // XMLEntityManager.print(fEntityManager.getCurrentEntity()); 611 612 if (name == null) { 613 reportFatalError("PseudoAttrNameExpected", null); 614 } 615 fEntityScanner.skipSpaces(); 616 if (!fEntityScanner.skipChar('=', null)) { 617 reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl" 618 : "EqRequiredInXMLDecl", new Object[]{name}); 619 } 620 fEntityScanner.skipSpaces(); 621 int quote = fEntityScanner.peekChar(); 622 if (quote != '\'' && quote != '"') { 623 reportFatalError(scanningTextDecl ? 
"QuoteRequiredInTextDecl" 624 : "QuoteRequiredInXMLDecl" , new Object[]{name}); 625 } 626 fEntityScanner.scanChar(NameType.ATTRIBUTE); 627 int c = fEntityScanner.scanLiteral(quote, value, false); 628 if (c != quote) { 629 fStringBuffer2.clear(); 630 do { 631 fStringBuffer2.append(value); 632 if (c != -1) { 633 if (c == '&' || c == '%' || c == '<' || c == ']') { 634 fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE)); 635 } else if (XMLChar.isHighSurrogate(c)) { 636 scanSurrogates(fStringBuffer2); 637 } else if (isInvalidLiteral(c)) { 638 String key = scanningTextDecl 639 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl"; 640 reportFatalError(key, 641 new Object[] {Integer.toString(c, 16)}); 642 fEntityScanner.scanChar(null); 643 } 644 } 645 c = fEntityScanner.scanLiteral(quote, value, false); 646 } while (c != quote); 647 fStringBuffer2.append(value); 648 value.setValues(fStringBuffer2); 649 } 650 if (!fEntityScanner.skipChar(quote, null)) { 651 reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl" 652 : "CloseQuoteMissingInXMLDecl", 653 new Object[]{name}); 654 } 655 656 // return 657 return name; 658 659 } // scanPseudoAttribute(XMLString):String 660 661 /** 662 * Scans the name of a pseudo attribute. The only legal names 663 * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'. 664 * 665 * @return the name of the pseudo attribute or <code>null</code> 666 * if a legal pseudo attribute name could not be scanned. 
*/
private String scanPseudoAttributeName() throws IOException, XNIException {
    // The first character selects the only name that could follow.
    final int first = fEntityScanner.peekChar();
    final String expected;
    if (first == 'v') {
        expected = fVersionSymbol;
    } else if (first == 'e') {
        expected = fEncodingSymbol;
    } else if (first == 's') {
        expected = fStandaloneSymbol;
    } else {
        return null;
    }
    return fEntityScanner.skipString(expected) ? expected : null;
} // scanPseudoAttributeName()

/**
 * Scans a processing instruction.
 * <p>
 * <pre>
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
 * </pre>
 *
 * @param data the buffer handed on to {@code scanPIData} to receive the PI data
 */
// History: this used to be scanPI(); it now takes the XMLStringBuffer to
// fill, for the same reason as scanContent().
protected void scanPI(XMLStringBuffer data) throws IOException, XNIException {

    // entity reporting is switched off while the PI is scanned
    fReportEntity = false;

    // target
    final String piTarget = fEntityScanner.scanName(NameType.PI);
    if (piTarget == null) {
        reportFatalError("PITargetRequired", null);
    }

    // scan data
    scanPIData(piTarget, data);
    fReportEntity = true;

} // scanPI(XMLStringBuffer)

/**
 * Scans processing instruction data. This is needed to handle the situation
 * where a document starts with a processing instruction whose
 * target name <em>starts with</em> "xml". (e.g. xmlfoo)
 *
 * This method always reads the whole data: it loops, buffering data
 * until the delimiter is encountered.
 *
 * @param target The PI target
 * @param data The string to fill in with the data
 */

//CHANGED:
//Earlier:This method uses the fStringBuffer and later buffer values are set to
//the supplied XMLString....
734 //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would 735 //be appended to that buffer 736 737 protected void scanPIData(String target, XMLStringBuffer data) 738 throws IOException, XNIException { 739 740 // check target 741 if (target.length() == 3) { 742 char c0 = Character.toLowerCase(target.charAt(0)); 743 char c1 = Character.toLowerCase(target.charAt(1)); 744 char c2 = Character.toLowerCase(target.charAt(2)); 745 if (c0 == 'x' && c1 == 'm' && c2 == 'l') { 746 reportFatalError("ReservedPITarget", null); 747 } 748 } 749 750 // spaces 751 if (!fEntityScanner.skipSpaces()) { 752 if (fEntityScanner.skipString("?>")) { 753 // we found the end, there is no data just return 754 return; 755 } else { 756 // if there is data there should be some space 757 reportFatalError("SpaceRequiredInPI", null); 758 } 759 } 760 761 // since scanData appends the parsed data to the buffer passed 762 // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer) 763 //until all of the data is buffered. 764 if (fEntityScanner.scanData("?>", data)) { 765 do { 766 int c = fEntityScanner.peekChar(); 767 if (c != -1) { 768 if (XMLChar.isHighSurrogate(c)) { 769 scanSurrogates(data); 770 } else if (isInvalidLiteral(c)) { 771 reportFatalError("InvalidCharInPI", 772 new Object[]{Integer.toHexString(c)}); 773 fEntityScanner.scanChar(null); 774 } 775 } 776 } while (fEntityScanner.scanData("?>", data)); 777 } 778 779 } // scanPIData(String,XMLString) 780 781 /** 782 * Scans a comment. 783 * <p> 784 * <pre> 785 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 786 * </pre> 787 * <p> 788 * <strong>Note:</strong> Called after scanning past '<!--' 789 * <strong>Note:</strong> This method uses fString, anything in it 790 * at the time of calling is lost. 791 * 792 * @param text The buffer to fill in with the text. 
793 */ 794 protected void scanComment(XMLStringBuffer text) 795 throws IOException, XNIException { 796 797 //System.out.println( "XMLScanner#scanComment# In Scan Comment" ); 798 // text 799 // REVISIT: handle invalid character, eof 800 text.clear(); 801 while (fEntityScanner.scanData("--", text)) { 802 int c = fEntityScanner.peekChar(); 803 804 //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() ); 805 //System.out.println( "XMLScanner#scanComment#c == " + c ); 806 807 if (c != -1) { 808 if (XMLChar.isHighSurrogate(c)) { 809 scanSurrogates(text); 810 } 811 else if (isInvalidLiteral(c)) { 812 reportFatalError("InvalidCharInComment", 813 new Object[] { Integer.toHexString(c) }); 814 fEntityScanner.scanChar(NameType.COMMENT); 815 } 816 } 817 } 818 if (!fEntityScanner.skipChar('>', NameType.COMMENT)) { 819 reportFatalError("DashDashInComment", null); 820 } 821 822 } // scanComment() 823 824 /** 825 * Scans an attribute value and normalizes whitespace converting all 826 * whitespace characters to space characters. 827 * 828 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 829 * 830 * @param value The XMLString to fill in with the value. 831 * @param nonNormalizedValue The XMLString to fill in with the 832 * non-normalized value. 833 * @param atName The name of the attribute being parsed (for error msgs). 834 * @param attributes The attributes list for the scanned attribute. 835 * @param attrIndex The index of the attribute to use from the list. 836 * @param checkEntities true if undeclared entities should be reported as VC violation, 837 * false if undeclared entities should be reported as WFC violation. 838 * @param eleName The name of element to which this attribute belongs. 839 * @param isNSURI a flag indicating whether the content is a Namespace URI 840 * 841 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 842 * at the time of calling is lost. 
**/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // only allocated (from the buffer cache) if the value cannot be
        // scanned in a single scanLiteral() call
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        // consume the character that was just peeked
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // remember the depth at which the value started; the loop below only
        // terminates on a quote seen at this same depth
        int entityDepth = fEntityDepth;

        // the returned character is compared below against the quote and the
        // special characters ('&', '<', ...) that need individual handling
        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // fStringBuffer2 accumulates the raw text handed back through
        // nonNormalizedValue at the end of this method
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        // the raw copy above is taken before whitespace is rewritten in place
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        if (c != quote) {
            // slow path: the scan stopped on something other than the closing
            // quote, so the value is assembled piecewise in stringBuffer
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                // keep the chunk produced by the previous scanLiteral() call
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    // raw text is only recorded while at the starting depth
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        // character reference: &#...; or &#x...;
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        // the result is only used for debug output here
                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        // general entity reference: &name;
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // resolveCharacter() appends the character itself when
                        // it recognises the name and returns true
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    // with checkEntities set this is only an
                                    // error, and only when validating;
                                    // otherwise it is a fatal error
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // NOTE(review): presumably this makes the scanner read
                                // the entity's replacement text and raises fEntityDepth
                                // through the startEntity callback -- confirm
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported, then consumed; it is not added to the
                    // normalized value, only to the raw one
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                } else if (c == '%' || c == ']') {
                    // these characters are copied through unchanged
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // a line break becomes a space in the normalized value; the
                    // raw value records '\n' for either character
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // surrogate pair: collected in fStringBuffer3 first so that
                    // nothing is appended when the pair is rejected
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // invalid character: reported, consumed, and kept only in
                    // the raw value
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                }
                // scan the next chunk; it is appended to stringBuffer at the
                // top of the next iteration, or after the loop for the last one
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check to Attributes , do conversion
                //only if attribute is being accessed. -Venu
                // a quote seen at a different entity depth does not end the value
            } while (c != quote || entityDepth != fEntityDepth);
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            // hand the assembled text back through the caller's XMLString
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()


    /**
     * Resolves character entity references.
     * @param entityName the name of the entity
     * @param stringBuffer the current XMLStringBuffer to append the character to.
1019 * @return true if resolved, false otherwise 1020 */ 1021 protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) { 1022 /** 1023 * entityNames (symbols) are interned. The equals method would do the same, 1024 * but I'm leaving it as comparisons by references are common in the impl 1025 * and it made it explicit to others who read this code. 1026 */ 1027 if (entityName == fAmpSymbol) { 1028 stringBuffer.append('&'); 1029 return true; 1030 } else if (entityName == fAposSymbol) { 1031 stringBuffer.append('\''); 1032 return true; 1033 } else if (entityName == fLtSymbol) { 1034 stringBuffer.append('<'); 1035 return true; 1036 } else if (entityName == fGtSymbol) { 1037 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1038 stringBuffer.append('>'); 1039 return true; 1040 } else if (entityName == fQuotSymbol) { 1041 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1042 stringBuffer.append('"'); 1043 return true; 1044 } 1045 return false; 1046 } 1047 1048 /** 1049 * Scans External ID and return the public and system IDs. 1050 * 1051 * @param identifiers An array of size 2 to return the system id, 1052 * and public id (in that order). 1053 * @param optionalSystemId Specifies whether the system id is optional. 1054 * 1055 * <strong>Note:</strong> This method uses fString and fStringBuffer, 1056 * anything in them at the time of calling is lost. 
1057 */ 1058 protected void scanExternalID(String[] identifiers, 1059 boolean optionalSystemId) 1060 throws IOException, XNIException { 1061 1062 String systemId = null; 1063 String publicId = null; 1064 if (fEntityScanner.skipString("PUBLIC")) { 1065 if (!fEntityScanner.skipSpaces()) { 1066 reportFatalError("SpaceRequiredAfterPUBLIC", null); 1067 } 1068 scanPubidLiteral(fString); 1069 publicId = fString.toString(); 1070 1071 if (!fEntityScanner.skipSpaces() && !optionalSystemId) { 1072 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); 1073 } 1074 } 1075 1076 if (publicId != null || fEntityScanner.skipString("SYSTEM")) { 1077 if (publicId == null && !fEntityScanner.skipSpaces()) { 1078 reportFatalError("SpaceRequiredAfterSYSTEM", null); 1079 } 1080 int quote = fEntityScanner.peekChar(); 1081 if (quote != '\'' && quote != '"') { 1082 if (publicId != null && optionalSystemId) { 1083 // looks like we don't have any system id 1084 // simply return the public id 1085 identifiers[0] = null; 1086 identifiers[1] = publicId; 1087 return; 1088 } 1089 reportFatalError("QuoteRequiredInSystemID", null); 1090 } 1091 fEntityScanner.scanChar(null); 1092 XMLString ident = fString; 1093 if (fEntityScanner.scanLiteral(quote, ident, false) != quote) { 1094 fStringBuffer.clear(); 1095 do { 1096 fStringBuffer.append(ident); 1097 int c = fEntityScanner.peekChar(); 1098 if (XMLChar.isMarkup(c) || c == ']') { 1099 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 1100 } else if (c != -1 && isInvalidLiteral(c)) { 1101 reportFatalError("InvalidCharInSystemID", 1102 new Object[] {Integer.toString(c, 16)}); 1103 } 1104 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote); 1105 fStringBuffer.append(ident); 1106 ident = fStringBuffer; 1107 } 1108 systemId = ident.toString(); 1109 if (!fEntityScanner.skipChar(quote, null)) { 1110 reportFatalError("SystemIDUnterminated", null); 1111 } 1112 } 1113 1114 // store result in array 1115 identifiers[0] = systemId; 1116 
identifiers[1] = publicId; 1117 } 1118 1119 1120 /** 1121 * Scans public ID literal. 1122 * 1123 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1124 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1125 * 1126 * The returned string is normalized according to the following rule, 1127 * from http://www.w3.org/TR/REC-xml#dt-pubid: 1128 * 1129 * Before a match is attempted, all strings of white space in the public 1130 * identifier must be normalized to single space characters (#x20), and 1131 * leading and trailing white space must be removed. 1132 * 1133 * @param literal The string to fill in with the public ID literal. 1134 * @return True on success. 1135 * 1136 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 1137 * the time of calling is lost. 1138 */ 1139 protected boolean scanPubidLiteral(XMLString literal) 1140 throws IOException, XNIException { 1141 int quote = fEntityScanner.scanChar(null); 1142 if (quote != '\'' && quote != '"') { 1143 reportFatalError("QuoteRequiredInPublicID", null); 1144 return false; 1145 } 1146 1147 fStringBuffer.clear(); 1148 // skip leading whitespace 1149 boolean skipSpace = true; 1150 boolean dataok = true; 1151 while (true) { 1152 int c = fEntityScanner.scanChar(null); 1153 if (c == ' ' || c == '\n' || c == '\r') { 1154 if (!skipSpace) { 1155 // take the first whitespace as a space and skip the others 1156 fStringBuffer.append(' '); 1157 skipSpace = true; 1158 } 1159 } else if (c == quote) { 1160 if (skipSpace) { 1161 // if we finished on a space let's trim it 1162 fStringBuffer.length--; 1163 } 1164 literal.setValues(fStringBuffer); 1165 break; 1166 } else if (XMLChar.isPubid(c)) { 1167 fStringBuffer.append((char)c); 1168 skipSpace = false; 1169 } else if (c == -1) { 1170 reportFatalError("PublicIDUnterminated", null); 1171 return false; 1172 } else { 1173 dataok = false; 1174 reportFatalError("InvalidCharInPublicID", 1175 new 
Object[]{Integer.toHexString(c)}); 1176 } 1177 } 1178 return dataok; 1179 } 1180 1181 1182 /** 1183 * Normalize whitespace in an XMLString converting all whitespace 1184 * characters to space characters. 1185 */ 1186 protected void normalizeWhitespace(XMLString value) { 1187 int i=0; 1188 int j=0; 1189 int [] buff = fEntityScanner.whiteSpaceLookup; 1190 int buffLen = fEntityScanner.whiteSpaceLen; 1191 int end = value.offset + value.length; 1192 while(i < buffLen){ 1193 j = buff[i]; 1194 if(j < end ){ 1195 value.ch[j] = ' '; 1196 } 1197 i++; 1198 } 1199 } 1200 1201 // 1202 // XMLEntityHandler methods 1203 // 1204 1205 /** 1206 * This method notifies of the start of an entity. The document entity 1207 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1208 * parameter entity names start with '%'; and general entities are just 1209 * specified by their name. 1210 * 1211 * @param name The name of the entity. 1212 * @param identifier The resource identifier. 1213 * @param encoding The auto-detected IANA encoding name of the entity 1214 * stream. This value will be null in those situations 1215 * where the entity encoding is not auto-detected (e.g. 1216 * internal entities or a document entity that is 1217 * parsed from a java.io.Reader). 1218 * 1219 * @throws XNIException Thrown by handler to signal an error. 1220 */ 1221 public void startEntity(String name, 1222 XMLResourceIdentifier identifier, 1223 String encoding, Augmentations augs) throws XNIException { 1224 1225 // keep track of the entity depth 1226 fEntityDepth++; 1227 // must reset entity scanner 1228 fEntityScanner = fEntityManager.getEntityScanner(); 1229 fEntityStore = fEntityManager.getEntityStore() ; 1230 } // startEntity(String,XMLResourceIdentifier,String) 1231 1232 /** 1233 * This method notifies the end of an entity. 
The document entity has 1234 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1235 * parameter entity names start with '%'; and general entities are just 1236 * specified by their name. 1237 * 1238 * @param name The name of the entity. 1239 * 1240 * @throws XNIException Thrown by handler to signal an error. 1241 */ 1242 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 1243 // keep track of the entity depth 1244 if (fEntityDepth > 0) { 1245 fEntityDepth--; 1246 } 1247 } // endEntity(String) 1248 1249 /** 1250 * Scans a character reference and append the corresponding chars to the 1251 * specified buffer. 1252 * 1253 * <p> 1254 * <pre> 1255 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1256 * </pre> 1257 * 1258 * <strong>Note:</strong> This method uses fStringBuffer, anything in it 1259 * at the time of calling is lost. 1260 * 1261 * @param buf the character buffer to append chars to 1262 * @param buf2 the character buffer to append non-normalized chars to 1263 * 1264 * @return the character value or (-1) on conversion failure 1265 */ 1266 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 1267 throws IOException, XNIException { 1268 int initLen = buf.length; 1269 // scan hexadecimal value 1270 boolean hex = false; 1271 if (fEntityScanner.skipChar('x', NameType.REFERENCE)) { 1272 if (buf2 != null) { buf2.append('x'); } 1273 hex = true; 1274 fStringBuffer3.clear(); 1275 boolean digit = true; 1276 1277 int c = fEntityScanner.peekChar(); 1278 digit = (c >= '0' && c <= '9') || 1279 (c >= 'a' && c <= 'f') || 1280 (c >= 'A' && c <= 'F'); 1281 if (digit) { 1282 if (buf2 != null) { buf2.append((char)c); } 1283 fEntityScanner.scanChar(NameType.REFERENCE); 1284 fStringBuffer3.append((char)c); 1285 1286 do { 1287 c = fEntityScanner.peekChar(); 1288 digit = (c >= '0' && c <= '9') || 1289 (c >= 'a' && c <= 'f') || 1290 (c >= 'A' && c <= 'F'); 1291 if (digit) { 1292 if (buf2 != 
null) { buf2.append((char)c); } 1293 fEntityScanner.scanChar(NameType.REFERENCE); 1294 fStringBuffer3.append((char)c); 1295 } 1296 } while (digit); 1297 } else { 1298 reportFatalError("HexdigitRequiredInCharRef", null); 1299 } 1300 } 1301 1302 // scan decimal value 1303 else { 1304 fStringBuffer3.clear(); 1305 boolean digit = true; 1306 1307 int c = fEntityScanner.peekChar(); 1308 digit = c >= '0' && c <= '9'; 1309 if (digit) { 1310 if (buf2 != null) { buf2.append((char)c); } 1311 fEntityScanner.scanChar(NameType.REFERENCE); 1312 fStringBuffer3.append((char)c); 1313 1314 do { 1315 c = fEntityScanner.peekChar(); 1316 digit = c >= '0' && c <= '9'; 1317 if (digit) { 1318 if (buf2 != null) { buf2.append((char)c); } 1319 fEntityScanner.scanChar(NameType.REFERENCE); 1320 fStringBuffer3.append((char)c); 1321 } 1322 } while (digit); 1323 } else { 1324 reportFatalError("DigitRequiredInCharRef", null); 1325 } 1326 } 1327 1328 // end 1329 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1330 reportFatalError("SemicolonRequiredInCharRef", null); 1331 } 1332 if (buf2 != null) { buf2.append(';'); } 1333 1334 // convert string to number 1335 int value = -1; 1336 try { 1337 value = Integer.parseInt(fStringBuffer3.toString(), 1338 hex ? 16 : 10); 1339 1340 // character reference must be a valid XML character 1341 if (isInvalid(value)) { 1342 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1343 if (hex) errorBuf.append('x'); 1344 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1345 reportFatalError("InvalidCharRef", 1346 new Object[]{errorBuf.toString()}); 1347 } 1348 } catch (NumberFormatException e) { 1349 // Conversion failed, let -1 value drop through. 1350 // If we end up here, the character reference was invalid. 
1351 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1352 if (hex) errorBuf.append('x'); 1353 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1354 reportFatalError("InvalidCharRef", 1355 new Object[]{errorBuf.toString()}); 1356 } 1357 1358 // append corresponding chars to the given buffer 1359 if (!XMLChar.isSupplemental(value)) { 1360 buf.append((char) value); 1361 } else { 1362 // character is supplemental, split it into surrogate chars 1363 buf.append(XMLChar.highSurrogate(value)); 1364 buf.append(XMLChar.lowSurrogate(value)); 1365 } 1366 1367 // char refs notification code 1368 if (fNotifyCharRefs && value != -1) { 1369 String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString(); 1370 if (!fScanningAttribute) { 1371 fCharRefLiteral = literal; 1372 } 1373 } 1374 1375 if (fEntityScanner.fCurrentEntity.isGE) { 1376 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen); 1377 } 1378 return value; 1379 } 1380 // returns true if the given character is not 1381 // valid with respect to the version of 1382 // XML understood by this scanner. 1383 protected boolean isInvalid(int value) { 1384 return (XMLChar.isInvalid(value)); 1385 } // isInvalid(int): boolean 1386 1387 // returns true if the given character is not 1388 // valid or may not be used outside a character reference 1389 // with respect to the version of XML understood by this scanner. 1390 protected boolean isInvalidLiteral(int value) { 1391 return (XMLChar.isInvalid(value)); 1392 } // isInvalidLiteral(int): boolean 1393 1394 // returns true if the given character is 1395 // a valid nameChar with respect to the version of 1396 // XML understood by this scanner. 
1397 protected boolean isValidNameChar(int value) { 1398 return (XMLChar.isName(value)); 1399 } // isValidNameChar(int): boolean 1400 1401 // returns true if the given character is 1402 // a valid NCName character with respect to the version of 1403 // XML understood by this scanner. 1404 protected boolean isValidNCName(int value) { 1405 return (XMLChar.isNCName(value)); 1406 } // isValidNCName(int): boolean 1407 1408 // returns true if the given character is 1409 // a valid nameStartChar with respect to the version of 1410 // XML understood by this scanner. 1411 protected boolean isValidNameStartChar(int value) { 1412 return (XMLChar.isNameStart(value)); 1413 } // isValidNameStartChar(int): boolean 1414 1415 // returns true if the given character is 1416 // a valid high surrogate for a nameStartChar 1417 // with respect to the version of XML understood 1418 // by this scanner. 1419 protected boolean isValidNameStartHighSurrogate(int value) { 1420 return false; 1421 } // isValidNameStartHighSurrogate(int): boolean 1422 1423 protected boolean versionSupported(String version ) { 1424 return version.equals("1.0") || version.equals("1.1"); 1425 } // version Supported 1426 1427 /** 1428 * Scans surrogates and append them to the specified buffer. 1429 * <p> 1430 * <strong>Note:</strong> This assumes the current char has already been 1431 * identified as a high surrogate. 1432 * 1433 * @param buf The StringBuffer to append the read surrogates to. 1434 * @return True if it succeeded. 
1435 */ 1436 protected boolean scanSurrogates(XMLStringBuffer buf) 1437 throws IOException, XNIException { 1438 1439 int high = fEntityScanner.scanChar(null); 1440 int low = fEntityScanner.peekChar(); 1441 if (!XMLChar.isLowSurrogate(low)) { 1442 reportFatalError("InvalidCharInContent", 1443 new Object[] {Integer.toString(high, 16)}); 1444 return false; 1445 } 1446 fEntityScanner.scanChar(null); 1447 1448 // convert surrogates to supplemental character 1449 int c = XMLChar.supplemental((char)high, (char)low); 1450 1451 // supplemental character must be a valid XML character 1452 if (isInvalid(c)) { 1453 reportFatalError("InvalidCharInContent", 1454 new Object[]{Integer.toString(c, 16)}); 1455 return false; 1456 } 1457 1458 // fill in the buffer 1459 buf.append((char)high); 1460 buf.append((char)low); 1461 1462 return true; 1463 1464 } // scanSurrogates():boolean 1465 1466 1467 /** 1468 * Convenience function used in all XML scanners. 1469 */ 1470 protected void reportFatalError(String msgId, Object[] args) 1471 throws XNIException { 1472 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN, 1473 msgId, args, 1474 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1475 } 1476 1477 // private methods 1478 private void init() { 1479 // initialize scanner 1480 fEntityScanner = null; 1481 // initialize vars 1482 fEntityDepth = 0; 1483 fReportEntity = true; 1484 fResourceIdentifier.clear(); 1485 1486 if(!fAttributeCacheInitDone){ 1487 for(int i = 0; i < initialCacheCount; i++){ 1488 attributeValueCache.add(new XMLString()); 1489 stringBufferCache.add(new XMLStringBuffer()); 1490 } 1491 fAttributeCacheInitDone = true; 1492 } 1493 fStringBufferIndex = 0; 1494 fAttributeCacheUsedCount = 0; 1495 1496 } 1497 1498 XMLStringBuffer getStringBuffer(){ 1499 if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){ 1500 return stringBufferCache.get(fStringBufferIndex++); 1501 }else{ 1502 XMLStringBuffer tmpObj = new 
XMLStringBuffer(); 1503 fStringBufferIndex++; 1504 stringBufferCache.add(tmpObj); 1505 return tmpObj; 1506 } 1507 } 1508 1509 /** 1510 * Add the count of the content buffer and check if the accumulated 1511 * value exceeds the limit 1512 * @param isPEDecl a flag to indicate whether the entity is parameter 1513 * @param entityName entity name 1514 * @param buffer content buffer 1515 */ 1516 void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) { 1517 checkEntityLimit(isPEDecl, entityName, buffer.length); 1518 } 1519 1520 /** 1521 * Add the count and check limit 1522 * @param isPEDecl a flag to indicate whether the entity is parameter 1523 * @param entityName entity name 1524 * @param len length of the buffer 1525 */ 1526 void checkEntityLimit(boolean isPEDecl, String entityName, int len) { 1527 if (fLimitAnalyzer == null) { 1528 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 1529 } 1530 if (isPEDecl) { 1531 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len); 1532 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1533 fSecurityManager.debugPrint(fLimitAnalyzer); 1534 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName, 1535 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1536 fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1537 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)}); 1538 } 1539 } else { 1540 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len); 1541 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1542 fSecurityManager.debugPrint(fLimitAnalyzer); 1543 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName, 1544 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1545 
fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1546 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)}); 1547 } 1548 } 1549 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1550 fSecurityManager.debugPrint(fLimitAnalyzer); 1551 reportFatalError("TotalEntitySizeLimit", 1552 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1553 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1554 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 1555 } 1556 } 1557 } // class XMLScanner