1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.util.Status; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import javax.xml.stream.events.XMLEvent; 29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
import com.sun.xml.internal.stream.Entity;

//import com.sun.xml.stream.XMLEntityManager;
//import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;

/**
 * This class is responsible for holding scanning methods common to
 * scanning the XML document structure and content as well as the DTD
 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
 * from this base class.
 *
 * <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 *  <li>http://xml.org/sax/features/validation</li>
 *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
 *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
 * </ul>
 *
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author K.Venugopal SUN Microsystems
 * @author Sunitha Reddy, SUN Microsystems
 * @version $Id: XMLScanner.java,v 1.12 2010-11-01 04:39:41 joehw Exp $
 * @LastModified: Feb 2020
 */
public abstract class XMLScanner
        implements XMLComponent {

    //
    // Constants
    //

    // feature identifiers

    /** Feature identifier: namespaces. */
    protected static final String NAMESPACES =
            Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /** Feature identifier: notify character references. */
    protected static final String NOTIFY_CHAR_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;

    // property identifiers

    /**
     * Identifier of the internal parser-settings flag. Although it is listed
     * with the property identifiers, it is built from the Xerces
     * <em>feature</em> prefix.
     */
    protected static final String PARSER_SETTINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Property identifier: entity manager. */
    protected static final String ENTITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

    /** Property identifier: Security manager. */
    private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;

    // debugging

    /** Debug attribute normalization. */
    protected static final boolean DEBUG_ATTR_NORMALIZATION = false;

    /**
     * Type of names. Each constant carries a human-readable literal that is
     * returned by {@link #literal()}.
     */
    public static enum NameType {
        ATTRIBUTE("attribute"),
        ATTRIBUTENAME("attribute name"),
        COMMENT("comment"),
        DOCTYPE("doctype"),
        ELEMENTSTART("startelement"),
        ELEMENTEND("endelement"),
        ENTITY("entity"),
        NOTATION("notation"),
        PI("pi"),
        REFERENCE("reference");

        /** The textual form of this name type. */
        final String literal;
        NameType(String literal) {
            this.literal = literal;
        }

        /** Returns the textual form passed to the constructor. */
        String literal() {
            return literal;
        }
    }

    // XXX: defaults to false because the non-normalized attribute value is
    // not needed; a feature should be added that, when set to true, causes
    // this value to be computed.
    private boolean fNeedNonNormalizedValue = false;

    // Caches and their bookkeeping counters.
    // NOTE(review): their exact usage is not visible in this part of the
    // file - presumably managed by init()/getStringBuffer(); confirm.
    protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
    protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
    protected int fStringBufferIndex = 0;
    protected boolean fAttributeCacheInitDone = false;
    protected int fAttributeCacheUsedCount = 0;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation = false;

    /** Namespaces. */
    protected boolean fNamespaces;

    /** Character references notification. */
    protected boolean fNotifyCharRefs = false;

    /** Internal parser-settings feature */
    protected boolean fParserSettings = true;

    // properties

    /** Property manager; null until one is supplied. */
    protected PropertyManager fPropertyManager = null ;
    /** Symbol table. */
    protected SymbolTable fSymbolTable;

    /** Error reporter. */
    protected XMLErrorReporter fErrorReporter;

    /** Entity manager. */
    //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
    protected XMLEntityManager fEntityManager = null ;

    /** Entity storage. XXX: this should be available from the entity manager. */
    protected XMLEntityStorage fEntityStore = null ;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    // protected data

    /** event type */
    protected XMLEvent fEvent ;

    /** Entity scanner, this always works on last entity that was opened. */
    protected XMLEntityScanner fEntityScanner = null;

    /** Entity depth. */
    protected int fEntityDepth;

    /** Literal value of the last character reference scanned. */
    protected String fCharRefLiteral = null;

    /** Scanning attribute. */
    protected boolean fScanningAttribute;

    /** Report entity boundary. */
    protected boolean fReportEntity;

    // symbols

    /** Symbol: "version". */
    protected final static String fVersionSymbol = "version".intern();

    /** Symbol: "encoding".
*/
    protected static final String fEncodingSymbol = "encoding".intern();

    /** Symbol: "standalone". */
    protected static final String fStandaloneSymbol = "standalone".intern();

    /** Symbol: "amp". */
    protected static final String fAmpSymbol = "amp".intern();

    /** Symbol: "lt". */
    protected static final String fLtSymbol = "lt".intern();

    /** Symbol: "gt". */
    protected static final String fGtSymbol = "gt".intern();

    /** Symbol: "quot". */
    protected static final String fQuotSymbol = "quot".intern();

    /** Symbol: "apos". */
    protected static final String fAposSymbol = "apos".intern();

    // temporary variables

    // NOTE: These objects are private to help prevent accidental
    //       modification of their values by a subclass. If they were
    //       protected *and* a subclass modified the values, it would be
    //       difficult to track down the real cause of the bug. Making
    //       them private avoids this possibility.

    /** Scratch string. */
    private XMLString fString = new XMLString();

    /** Scratch string buffer. */
    private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Second scratch string buffer. */
    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /** Third scratch string buffer. */
    private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

    /** Temporary location for resource identification information. */
    protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
    int initialCacheCount = 6;

    //
    // XMLComponent methods
    //

    /**
     * Resets this scanner from the given component manager.
     * <p>
     * The parser-settings flag is read first. When it is {@code false} the
     * cached properties and features are kept and only {@code init()} is
     * run; otherwise the symbol table, error reporter, entity manager,
     * security manager, entity store and the validation / namespaces /
     * notify-char-refs features are re-read before {@code init()} is run.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException declared for failures while reading
     *         features and properties from the component manager.
     */
    public void reset(XMLComponentManager componentManager)
            throws XMLConfigurationException {

        fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true);

        if (fParserSettings) {
            // Xerces properties
            fSymbolTable = (SymbolTable) componentManager.getProperty(SYMBOL_TABLE);
            fErrorReporter = (XMLErrorReporter) componentManager.getProperty(ERROR_REPORTER);
            fEntityManager = (XMLEntityManager) componentManager.getProperty(ENTITY_MANAGER);
            fSecurityManager = (XMLSecurityManager) componentManager.getProperty(SECURITY_MANAGER);

            // extra step: entity storage is kept separately from the manager
            fEntityStore = fEntityManager.getEntityStore();

            // SAX features
            fValidation = componentManager.getFeature(VALIDATION, false);
            fNamespaces = componentManager.getFeature(NAMESPACES, true);
            fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false);
        }

        // always executed, whether or not the settings were re-read
        init();
    } // reset(XMLComponentManager)

    /**
     * Stores the property manager used by this scanner.
     *
     * @param propertyManager the property manager to remember
     */
    protected void setPropertyManager(PropertyManager propertyManager) {
        fPropertyManager = propertyManager;
    }

    /**
     * Sets the value of a property during parsing. Only the symbol-table,
     * error-reporter, entity-manager and security-manager identifiers are
     * acted upon; any other identifier is ignored.
     *
     * @param propertyId the identifier of the property
     * @param value      the new value; cast to the type of the matching field
     */
    public void setProperty(String propertyId, Object value)
            throws XMLConfigurationException {

        // Xerces properties (each constant is prefix + property name)
        if (propertyId.equals(SYMBOL_TABLE)) {
            fSymbolTable = (SymbolTable) value;
        } else if (propertyId.equals(ERROR_REPORTER)) {
            fErrorReporter = (XMLErrorReporter) value;
        } else if (propertyId.equals(ENTITY_MANAGER)) {
            fEntityManager = (XMLEntityManager) value;
        }

        // checked independently of the Xerces-prefixed identifiers
        if (propertyId.equals(SECURITY_MANAGER)) {
            fSecurityManager = (XMLSecurityManager) value;
        }

    } // setProperty(String,Object)

    /**
     * Sets a feature of the scanner. Only the validation and
     * notify-char-refs features are stored; other identifiers are ignored.
     *
     * @param featureId the identifier of the feature
     * @param value     the new state of the feature
     */
    public void setFeature(String featureId, boolean value)
            throws XMLConfigurationException {

        if (VALIDATION.equals(featureId)) {
            fValidation = value;
            return;
        }
        if (NOTIFY_CHAR_REFS.equals(featureId)) {
            fNotifyCharRefs = value;
        }
    }

    /**
     * Gets the state of a feature of the scanner.
     *
     * @param featureId the identifier of the feature
     * @return the current state of the validation or notify-char-refs feature
     * @throws XMLConfigurationException with {@code Status.NOT_RECOGNIZED}
     *         for any other identifier
     */
    public boolean getFeature(String featureId)
            throws XMLConfigurationException {

        if (VALIDATION.equals(featureId)) {
            return fValidation;
        }
        if (NOTIFY_CHAR_REFS.equals(featureId)) {
            return fNotifyCharRefs;
        }
        throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
    }

    //
    // Protected methods
    //

    // anybody calling this had better have set the SymbolTable!
376 protected void reset() { 377 init(); 378 379 // DTD preparsing defaults: 380 fValidation = true; 381 fNotifyCharRefs = false; 382 383 } 384 385 public void reset(PropertyManager propertyManager) { 386 init(); 387 // Xerces properties 388 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 389 390 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 391 392 fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER); 393 fEntityStore = fEntityManager.getEntityStore() ; 394 fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ; 395 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 396 397 //fEntityManager.reset(); 398 // DTD preparsing defaults: 399 fValidation = false; 400 fNotifyCharRefs = false; 401 402 } 403 // common scanning methods 404 405 /** 406 * Scans an XML or text declaration. 407 * <p> 408 * <pre> 409 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 410 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 411 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 413 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 414 * | ('"' ('yes' | 'no') '"')) 415 * 416 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 417 * </pre> 418 * 419 * @param scanningTextDecl True if a text declaration is to 420 * be scanned instead of an XML 421 * declaration. 422 * @param pseudoAttributeValues An array of size 3 to return the version, 423 * encoding and standalone pseudo attribute values 424 * (in that order). 425 * 426 * <strong>Note:</strong> This method uses fString, anything in it 427 * at the time of calling is lost. 
428 */ 429 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl, 430 String[] pseudoAttributeValues) 431 throws IOException, XNIException { 432 433 // pseudo-attribute values 434 String version = null; 435 String encoding = null; 436 String standalone = null; 437 438 // scan pseudo-attributes 439 final int STATE_VERSION = 0; 440 final int STATE_ENCODING = 1; 441 final int STATE_STANDALONE = 2; 442 final int STATE_DONE = 3; 443 int state = STATE_VERSION; 444 445 boolean dataFoundForTarget = false; 446 boolean sawSpace = fEntityScanner.skipSpaces(); 447 // since pseudoattributes are *not* attributes, 448 // their quotes don't need to be preserved in external parameter entities. 449 // the XMLEntityScanner#scanLiteral method will continue to 450 // emit -1 in such cases when it finds a quote; this is 451 // fine for other methods that parse scanned entities, 452 // but not for the scanning of pseudoattributes. So, 453 // temporarily, we must mark the current entity as not being "literal" 454 Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity(); 455 boolean currLiteral = currEnt.literal; 456 currEnt.literal = false; 457 while (fEntityScanner.peekChar() != '?') { 458 dataFoundForTarget = true; 459 String name = scanPseudoAttribute(scanningTextDecl, fString); 460 switch (state) { 461 case STATE_VERSION: { 462 if (name.equals(fVersionSymbol)) { 463 if (!sawSpace) { 464 reportFatalError(scanningTextDecl 465 ? 
"SpaceRequiredBeforeVersionInTextDecl" 466 : "SpaceRequiredBeforeVersionInXMLDecl", 467 null); 468 } 469 version = fString.toString(); 470 state = STATE_ENCODING; 471 if (!versionSupported(version)) { 472 reportFatalError("VersionNotSupported", 473 new Object[]{version}); 474 } 475 476 if (version.equals("1.1")) { 477 Entity.ScannedEntity top = fEntityManager.getTopLevelEntity(); 478 if (top != null && (top.version == null || top.version.equals("1.0"))) { 479 reportFatalError("VersionMismatch", null); 480 } 481 fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1); 482 } 483 484 } else if (name.equals(fEncodingSymbol)) { 485 if (!scanningTextDecl) { 486 reportFatalError("VersionInfoRequired", null); 487 } 488 if (!sawSpace) { 489 reportFatalError(scanningTextDecl 490 ? "SpaceRequiredBeforeEncodingInTextDecl" 491 : "SpaceRequiredBeforeEncodingInXMLDecl", 492 null); 493 } 494 encoding = fString.toString(); 495 state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE; 496 } else { 497 if (scanningTextDecl) { 498 reportFatalError("EncodingDeclRequired", null); 499 } else { 500 reportFatalError("VersionInfoRequired", null); 501 } 502 } 503 break; 504 } 505 case STATE_ENCODING: { 506 if (name.equals(fEncodingSymbol)) { 507 if (!sawSpace) { 508 reportFatalError(scanningTextDecl 509 ? "SpaceRequiredBeforeEncodingInTextDecl" 510 : "SpaceRequiredBeforeEncodingInXMLDecl", 511 null); 512 } 513 encoding = fString.toString(); 514 state = scanningTextDecl ? 
STATE_DONE : STATE_STANDALONE; 515 // TODO: check encoding name; set encoding on 516 // entity scanner 517 } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) { 518 if (!sawSpace) { 519 reportFatalError("SpaceRequiredBeforeStandalone", 520 null); 521 } 522 standalone = fString.toString(); 523 state = STATE_DONE; 524 if (!standalone.equals("yes") && !standalone.equals("no")) { 525 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 526 } 527 } else { 528 reportFatalError("EncodingDeclRequired", null); 529 } 530 break; 531 } 532 case STATE_STANDALONE: { 533 if (name.equals(fStandaloneSymbol)) { 534 if (!sawSpace) { 535 reportFatalError("SpaceRequiredBeforeStandalone", 536 null); 537 } 538 standalone = fString.toString(); 539 state = STATE_DONE; 540 if (!standalone.equals("yes") && !standalone.equals("no")) { 541 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 542 } 543 } else { 544 reportFatalError("SDDeclNameInvalid", null); 545 } 546 break; 547 } 548 default: { 549 reportFatalError("NoMorePseudoAttributes", null); 550 } 551 } 552 sawSpace = fEntityScanner.skipSpaces(); 553 } 554 // restore original literal value 555 if(currLiteral) { 556 currEnt.literal = true; 557 } 558 // REVISIT: should we remove this error reporting? 559 if (scanningTextDecl && state != STATE_DONE) { 560 reportFatalError("MorePseudoAttributes", null); 561 } 562 563 // If there is no data in the xml or text decl then we fail to report error 564 // for version or encoding info above. 
565 if (scanningTextDecl) { 566 if (!dataFoundForTarget && encoding == null) { 567 reportFatalError("EncodingDeclRequired", null); 568 } 569 } else { 570 if (!dataFoundForTarget && version == null) { 571 reportFatalError("VersionInfoRequired", null); 572 } 573 } 574 575 // end 576 if (!fEntityScanner.skipChar('?', null)) { 577 reportFatalError("XMLDeclUnterminated", null); 578 } 579 if (!fEntityScanner.skipChar('>', null)) { 580 reportFatalError("XMLDeclUnterminated", null); 581 582 } 583 584 // fill in return array 585 pseudoAttributeValues[0] = version; 586 pseudoAttributeValues[1] = encoding; 587 pseudoAttributeValues[2] = standalone; 588 589 } // scanXMLDeclOrTextDecl(boolean) 590 591 /** 592 * Scans a pseudo attribute. 593 * 594 * @param scanningTextDecl True if scanning this pseudo-attribute for a 595 * TextDecl; false if scanning XMLDecl. This 596 * flag is needed to report the correct type of 597 * error. 598 * @param value The string to fill in with the attribute 599 * value. 600 * 601 * @return The name of the attribute 602 * 603 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 604 * at the time of calling is lost. 605 */ 606 protected String scanPseudoAttribute(boolean scanningTextDecl, 607 XMLString value) 608 throws IOException, XNIException { 609 610 String name = scanPseudoAttributeName(); 611 // XMLEntityManager.print(fEntityManager.getCurrentEntity()); 612 613 if (name == null) { 614 reportFatalError("PseudoAttrNameExpected", null); 615 } 616 fEntityScanner.skipSpaces(); 617 if (!fEntityScanner.skipChar('=', null)) { 618 reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl" 619 : "EqRequiredInXMLDecl", new Object[]{name}); 620 } 621 fEntityScanner.skipSpaces(); 622 int quote = fEntityScanner.peekChar(); 623 if (quote != '\'' && quote != '"') { 624 reportFatalError(scanningTextDecl ? 
"QuoteRequiredInTextDecl" 625 : "QuoteRequiredInXMLDecl" , new Object[]{name}); 626 } 627 fEntityScanner.scanChar(NameType.ATTRIBUTE); 628 int c = fEntityScanner.scanLiteral(quote, value, false); 629 if (c != quote) { 630 fStringBuffer2.clear(); 631 do { 632 fStringBuffer2.append(value); 633 if (c != -1) { 634 if (c == '&' || c == '%' || c == '<' || c == ']') { 635 fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE)); 636 } else if (XMLChar.isHighSurrogate(c)) { 637 scanSurrogates(fStringBuffer2); 638 } else if (isInvalidLiteral(c)) { 639 String key = scanningTextDecl 640 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl"; 641 reportFatalError(key, 642 new Object[] {Integer.toString(c, 16)}); 643 fEntityScanner.scanChar(null); 644 } 645 } 646 c = fEntityScanner.scanLiteral(quote, value, false); 647 } while (c != quote); 648 fStringBuffer2.append(value); 649 value.setValues(fStringBuffer2); 650 } 651 if (!fEntityScanner.skipChar(quote, null)) { 652 reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl" 653 : "CloseQuoteMissingInXMLDecl", 654 new Object[]{name}); 655 } 656 657 // return 658 return name; 659 660 } // scanPseudoAttribute(XMLString):String 661 662 /** 663 * Scans the name of a pseudo attribute. The only legal names 664 * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'. 665 * 666 * @return the name of the pseudo attribute or <code>null</code> 667 * if a legal pseudo attribute name could not be scanned. 
668 */ 669 private String scanPseudoAttributeName() throws IOException, XNIException { 670 final int ch = fEntityScanner.peekChar(); 671 switch (ch) { 672 case 'v': 673 if (fEntityScanner.skipString(fVersionSymbol)) { 674 return fVersionSymbol; 675 } 676 break; 677 case 'e': 678 if (fEntityScanner.skipString(fEncodingSymbol)) { 679 return fEncodingSymbol; 680 } 681 break; 682 case 's': 683 if (fEntityScanner.skipString(fStandaloneSymbol)) { 684 return fStandaloneSymbol; 685 } 686 break; 687 } 688 return null; 689 } // scanPseudoAttributeName() 690 691 /** 692 * Scans a processing instruction. 693 * <p> 694 * <pre> 695 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 696 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 697 * </pre> 698 */ 699 //CHANGED: 700 //EARLIER: scanPI() 701 //NOW: scanPI(XMLStringBuffer) 702 //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same 703 // as that for scanContent() 704 705 protected void scanPI(XMLStringBuffer data) throws IOException, XNIException { 706 707 // target 708 fReportEntity = false; 709 String target = fEntityScanner.scanName(NameType.PI); 710 if (target == null) { 711 reportFatalError("PITargetRequired", null); 712 } 713 714 // scan data 715 scanPIData(target, data); 716 fReportEntity = true; 717 718 } // scanPI(XMLStringBuffer) 719 720 /** 721 * Scans a processing data. This is needed to handle the situation 722 * where a document starts with a processing instruction whose 723 * target name <em>starts with</em> "xml". (e.g. xmlfoo) 724 * 725 * This method would always read the whole data. We have while loop and data is buffered 726 * until delimeter is encountered. 727 * 728 * @param target The PI target 729 * @param data The string to fill in with the data 730 */ 731 732 //CHANGED: 733 //Earlier:This method uses the fStringBuffer and later buffer values are set to 734 //the supplied XMLString.... 
735 //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would 736 //be appended to that buffer 737 738 protected void scanPIData(String target, XMLStringBuffer data) 739 throws IOException, XNIException { 740 741 // check target 742 if (target.length() == 3) { 743 char c0 = Character.toLowerCase(target.charAt(0)); 744 char c1 = Character.toLowerCase(target.charAt(1)); 745 char c2 = Character.toLowerCase(target.charAt(2)); 746 if (c0 == 'x' && c1 == 'm' && c2 == 'l') { 747 reportFatalError("ReservedPITarget", null); 748 } 749 } 750 751 // spaces 752 if (!fEntityScanner.skipSpaces()) { 753 if (fEntityScanner.skipString("?>")) { 754 // we found the end, there is no data just return 755 return; 756 } else { 757 // if there is data there should be some space 758 reportFatalError("SpaceRequiredInPI", null); 759 } 760 } 761 762 // since scanData appends the parsed data to the buffer passed 763 // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer) 764 //until all of the data is buffered. 765 if (fEntityScanner.scanData("?>", data)) { 766 do { 767 int c = fEntityScanner.peekChar(); 768 if (c != -1) { 769 if (XMLChar.isHighSurrogate(c)) { 770 scanSurrogates(data); 771 } else if (isInvalidLiteral(c)) { 772 reportFatalError("InvalidCharInPI", 773 new Object[]{Integer.toHexString(c)}); 774 fEntityScanner.scanChar(null); 775 } 776 } 777 } while (fEntityScanner.scanData("?>", data)); 778 } 779 780 } // scanPIData(String,XMLString) 781 782 /** 783 * Scans a comment. 784 * <p> 785 * <pre> 786 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 787 * </pre> 788 * <p> 789 * <strong>Note:</strong> Called after scanning past '<!--' 790 * <strong>Note:</strong> This method uses fString, anything in it 791 * at the time of calling is lost. 792 * 793 * @param text The buffer to fill in with the text. 
794 */ 795 protected void scanComment(XMLStringBuffer text) 796 throws IOException, XNIException { 797 798 //System.out.println( "XMLScanner#scanComment# In Scan Comment" ); 799 // text 800 // REVISIT: handle invalid character, eof 801 text.clear(); 802 while (fEntityScanner.scanData("--", text)) { 803 int c = fEntityScanner.peekChar(); 804 805 //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() ); 806 //System.out.println( "XMLScanner#scanComment#c == " + c ); 807 808 if (c != -1) { 809 if (XMLChar.isHighSurrogate(c)) { 810 scanSurrogates(text); 811 } 812 else if (isInvalidLiteral(c)) { 813 reportFatalError("InvalidCharInComment", 814 new Object[] { Integer.toHexString(c) }); 815 fEntityScanner.scanChar(NameType.COMMENT); 816 } 817 } 818 } 819 if (!fEntityScanner.skipChar('>', NameType.COMMENT)) { 820 reportFatalError("DashDashInComment", null); 821 } 822 823 } // scanComment() 824 825 /** 826 * Scans an attribute value and normalizes whitespace converting all 827 * whitespace characters to space characters. 828 * 829 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 830 * 831 * @param value The XMLString to fill in with the value. 832 * @param nonNormalizedValue The XMLString to fill in with the 833 * non-normalized value. 834 * @param atName The name of the attribute being parsed (for error msgs). 835 * @param attributes The attributes list for the scanned attribute. 836 * @param attrIndex The index of the attribute to use from the list. 837 * @param checkEntities true if undeclared entities should be reported as VC violation, 838 * false if undeclared entities should be reported as WFC violation. 839 * @param eleName The name of element to which this attribute belongs. 840 * @param isNSURI a flag indicating whether the content is a Namespace URI 841 * 842 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 843 * at the time of calling is lost. 
**/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // only obtained (via getStringBuffer()) when the value cannot be
        // read with a single scanLiteral call
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        // consume the opening quote and remember the entity depth at entry;
        // non-normalized text is only recorded while at this depth
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        int entityDepth = fEntityDepth;

        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // fStringBuffer2 collects the non-normalized value, when requested
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        if (c != quote) {
            // scanLiteral stopped before the closing quote: build the value
            // piecewise in stringBuffer, handling the stopping character on
            // each iteration
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // reference: either a character reference ("&#...") or
                    // an entity reference ("&name;")
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        // the result is only used for debug output
                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // amp/apos/lt/gt/quot are appended directly by
                        // resolveCharacter; anything else is a general entity
                        // NOTE(review): resolveCharacter already calls
                        // checkEntityLimit for gt and quot, so those two look
                        // like they are counted twice here - confirm intent.
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    if (checkEntities) {
                                        // reported as a (non-fatal) error, and
                                        // only when validating
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // started whether or not the entity was
                                // found to be declared
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported and skipped; it is not added to the
                    // normalized value
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                } else if (c == '%' || c == ']') {
                    // these characters are copied through unchanged
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // a line break becomes a space in the normalized value
                    // and '\n' in the non-normalized one
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // surrogate pair: scanned into fStringBuffer3 first and
                    // copied over only if scanSurrogates succeeded
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // invalid character: reported and skipped
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                }
                // read the next run of literal text
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check to Attributes , do conversion
                //only if attribute is being accessed. -Venu
            // a quote only ends the value at the entity depth we started at
            } while (c != quote || entityDepth != fEntityDepth);
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()


    /**
     * Resolves character entity references.
     * @param entityName the name of the entity
     * @param stringBuffer the current XMLStringBuffer to append the character to.
1020 * @return true if resolved, false otherwise 1021 */ 1022 protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) { 1023 /** 1024 * entityNames (symbols) are interned. The equals method would do the same, 1025 * but I'm leaving it as comparisons by references are common in the impl 1026 * and it made it explicit to others who read this code. 1027 */ 1028 if (entityName == fAmpSymbol) { 1029 stringBuffer.append('&'); 1030 return true; 1031 } else if (entityName == fAposSymbol) { 1032 stringBuffer.append('\''); 1033 return true; 1034 } else if (entityName == fLtSymbol) { 1035 stringBuffer.append('<'); 1036 return true; 1037 } else if (entityName == fGtSymbol) { 1038 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1039 stringBuffer.append('>'); 1040 return true; 1041 } else if (entityName == fQuotSymbol) { 1042 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1043 stringBuffer.append('"'); 1044 return true; 1045 } 1046 return false; 1047 } 1048 1049 /** 1050 * Scans External ID and return the public and system IDs. 1051 * 1052 * @param identifiers An array of size 2 to return the system id, 1053 * and public id (in that order). 1054 * @param optionalSystemId Specifies whether the system id is optional. 1055 * 1056 * <strong>Note:</strong> This method uses fString and fStringBuffer, 1057 * anything in them at the time of calling is lost. 
1058 */ 1059 protected void scanExternalID(String[] identifiers, 1060 boolean optionalSystemId) 1061 throws IOException, XNIException { 1062 1063 String systemId = null; 1064 String publicId = null; 1065 if (fEntityScanner.skipString("PUBLIC")) { 1066 if (!fEntityScanner.skipSpaces()) { 1067 reportFatalError("SpaceRequiredAfterPUBLIC", null); 1068 } 1069 scanPubidLiteral(fString); 1070 publicId = fString.toString(); 1071 1072 if (!fEntityScanner.skipSpaces() && !optionalSystemId) { 1073 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); 1074 } 1075 } 1076 1077 if (publicId != null || fEntityScanner.skipString("SYSTEM")) { 1078 if (publicId == null && !fEntityScanner.skipSpaces()) { 1079 reportFatalError("SpaceRequiredAfterSYSTEM", null); 1080 } 1081 int quote = fEntityScanner.peekChar(); 1082 if (quote != '\'' && quote != '"') { 1083 if (publicId != null && optionalSystemId) { 1084 // looks like we don't have any system id 1085 // simply return the public id 1086 identifiers[0] = null; 1087 identifiers[1] = publicId; 1088 return; 1089 } 1090 reportFatalError("QuoteRequiredInSystemID", null); 1091 } 1092 fEntityScanner.scanChar(null); 1093 XMLString ident = fString; 1094 if (fEntityScanner.scanLiteral(quote, ident, false) != quote) { 1095 fStringBuffer.clear(); 1096 do { 1097 fStringBuffer.append(ident); 1098 int c = fEntityScanner.peekChar(); 1099 if (XMLChar.isMarkup(c) || c == ']') { 1100 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 1101 } else if (c != -1 && isInvalidLiteral(c)) { 1102 reportFatalError("InvalidCharInSystemID", 1103 new Object[] {Integer.toString(c, 16)}); 1104 } 1105 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote); 1106 fStringBuffer.append(ident); 1107 ident = fStringBuffer; 1108 } 1109 systemId = ident.toString(); 1110 if (!fEntityScanner.skipChar(quote, null)) { 1111 reportFatalError("SystemIDUnterminated", null); 1112 } 1113 } 1114 1115 // store result in array 1116 identifiers[0] = systemId; 1117 
identifiers[1] = publicId; 1118 } 1119 1120 1121 /** 1122 * Scans public ID literal. 1123 * 1124 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1125 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1126 * 1127 * The returned string is normalized according to the following rule, 1128 * from http://www.w3.org/TR/REC-xml#dt-pubid: 1129 * 1130 * Before a match is attempted, all strings of white space in the public 1131 * identifier must be normalized to single space characters (#x20), and 1132 * leading and trailing white space must be removed. 1133 * 1134 * @param literal The string to fill in with the public ID literal. 1135 * @return True on success. 1136 * 1137 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 1138 * the time of calling is lost. 1139 */ 1140 protected boolean scanPubidLiteral(XMLString literal) 1141 throws IOException, XNIException { 1142 int quote = fEntityScanner.scanChar(null); 1143 if (quote != '\'' && quote != '"') { 1144 reportFatalError("QuoteRequiredInPublicID", null); 1145 return false; 1146 } 1147 1148 fStringBuffer.clear(); 1149 // skip leading whitespace 1150 boolean skipSpace = true; 1151 boolean dataok = true; 1152 while (true) { 1153 int c = fEntityScanner.scanChar(null); 1154 if (c == ' ' || c == '\n' || c == '\r') { 1155 if (!skipSpace) { 1156 // take the first whitespace as a space and skip the others 1157 fStringBuffer.append(' '); 1158 skipSpace = true; 1159 } 1160 } else if (c == quote) { 1161 if (skipSpace) { 1162 // if we finished on a space let's trim it 1163 fStringBuffer.length--; 1164 } 1165 literal.setValues(fStringBuffer); 1166 break; 1167 } else if (XMLChar.isPubid(c)) { 1168 fStringBuffer.append((char)c); 1169 skipSpace = false; 1170 } else if (c == -1) { 1171 reportFatalError("PublicIDUnterminated", null); 1172 return false; 1173 } else { 1174 dataok = false; 1175 reportFatalError("InvalidCharInPublicID", 1176 new 
Object[]{Integer.toHexString(c)}); 1177 } 1178 } 1179 return dataok; 1180 } 1181 1182 1183 /** 1184 * Normalize whitespace in an XMLString converting all whitespace 1185 * characters to space characters. 1186 */ 1187 protected void normalizeWhitespace(XMLString value) { 1188 int i=0; 1189 int j=0; 1190 int [] buff = fEntityScanner.whiteSpaceLookup; 1191 int buffLen = fEntityScanner.whiteSpaceLen; 1192 int end = value.offset + value.length; 1193 while(i < buffLen){ 1194 j = buff[i]; 1195 if(j < end ){ 1196 value.ch[j] = ' '; 1197 } 1198 i++; 1199 } 1200 } 1201 1202 // 1203 // XMLEntityHandler methods 1204 // 1205 1206 /** 1207 * This method notifies of the start of an entity. The document entity 1208 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1209 * parameter entity names start with '%'; and general entities are just 1210 * specified by their name. 1211 * 1212 * @param name The name of the entity. 1213 * @param identifier The resource identifier. 1214 * @param encoding The auto-detected IANA encoding name of the entity 1215 * stream. This value will be null in those situations 1216 * where the entity encoding is not auto-detected (e.g. 1217 * internal entities or a document entity that is 1218 * parsed from a java.io.Reader). 1219 * 1220 * @throws XNIException Thrown by handler to signal an error. 1221 */ 1222 public void startEntity(String name, 1223 XMLResourceIdentifier identifier, 1224 String encoding, Augmentations augs) throws XNIException { 1225 1226 // keep track of the entity depth 1227 fEntityDepth++; 1228 // must reset entity scanner 1229 fEntityScanner = fEntityManager.getEntityScanner(); 1230 fEntityStore = fEntityManager.getEntityStore() ; 1231 } // startEntity(String,XMLResourceIdentifier,String) 1232 1233 /** 1234 * This method notifies the end of an entity. 
The document entity has 1235 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1236 * parameter entity names start with '%'; and general entities are just 1237 * specified by their name. 1238 * 1239 * @param name The name of the entity. 1240 * 1241 * @throws XNIException Thrown by handler to signal an error. 1242 */ 1243 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 1244 // keep track of the entity depth 1245 if (fEntityDepth > 0) { 1246 fEntityDepth--; 1247 } 1248 } // endEntity(String) 1249 1250 /** 1251 * Scans a character reference and append the corresponding chars to the 1252 * specified buffer. 1253 * 1254 * <p> 1255 * <pre> 1256 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1257 * </pre> 1258 * 1259 * <strong>Note:</strong> This method uses fStringBuffer, anything in it 1260 * at the time of calling is lost. 1261 * 1262 * @param buf the character buffer to append chars to 1263 * @param buf2 the character buffer to append non-normalized chars to 1264 * 1265 * @return the character value or (-1) on conversion failure 1266 */ 1267 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 1268 throws IOException, XNIException { 1269 int initLen = buf.length; 1270 // scan hexadecimal value 1271 boolean hex = false; 1272 if (fEntityScanner.skipChar('x', NameType.REFERENCE)) { 1273 if (buf2 != null) { buf2.append('x'); } 1274 hex = true; 1275 fStringBuffer3.clear(); 1276 boolean digit = true; 1277 1278 int c = fEntityScanner.peekChar(); 1279 digit = (c >= '0' && c <= '9') || 1280 (c >= 'a' && c <= 'f') || 1281 (c >= 'A' && c <= 'F'); 1282 if (digit) { 1283 if (buf2 != null) { buf2.append((char)c); } 1284 fEntityScanner.scanChar(NameType.REFERENCE); 1285 fStringBuffer3.append((char)c); 1286 1287 do { 1288 c = fEntityScanner.peekChar(); 1289 digit = (c >= '0' && c <= '9') || 1290 (c >= 'a' && c <= 'f') || 1291 (c >= 'A' && c <= 'F'); 1292 if (digit) { 1293 if (buf2 != 
null) { buf2.append((char)c); } 1294 fEntityScanner.scanChar(NameType.REFERENCE); 1295 fStringBuffer3.append((char)c); 1296 } 1297 } while (digit); 1298 } else { 1299 reportFatalError("HexdigitRequiredInCharRef", null); 1300 } 1301 } 1302 1303 // scan decimal value 1304 else { 1305 fStringBuffer3.clear(); 1306 boolean digit = true; 1307 1308 int c = fEntityScanner.peekChar(); 1309 digit = c >= '0' && c <= '9'; 1310 if (digit) { 1311 if (buf2 != null) { buf2.append((char)c); } 1312 fEntityScanner.scanChar(NameType.REFERENCE); 1313 fStringBuffer3.append((char)c); 1314 1315 do { 1316 c = fEntityScanner.peekChar(); 1317 digit = c >= '0' && c <= '9'; 1318 if (digit) { 1319 if (buf2 != null) { buf2.append((char)c); } 1320 fEntityScanner.scanChar(NameType.REFERENCE); 1321 fStringBuffer3.append((char)c); 1322 } 1323 } while (digit); 1324 } else { 1325 reportFatalError("DigitRequiredInCharRef", null); 1326 } 1327 } 1328 1329 // end 1330 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1331 reportFatalError("SemicolonRequiredInCharRef", null); 1332 } 1333 if (buf2 != null) { buf2.append(';'); } 1334 1335 // convert string to number 1336 int value = -1; 1337 try { 1338 value = Integer.parseInt(fStringBuffer3.toString(), 1339 hex ? 16 : 10); 1340 1341 // character reference must be a valid XML character 1342 if (isInvalid(value)) { 1343 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1344 if (hex) errorBuf.append('x'); 1345 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1346 reportFatalError("InvalidCharRef", 1347 new Object[]{errorBuf.toString()}); 1348 } 1349 } catch (NumberFormatException e) { 1350 // Conversion failed, let -1 value drop through. 1351 // If we end up here, the character reference was invalid. 
1352 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1353 if (hex) errorBuf.append('x'); 1354 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1355 reportFatalError("InvalidCharRef", 1356 new Object[]{errorBuf.toString()}); 1357 } 1358 1359 // append corresponding chars to the given buffer 1360 if (!XMLChar.isSupplemental(value)) { 1361 buf.append((char) value); 1362 } else { 1363 // character is supplemental, split it into surrogate chars 1364 buf.append(XMLChar.highSurrogate(value)); 1365 buf.append(XMLChar.lowSurrogate(value)); 1366 } 1367 1368 // char refs notification code 1369 if (fNotifyCharRefs && value != -1) { 1370 String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString(); 1371 if (!fScanningAttribute) { 1372 fCharRefLiteral = literal; 1373 } 1374 } 1375 1376 if (fEntityScanner.fCurrentEntity.isGE) { 1377 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen); 1378 } 1379 return value; 1380 } 1381 // returns true if the given character is not 1382 // valid with respect to the version of 1383 // XML understood by this scanner. 1384 protected boolean isInvalid(int value) { 1385 return (XMLChar.isInvalid(value)); 1386 } // isInvalid(int): boolean 1387 1388 // returns true if the given character is not 1389 // valid or may not be used outside a character reference 1390 // with respect to the version of XML understood by this scanner. 1391 protected boolean isInvalidLiteral(int value) { 1392 return (XMLChar.isInvalid(value)); 1393 } // isInvalidLiteral(int): boolean 1394 1395 // returns true if the given character is 1396 // a valid nameChar with respect to the version of 1397 // XML understood by this scanner. 
1398 protected boolean isValidNameChar(int value) { 1399 return (XMLChar.isName(value)); 1400 } // isValidNameChar(int): boolean 1401 1402 // returns true if the given character is 1403 // a valid NCName character with respect to the version of 1404 // XML understood by this scanner. 1405 protected boolean isValidNCName(int value) { 1406 return (XMLChar.isNCName(value)); 1407 } // isValidNCName(int): boolean 1408 1409 // returns true if the given character is 1410 // a valid nameStartChar with respect to the version of 1411 // XML understood by this scanner. 1412 protected boolean isValidNameStartChar(int value) { 1413 return (XMLChar.isNameStart(value)); 1414 } // isValidNameStartChar(int): boolean 1415 1416 // returns true if the given character is 1417 // a valid high surrogate for a nameStartChar 1418 // with respect to the version of XML understood 1419 // by this scanner. 1420 protected boolean isValidNameStartHighSurrogate(int value) { 1421 return false; 1422 } // isValidNameStartHighSurrogate(int): boolean 1423 1424 protected boolean versionSupported(String version ) { 1425 return version.equals("1.0") || version.equals("1.1"); 1426 } // version Supported 1427 1428 /** 1429 * Scans surrogates and append them to the specified buffer. 1430 * <p> 1431 * <strong>Note:</strong> This assumes the current char has already been 1432 * identified as a high surrogate. 1433 * 1434 * @param buf The StringBuffer to append the read surrogates to. 1435 * @return True if it succeeded. 
1436 */ 1437 protected boolean scanSurrogates(XMLStringBuffer buf) 1438 throws IOException, XNIException { 1439 1440 int high = fEntityScanner.scanChar(null); 1441 int low = fEntityScanner.peekChar(); 1442 if (!XMLChar.isLowSurrogate(low)) { 1443 reportFatalError("InvalidCharInContent", 1444 new Object[] {Integer.toString(high, 16)}); 1445 return false; 1446 } 1447 fEntityScanner.scanChar(null); 1448 1449 // convert surrogates to supplemental character 1450 int c = XMLChar.supplemental((char)high, (char)low); 1451 1452 // supplemental character must be a valid XML character 1453 if (isInvalid(c)) { 1454 reportFatalError("InvalidCharInContent", 1455 new Object[]{Integer.toString(c, 16)}); 1456 return false; 1457 } 1458 1459 // fill in the buffer 1460 buf.append((char)high); 1461 buf.append((char)low); 1462 1463 return true; 1464 1465 } // scanSurrogates():boolean 1466 1467 1468 /** 1469 * Convenience function used in all XML scanners. 1470 */ 1471 protected void reportFatalError(String msgId, Object[] args) 1472 throws XNIException { 1473 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN, 1474 msgId, args, 1475 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1476 } 1477 1478 // private methods 1479 private void init() { 1480 // initialize scanner 1481 fEntityScanner = null; 1482 // initialize vars 1483 fEntityDepth = 0; 1484 fReportEntity = true; 1485 fResourceIdentifier.clear(); 1486 1487 if(!fAttributeCacheInitDone){ 1488 for(int i = 0; i < initialCacheCount; i++){ 1489 attributeValueCache.add(new XMLString()); 1490 stringBufferCache.add(new XMLStringBuffer()); 1491 } 1492 fAttributeCacheInitDone = true; 1493 } 1494 fStringBufferIndex = 0; 1495 fAttributeCacheUsedCount = 0; 1496 1497 } 1498 1499 XMLStringBuffer getStringBuffer(){ 1500 if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){ 1501 return stringBufferCache.get(fStringBufferIndex++); 1502 }else{ 1503 XMLStringBuffer tmpObj = new 
XMLStringBuffer(); 1504 fStringBufferIndex++; 1505 stringBufferCache.add(tmpObj); 1506 return tmpObj; 1507 } 1508 } 1509 1510 /** 1511 * Add the count of the content buffer and check if the accumulated 1512 * value exceeds the limit 1513 * @param isPEDecl a flag to indicate whether the entity is parameter 1514 * @param entityName entity name 1515 * @param buffer content buffer 1516 */ 1517 void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) { 1518 checkEntityLimit(isPEDecl, entityName, buffer.length); 1519 } 1520 1521 /** 1522 * Add the count and check limit 1523 * @param isPEDecl a flag to indicate whether the entity is parameter 1524 * @param entityName entity name 1525 * @param len length of the buffer 1526 */ 1527 void checkEntityLimit(boolean isPEDecl, String entityName, int len) { 1528 if (fLimitAnalyzer == null) { 1529 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 1530 } 1531 if (isPEDecl) { 1532 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len); 1533 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1534 fSecurityManager.debugPrint(fLimitAnalyzer); 1535 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName, 1536 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1537 fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1538 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)}); 1539 } 1540 } else { 1541 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len); 1542 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1543 fSecurityManager.debugPrint(fLimitAnalyzer); 1544 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName, 1545 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1546 
fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1547 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)}); 1548 } 1549 } 1550 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1551 fSecurityManager.debugPrint(fLimitAnalyzer); 1552 reportFatalError("TotalEntitySizeLimit", 1553 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1554 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1555 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 1556 } 1557 } 1558 } // class XMLScanner