1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.util.Status; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import javax.xml.stream.events.XMLEvent; 29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.xml.internal.stream.Entity; 45 46 //import com.sun.xml.stream.XMLEntityManager; 47 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter; 48 49 /** 50 * This class is responsible for holding scanning methods common to 51 * scanning the XML document structure and content as well as the DTD 52 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit 53 * from this base class. 54 * 55 * <p> 56 * This component requires the following features and properties from the 57 * component manager that uses it: 58 * <ul> 59 * <li>http://xml.org/sax/features/validation</li> 60 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> 61 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 62 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 63 * <li>http://apache.org/xml/properties/internal/entity-manager</li> 64 * </ul> 65 * 66 * @author Andy Clark, IBM 67 * @author Arnaud Le Hors, IBM 68 * @author Eric Ye, IBM 69 * @author K.Venugopal SUN Microsystems 70 * @author Sunitha Reddy, SUN Microsystems 71 */ 72 public abstract class XMLScanner 73 implements XMLComponent { 74 75 // 76 // Constants 77 // 78 79 // feature identifiers 80 81 /** Feature identifier: namespaces. */ 82 protected static final String NAMESPACES = 83 Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; 84 85 /** Feature identifier: validation. */ 86 protected static final String VALIDATION = 87 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 88 89 /** Feature identifier: notify character references. 
     */
    protected static final String NOTIFY_CHAR_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;

    // property identifiers

    /**
     * Identifier of the internal parser-settings flag. Although it is listed
     * with the property identifiers, it is built from the Xerces
     * <em>feature</em> prefix.
     */
    protected static final String PARSER_SETTINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Property identifier: entity manager. */
    protected static final String ENTITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

    /** Property identifier: security manager. */
    private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;

    // debugging

    /** Debug attribute normalization. */
    protected static final boolean DEBUG_ATTR_NORMALIZATION = false;

    /**
     * Kinds of names the scanner can be reading. Each constant carries a
     * short human-readable literal, available through {@link #literal()}.
     */
    public static enum NameType {
        ATTRIBUTE("attribute"),
        ATTRIBUTENAME("attribute name"),
        COMMENT("comment"),
        DOCTYPE("doctype"),
        ELEMENTSTART("startelement"),
        ELEMENTEND("endelement"),
        ENTITY("entity"),
        NOTATION("notation"),
        PI("pi"),
        REFERENCE("reference");

        /** Human-readable label of this name type. */
        final String literal;
        NameType(String literal) {
            this.literal = literal;
        }

        /** Returns the human-readable label of this name type. */
        String literal() {
            return literal;
        }
    }

    // xxx: defaults to false because the non-normalized attribute value is
    // normally not needed; a feature should be added that, when set to true,
    // makes the scanner compute it.
    private boolean fNeedNonNormalizedValue = false;

    // NOTE(review): the following look like pools/counters for reusable
    // attribute-value and string buffers; their management code is outside
    // this section -- confirm against init() and getStringBuffer().
    protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
    protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
    protected int fStringBufferIndex = 0;
    protected boolean fAttributeCacheInitDone = false;
    protected int fAttributeCacheUsedCount = 0;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation = false;

    /** Namespaces. */
    protected boolean fNamespaces;

    /** Character references notification. */
    protected boolean fNotifyCharRefs = false;

    /** Internal parser-settings feature. */
    protected boolean fParserSettings = true;

    // properties

    /** Property manager, stored by setPropertyManager(PropertyManager). */
    protected PropertyManager fPropertyManager = null ;

    /** Symbol table. */
    protected SymbolTable fSymbolTable;

    /** Error reporter. */
    protected XMLErrorReporter fErrorReporter;

    /** Entity manager. */
    //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
    protected XMLEntityManager fEntityManager = null ;

    /** Entity storage. xxx: this should be available from the entity manager. */
    protected XMLEntityStorage fEntityStore = null ;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    // protected data

    /** Event type. */
    protected XMLEvent fEvent ;

    /** Entity scanner; this always works on the last entity that was opened. */
    protected XMLEntityScanner fEntityScanner = null;

    /** Entity depth. */
    protected int fEntityDepth;

    /** Literal value of the last character reference scanned. */
    protected String fCharRefLiteral = null;

    /** Scanning attribute. */
    protected boolean fScanningAttribute;

    /** Report entity boundary. */
    protected boolean fReportEntity;

    // symbols

    /** Symbol: "version". */
    protected final static String fVersionSymbol = "version".intern();

    /** Symbol: "encoding". */
    protected final static String fEncodingSymbol = "encoding".intern();

    /** Symbol: "standalone". */
    protected final static String fStandaloneSymbol = "standalone".intern();

    /** Symbol: "amp". */
    protected final static String fAmpSymbol = "amp".intern();

    /** Symbol: "lt". */
    protected final static String fLtSymbol = "lt".intern();

    /** Symbol: "gt". */
    protected final static String fGtSymbol = "gt".intern();

    /** Symbol: "quot". */
    protected final static String fQuotSymbol = "quot".intern();

    /** Symbol: "apos". */
    protected final static String fAposSymbol = "apos".intern();

    // temporary variables

    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.

    /** Scratch string. */
    private XMLString fString = new XMLString();

    /** Scratch string buffer. */
    private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Second scratch string buffer. */
    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /** Third scratch string buffer. */
    private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

    // temporary location for Resource identification information.
    protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();

    // NOTE(review): presumably the initial size of the buffer caches above;
    // its use is not visible in this section -- confirm.
    int initialCacheCount = 6;

    //
    // XMLComponent methods
    //

    /**
     * Resets this scanner from the given component manager.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Throws exception if required features and
     *                      properties cannot be found.
275 */ 276 public void reset(XMLComponentManager componentManager) 277 throws XMLConfigurationException { 278 279 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true); 280 281 if (!fParserSettings) { 282 // parser settings have not been changed 283 init(); 284 return; 285 } 286 287 288 // Xerces properties 289 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 290 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 291 fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER); 292 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER); 293 294 //this step is extra because we have separated the storage of entity 295 fEntityStore = fEntityManager.getEntityStore() ; 296 297 // sax features 298 fValidation = componentManager.getFeature(VALIDATION, false); 299 fNamespaces = componentManager.getFeature(NAMESPACES, true); 300 fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false); 301 302 init(); 303 } // reset(XMLComponentManager) 304 305 protected void setPropertyManager(PropertyManager propertyManager){ 306 fPropertyManager = propertyManager ; 307 } 308 309 /** 310 * Sets the value of a property during parsing. 
311 * 312 * @param propertyId 313 * @param value 314 */ 315 public void setProperty(String propertyId, Object value) 316 throws XMLConfigurationException { 317 318 // Xerces properties 319 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 320 String property = 321 propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 322 if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) { 323 fSymbolTable = (SymbolTable)value; 324 } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) { 325 fErrorReporter = (XMLErrorReporter)value; 326 } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 327 fEntityManager = (XMLEntityManager)value; 328 } 329 } 330 331 if (propertyId.equals(SECURITY_MANAGER)) { 332 fSecurityManager = (XMLSecurityManager)value; 333 } 334 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){ 335 fStaxProperties = (HashMap)value; 336 //TODO::discuss with neeraj what are his thoughts on passing properties. 337 //For now use this 338 }*/ 339 340 } // setProperty(String,Object) 341 342 /* 343 * Sets the feature of the scanner. 344 */ 345 public void setFeature(String featureId, boolean value) 346 throws XMLConfigurationException { 347 348 if (VALIDATION.equals(featureId)) { 349 fValidation = value; 350 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 351 fNotifyCharRefs = value; 352 } 353 } 354 355 /* 356 * Gets the state of the feature of the scanner. 357 */ 358 public boolean getFeature(String featureId) 359 throws XMLConfigurationException { 360 361 if (VALIDATION.equals(featureId)) { 362 return fValidation; 363 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 364 return fNotifyCharRefs; 365 } 366 throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId); 367 } 368 369 // 370 // Protected methods 371 // 372 373 // anybody calling this had better have set Symtoltable! 
374 protected void reset() { 375 init(); 376 377 // DTD preparsing defaults: 378 fValidation = true; 379 fNotifyCharRefs = false; 380 381 } 382 383 public void reset(PropertyManager propertyManager) { 384 init(); 385 // Xerces properties 386 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 387 388 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 389 390 fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER); 391 fEntityStore = fEntityManager.getEntityStore() ; 392 fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ; 393 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 394 395 //fEntityManager.reset(); 396 // DTD preparsing defaults: 397 fValidation = false; 398 fNotifyCharRefs = false; 399 400 } 401 // common scanning methods 402 403 /** 404 * Scans an XML or text declaration. 405 * <p> 406 * <pre> 407 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 408 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 409 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 410 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 411 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 412 * | ('"' ('yes' | 'no') '"')) 413 * 414 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 415 * </pre> 416 * 417 * @param scanningTextDecl True if a text declaration is to 418 * be scanned instead of an XML 419 * declaration. 420 * @param pseudoAttributeValues An array of size 3 to return the version, 421 * encoding and standalone pseudo attribute values 422 * (in that order). 423 * 424 * <strong>Note:</strong> This method uses fString, anything in it 425 * at the time of calling is lost. 
     */
    protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
            String[] pseudoAttributeValues)
            throws IOException, XNIException {

        // pseudo-attribute values
        String version = null;
        String encoding = null;
        String standalone = null;

        // scan pseudo-attributes: the state names the next pseudo-attribute
        // that may legally appear (version -> encoding -> standalone -> done)
        final int STATE_VERSION = 0;
        final int STATE_ENCODING = 1;
        final int STATE_STANDALONE = 2;
        final int STATE_DONE = 3;
        int state = STATE_VERSION;

        boolean dataFoundForTarget = false;
        boolean sawSpace = fEntityScanner.skipSpaces();
        // since pseudoattributes are *not* attributes,
        // their quotes don't need to be preserved in external parameter entities.
        // the XMLEntityScanner#scanLiteral method will continue to
        // emit -1 in such cases when it finds a quote; this is
        // fine for other methods that parse scanned entities,
        // but not for the scanning of pseudoattributes.  So,
        // temporarily, we must mark the current entity as not being "literal"
        Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
        boolean currLiteral = currEnt.literal;
        currEnt.literal = false;
        // one pseudo-attribute per iteration, until the '?' of the closing "?>"
        while (fEntityScanner.peekChar() != '?') {
            dataFoundForTarget = true;
            // the value of the pseudo-attribute is left in fString
            String name = scanPseudoAttribute(scanningTextDecl, fString);
            switch (state) {
                case STATE_VERSION: {
                    if (name.equals(fVersionSymbol)) {
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                    ? "SpaceRequiredBeforeVersionInTextDecl"
                                    : "SpaceRequiredBeforeVersionInXMLDecl",
                                    null);
                        }
                        version = fString.toString();
                        state = STATE_ENCODING;
                        if (!versionSupported(version)) {
                            reportFatalError("VersionNotSupported",
                                    new Object[]{version});
                        }

                        if (version.equals("1.1")) {
                            // a 1.1 declaration is an error when the top-level
                            // entity has no version or is version 1.0
                            Entity.ScannedEntity top = fEntityManager.getTopLevelEntity();
                            if (top != null && (top.version == null || top.version.equals("1.0"))) {
                                reportFatalError("VersionMismatch", null);
                            }
                            fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
                        }

                    } else if (name.equals(fEncodingSymbol)) {
                        // encoding may come first only in a text declaration
                        if (!scanningTextDecl) {
                            reportFatalError("VersionInfoRequired", null);
                        }
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                    ? "SpaceRequiredBeforeEncodingInTextDecl"
                                    : "SpaceRequiredBeforeEncodingInXMLDecl",
                                    null);
                        }
                        encoding = fString.toString();
                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                    } else {
                        if (scanningTextDecl) {
                            reportFatalError("EncodingDeclRequired", null);
                        } else {
                            reportFatalError("VersionInfoRequired", null);
                        }
                    }
                    break;
                }
                case STATE_ENCODING: {
                    if (name.equals(fEncodingSymbol)) {
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                    ? "SpaceRequiredBeforeEncodingInTextDecl"
                                    : "SpaceRequiredBeforeEncodingInXMLDecl",
                                    null);
                        }
                        encoding = fString.toString();
                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                        // TODO: check encoding name; set encoding on
                        //       entity scanner
                    } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) {
                        // in an XML declaration, standalone may directly follow version
                        if (!sawSpace) {
                            reportFatalError("SpaceRequiredBeforeStandalone",
                                    null);
                        }
                        standalone = fString.toString();
                        state = STATE_DONE;
                        if (!standalone.equals("yes") && !standalone.equals("no")) {
                            reportFatalError("SDDeclInvalid", new Object[] {standalone});
                        }
                    } else {
                        reportFatalError("EncodingDeclRequired", null);
                    }
                    break;
                }
                case STATE_STANDALONE: {
                    if (name.equals(fStandaloneSymbol)) {
                        if (!sawSpace) {
                            reportFatalError("SpaceRequiredBeforeStandalone",
                                    null);
                        }
                        standalone = fString.toString();
                        state = STATE_DONE;
                        if (!standalone.equals("yes") && !standalone.equals("no")) {
                            reportFatalError("SDDeclInvalid", new Object[] {standalone});
                        }
                    } else {
                        reportFatalError("SDDeclNameInvalid", null);
                    }
                    break;
                }
                default: {
                    // STATE_DONE: nothing more is allowed
                    reportFatalError("NoMorePseudoAttributes", null);
                }
            }
            // remember whether whitespace separates this pseudo-attribute
            // from the next one
            sawSpace = fEntityScanner.skipSpaces();
        }
        // restore original literal value
        if(currLiteral) {
            currEnt.literal = true;
        }
        // REVISIT: should we remove this error reporting?
        if (scanningTextDecl && state != STATE_DONE) {
            reportFatalError("MorePseudoAttributes", null);
        }

        // If the declaration contained no pseudo-attribute at all, the loop
        // above never ran and so reported nothing; report the missing
        // version or encoding info here.
        if (scanningTextDecl) {
            if (!dataFoundForTarget && encoding == null) {
                reportFatalError("EncodingDeclRequired", null);
            }
        } else {
            if (!dataFoundForTarget && version == null) {
                reportFatalError("VersionInfoRequired", null);
            }
        }

        // end: the declaration must be closed by "?>"
        if (!fEntityScanner.skipChar('?', null)) {
            reportFatalError("XMLDeclUnterminated", null);
        }
        if (!fEntityScanner.skipChar('>', null)) {
            reportFatalError("XMLDeclUnterminated", null);

        }

        // fill in return array
        pseudoAttributeValues[0] = version;
        pseudoAttributeValues[1] = encoding;
        pseudoAttributeValues[2] = standalone;

    } // scanXMLDeclOrTextDecl(boolean)

    /**
     * Scans a pseudo attribute.
     *
     * @param scanningTextDecl True if scanning this pseudo-attribute for a
     *                         TextDecl; false if scanning XMLDecl. This
     *                         flag is needed to report the correct type of
     *                         error.
     * @param value            The string to fill in with the attribute
     *                         value.
     *
     * @return The name of the attribute
     *
     * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
     * at the time of calling is lost.
     */
    protected String scanPseudoAttribute(boolean scanningTextDecl,
            XMLString value)
            throws IOException, XNIException {

        String name = scanPseudoAttributeName();
        // XMLEntityManager.print(fEntityManager.getCurrentEntity());

        if (name == null) {
            reportFatalError("PseudoAttrNameExpected", null);
        }
        // name S? '=' S? quote
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('=', null)) {
            reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
                    : "EqRequiredInXMLDecl", new Object[]{name});
        }
        fEntityScanner.skipSpaces();
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
                    : "QuoteRequiredInXMLDecl" , new Object[]{name});
        }
        // consume the opening quote, then read up to the first character at
        // which scanLiteral stops
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        int c = fEntityScanner.scanLiteral(quote, value, false);
        if (c != quote) {
            // scanLiteral stopped before the closing quote: collect the
            // pieces in fStringBuffer2 until the closing quote is reached
            fStringBuffer2.clear();
            do {
                fStringBuffer2.append(value);
                if (c != -1) {
                    if (c == '&' || c == '%' || c == '<' || c == ']') {
                        // these characters are copied into the value as they are
                        fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE));
                    } else if (XMLChar.isHighSurrogate(c)) {
                        scanSurrogates(fStringBuffer2);
                    } else if (isInvalidLiteral(c)) {
                        String key = scanningTextDecl
                                ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
                        reportFatalError(key,
                                new Object[] {Integer.toString(c, 16)});
                        // skip the invalid character
                        fEntityScanner.scanChar(null);
                    }
                }
                c = fEntityScanner.scanLiteral(quote, value, false);
            } while (c != quote);
            fStringBuffer2.append(value);
            value.setValues(fStringBuffer2);
        }
        if (!fEntityScanner.skipChar(quote, null)) {
            reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
                    : "CloseQuoteMissingInXMLDecl",
                    new Object[]{name});
        }

        // return
        return name;

    } // scanPseudoAttribute(XMLString):String

    /**
     * Scans the name of a pseudo attribute. The only legal names
     * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'.
     *
     * @return the name of the pseudo attribute or <code>null</code>
     *         if a legal pseudo attribute name could not be scanned.
666 */ 667 private String scanPseudoAttributeName() throws IOException, XNIException { 668 final int ch = fEntityScanner.peekChar(); 669 switch (ch) { 670 case 'v': 671 if (fEntityScanner.skipString(fVersionSymbol)) { 672 return fVersionSymbol; 673 } 674 break; 675 case 'e': 676 if (fEntityScanner.skipString(fEncodingSymbol)) { 677 return fEncodingSymbol; 678 } 679 break; 680 case 's': 681 if (fEntityScanner.skipString(fStandaloneSymbol)) { 682 return fStandaloneSymbol; 683 } 684 break; 685 } 686 return null; 687 } // scanPseudoAttributeName() 688 689 /** 690 * Scans a processing instruction. 691 * <p> 692 * <pre> 693 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 694 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 695 * </pre> 696 */ 697 //CHANGED: 698 //EARLIER: scanPI() 699 //NOW: scanPI(XMLStringBuffer) 700 //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same 701 // as that for scanContent() 702 703 protected void scanPI(XMLStringBuffer data) throws IOException, XNIException { 704 705 // target 706 fReportEntity = false; 707 String target = fEntityScanner.scanName(NameType.PI); 708 if (target == null) { 709 reportFatalError("PITargetRequired", null); 710 } 711 712 // scan data 713 scanPIData(target, data); 714 fReportEntity = true; 715 716 } // scanPI(XMLStringBuffer) 717 718 /** 719 * Scans a processing data. This is needed to handle the situation 720 * where a document starts with a processing instruction whose 721 * target name <em>starts with</em> "xml". (e.g. xmlfoo) 722 * 723 * This method would always read the whole data. We have while loop and data is buffered 724 * until delimeter is encountered. 725 * 726 * @param target The PI target 727 * @param data The string to fill in with the data 728 */ 729 730 //CHANGED: 731 //Earlier:This method uses the fStringBuffer and later buffer values are set to 732 //the supplied XMLString.... 
733 //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would 734 //be appended to that buffer 735 736 protected void scanPIData(String target, XMLStringBuffer data) 737 throws IOException, XNIException { 738 739 // check target 740 if (target.length() == 3) { 741 char c0 = Character.toLowerCase(target.charAt(0)); 742 char c1 = Character.toLowerCase(target.charAt(1)); 743 char c2 = Character.toLowerCase(target.charAt(2)); 744 if (c0 == 'x' && c1 == 'm' && c2 == 'l') { 745 reportFatalError("ReservedPITarget", null); 746 } 747 } 748 749 // spaces 750 if (!fEntityScanner.skipSpaces()) { 751 if (fEntityScanner.skipString("?>")) { 752 // we found the end, there is no data just return 753 return; 754 } else { 755 // if there is data there should be some space 756 reportFatalError("SpaceRequiredInPI", null); 757 } 758 } 759 760 // since scanData appends the parsed data to the buffer passed 761 // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer) 762 //until all of the data is buffered. 763 if (fEntityScanner.scanData("?>", data, 0)) { 764 do { 765 int c = fEntityScanner.peekChar(); 766 if (c != -1) { 767 if (XMLChar.isHighSurrogate(c)) { 768 scanSurrogates(data); 769 } else if (isInvalidLiteral(c)) { 770 reportFatalError("InvalidCharInPI", 771 new Object[]{Integer.toHexString(c)}); 772 fEntityScanner.scanChar(null); 773 } 774 } 775 } while (fEntityScanner.scanData("?>", data, 0)); 776 } 777 778 } // scanPIData(String,XMLString) 779 780 /** 781 * Scans a comment. 782 * <p> 783 * <pre> 784 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 785 * </pre> 786 * <p> 787 * <strong>Note:</strong> Called after scanning past '<!--' 788 * <strong>Note:</strong> This method uses fString, anything in it 789 * at the time of calling is lost. 790 * 791 * @param text The buffer to fill in with the text. 
792 */ 793 protected void scanComment(XMLStringBuffer text) 794 throws IOException, XNIException { 795 796 //System.out.println( "XMLScanner#scanComment# In Scan Comment" ); 797 // text 798 // REVISIT: handle invalid character, eof 799 text.clear(); 800 while (fEntityScanner.scanData("--", text, 0)) { 801 int c = fEntityScanner.peekChar(); 802 803 //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() ); 804 //System.out.println( "XMLScanner#scanComment#c == " + c ); 805 806 if (c != -1) { 807 if (XMLChar.isHighSurrogate(c)) { 808 scanSurrogates(text); 809 } 810 else if (isInvalidLiteral(c)) { 811 reportFatalError("InvalidCharInComment", 812 new Object[] { Integer.toHexString(c) }); 813 fEntityScanner.scanChar(NameType.COMMENT); 814 } 815 } 816 } 817 if (!fEntityScanner.skipChar('>', NameType.COMMENT)) { 818 reportFatalError("DashDashInComment", null); 819 } 820 821 } // scanComment() 822 823 /** 824 * Scans an attribute value and normalizes whitespace converting all 825 * whitespace characters to space characters. 826 * 827 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 828 * 829 * @param value The XMLString to fill in with the value. 830 * @param nonNormalizedValue The XMLString to fill in with the 831 * non-normalized value. 832 * @param atName The name of the attribute being parsed (for error msgs). 833 * @param attributes The attributes list for the scanned attribute. 834 * @param attrIndex The index of the attribute to use from the list. 835 * @param checkEntities true if undeclared entities should be reported as VC violation, 836 * false if undeclared entities should be reported as WFC violation. 837 * @param eleName The name of element to which this attribute belongs. 838 * @param isNSURI a flag indicating whether the content is a Namespace URI 839 * 840 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 841 * at the time of calling is lost. 
     **/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // NOTE: 'attributes' and 'attrIndex' are not referenced in this body.
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // remember the entity depth at which the value started; the value
        // only ends at a closing quote seen at this same depth
        int entityDepth = fEntityDepth;

        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // fStringBuffer2 accumulates the non-normalized value, when requested
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        if (c != quote) {
            // scanLiteral stopped before the closing quote: build the value
            // piece by piece in a separate buffer
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // reference: either a character reference ("&#...") or
                    // an entity reference ("&name;")
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // the five predefined entities are expanded in place
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    if (checkEntities) {
                                        // validity constraint: an error, and only when validating
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        // well-formedness constraint: fatal
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // scanning continues inside the referenced entity
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported and skipped; it is not added to the value
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                } else if (c == '%' || c == ']') {
                    // consumed and copied into the value as they are
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // a line break becomes a space in the normalized value
                    // and '\n' in the non-normalized one
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // the surrogate pair is scanned into fStringBuffer3 first,
                    // so it is only appended when the scan succeeded
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // reported and skipped; not added to the normalized value
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                }
                // read the next piece of the literal
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check to Attributes , do conversion
                //only if attribute is being accessed. -Venu
            // a quote seen at a different entity depth does not end the value
            } while (c != quote || entityDepth != fEntityDepth);
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()


    /**
     * Resolves character entity references.
     * @param entityName the name of the entity
     * @param stringBuffer the current XMLStringBuffer to append the character to.
1018 * @return true if resolved, false otherwise 1019 */ 1020 protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) { 1021 /** 1022 * entityNames (symbols) are interned. The equals method would do the same, 1023 * but I'm leaving it as comparisons by references are common in the impl 1024 * and it made it explicit to others who read this code. 1025 */ 1026 if (entityName == fAmpSymbol) { 1027 stringBuffer.append('&'); 1028 return true; 1029 } else if (entityName == fAposSymbol) { 1030 stringBuffer.append('\''); 1031 return true; 1032 } else if (entityName == fLtSymbol) { 1033 stringBuffer.append('<'); 1034 return true; 1035 } else if (entityName == fGtSymbol) { 1036 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1037 stringBuffer.append('>'); 1038 return true; 1039 } else if (entityName == fQuotSymbol) { 1040 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1041 stringBuffer.append('"'); 1042 return true; 1043 } 1044 return false; 1045 } 1046 1047 /** 1048 * Scans External ID and return the public and system IDs. 1049 * 1050 * @param identifiers An array of size 2 to return the system id, 1051 * and public id (in that order). 1052 * @param optionalSystemId Specifies whether the system id is optional. 1053 * 1054 * <strong>Note:</strong> This method uses fString and fStringBuffer, 1055 * anything in them at the time of calling is lost. 
1056 */ 1057 protected void scanExternalID(String[] identifiers, 1058 boolean optionalSystemId) 1059 throws IOException, XNIException { 1060 1061 String systemId = null; 1062 String publicId = null; 1063 if (fEntityScanner.skipString("PUBLIC")) { 1064 if (!fEntityScanner.skipSpaces()) { 1065 reportFatalError("SpaceRequiredAfterPUBLIC", null); 1066 } 1067 scanPubidLiteral(fString); 1068 publicId = fString.toString(); 1069 1070 if (!fEntityScanner.skipSpaces() && !optionalSystemId) { 1071 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); 1072 } 1073 } 1074 1075 if (publicId != null || fEntityScanner.skipString("SYSTEM")) { 1076 if (publicId == null && !fEntityScanner.skipSpaces()) { 1077 reportFatalError("SpaceRequiredAfterSYSTEM", null); 1078 } 1079 int quote = fEntityScanner.peekChar(); 1080 if (quote != '\'' && quote != '"') { 1081 if (publicId != null && optionalSystemId) { 1082 // looks like we don't have any system id 1083 // simply return the public id 1084 identifiers[0] = null; 1085 identifiers[1] = publicId; 1086 return; 1087 } 1088 reportFatalError("QuoteRequiredInSystemID", null); 1089 } 1090 fEntityScanner.scanChar(null); 1091 XMLString ident = fString; 1092 if (fEntityScanner.scanLiteral(quote, ident, false) != quote) { 1093 fStringBuffer.clear(); 1094 do { 1095 fStringBuffer.append(ident); 1096 int c = fEntityScanner.peekChar(); 1097 if (XMLChar.isMarkup(c) || c == ']') { 1098 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 1099 } else if (c != -1 && isInvalidLiteral(c)) { 1100 reportFatalError("InvalidCharInSystemID", 1101 new Object[] {Integer.toString(c, 16)}); 1102 } 1103 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote); 1104 fStringBuffer.append(ident); 1105 ident = fStringBuffer; 1106 } 1107 systemId = ident.toString(); 1108 if (!fEntityScanner.skipChar(quote, null)) { 1109 reportFatalError("SystemIDUnterminated", null); 1110 } 1111 } 1112 1113 // store result in array 1114 identifiers[0] = systemId; 1115 
identifiers[1] = publicId; 1116 } 1117 1118 1119 /** 1120 * Scans public ID literal. 1121 * 1122 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1123 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1124 * 1125 * The returned string is normalized according to the following rule, 1126 * from http://www.w3.org/TR/REC-xml#dt-pubid: 1127 * 1128 * Before a match is attempted, all strings of white space in the public 1129 * identifier must be normalized to single space characters (#x20), and 1130 * leading and trailing white space must be removed. 1131 * 1132 * @param literal The string to fill in with the public ID literal. 1133 * @return True on success. 1134 * 1135 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 1136 * the time of calling is lost. 1137 */ 1138 protected boolean scanPubidLiteral(XMLString literal) 1139 throws IOException, XNIException { 1140 int quote = fEntityScanner.scanChar(null); 1141 if (quote != '\'' && quote != '"') { 1142 reportFatalError("QuoteRequiredInPublicID", null); 1143 return false; 1144 } 1145 1146 fStringBuffer.clear(); 1147 // skip leading whitespace 1148 boolean skipSpace = true; 1149 boolean dataok = true; 1150 while (true) { 1151 int c = fEntityScanner.scanChar(null); 1152 if (c == ' ' || c == '\n' || c == '\r') { 1153 if (!skipSpace) { 1154 // take the first whitespace as a space and skip the others 1155 fStringBuffer.append(' '); 1156 skipSpace = true; 1157 } 1158 } else if (c == quote) { 1159 if (skipSpace) { 1160 // if we finished on a space let's trim it 1161 fStringBuffer.length--; 1162 } 1163 literal.setValues(fStringBuffer); 1164 break; 1165 } else if (XMLChar.isPubid(c)) { 1166 fStringBuffer.append((char)c); 1167 skipSpace = false; 1168 } else if (c == -1) { 1169 reportFatalError("PublicIDUnterminated", null); 1170 return false; 1171 } else { 1172 dataok = false; 1173 reportFatalError("InvalidCharInPublicID", 1174 new 
Object[]{Integer.toHexString(c)}); 1175 } 1176 } 1177 return dataok; 1178 } 1179 1180 1181 /** 1182 * Normalize whitespace in an XMLString converting all whitespace 1183 * characters to space characters. 1184 */ 1185 protected void normalizeWhitespace(XMLString value) { 1186 int i=0; 1187 int j=0; 1188 int [] buff = fEntityScanner.whiteSpaceLookup; 1189 int buffLen = fEntityScanner.whiteSpaceLen; 1190 int end = value.offset + value.length; 1191 while(i < buffLen){ 1192 j = buff[i]; 1193 if(j < end ){ 1194 value.ch[j] = ' '; 1195 } 1196 i++; 1197 } 1198 } 1199 1200 // 1201 // XMLEntityHandler methods 1202 // 1203 1204 /** 1205 * This method notifies of the start of an entity. The document entity 1206 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1207 * parameter entity names start with '%'; and general entities are just 1208 * specified by their name. 1209 * 1210 * @param name The name of the entity. 1211 * @param identifier The resource identifier. 1212 * @param encoding The auto-detected IANA encoding name of the entity 1213 * stream. This value will be null in those situations 1214 * where the entity encoding is not auto-detected (e.g. 1215 * internal entities or a document entity that is 1216 * parsed from a java.io.Reader). 1217 * 1218 * @throws XNIException Thrown by handler to signal an error. 1219 */ 1220 public void startEntity(String name, 1221 XMLResourceIdentifier identifier, 1222 String encoding, Augmentations augs) throws XNIException { 1223 1224 // keep track of the entity depth 1225 fEntityDepth++; 1226 // must reset entity scanner 1227 fEntityScanner = fEntityManager.getEntityScanner(); 1228 fEntityStore = fEntityManager.getEntityStore() ; 1229 } // startEntity(String,XMLResourceIdentifier,String) 1230 1231 /** 1232 * This method notifies the end of an entity. 
The document entity has 1233 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1234 * parameter entity names start with '%'; and general entities are just 1235 * specified by their name. 1236 * 1237 * @param name The name of the entity. 1238 * 1239 * @throws XNIException Thrown by handler to signal an error. 1240 */ 1241 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 1242 1243 // keep track of the entity depth 1244 fEntityDepth--; 1245 1246 } // endEntity(String) 1247 1248 /** 1249 * Scans a character reference and append the corresponding chars to the 1250 * specified buffer. 1251 * 1252 * <p> 1253 * <pre> 1254 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1255 * </pre> 1256 * 1257 * <strong>Note:</strong> This method uses fStringBuffer, anything in it 1258 * at the time of calling is lost. 1259 * 1260 * @param buf the character buffer to append chars to 1261 * @param buf2 the character buffer to append non-normalized chars to 1262 * 1263 * @return the character value or (-1) on conversion failure 1264 */ 1265 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 1266 throws IOException, XNIException { 1267 int initLen = buf.length; 1268 // scan hexadecimal value 1269 boolean hex = false; 1270 if (fEntityScanner.skipChar('x', NameType.REFERENCE)) { 1271 if (buf2 != null) { buf2.append('x'); } 1272 hex = true; 1273 fStringBuffer3.clear(); 1274 boolean digit = true; 1275 1276 int c = fEntityScanner.peekChar(); 1277 digit = (c >= '0' && c <= '9') || 1278 (c >= 'a' && c <= 'f') || 1279 (c >= 'A' && c <= 'F'); 1280 if (digit) { 1281 if (buf2 != null) { buf2.append((char)c); } 1282 fEntityScanner.scanChar(NameType.REFERENCE); 1283 fStringBuffer3.append((char)c); 1284 1285 do { 1286 c = fEntityScanner.peekChar(); 1287 digit = (c >= '0' && c <= '9') || 1288 (c >= 'a' && c <= 'f') || 1289 (c >= 'A' && c <= 'F'); 1290 if (digit) { 1291 if (buf2 != null) { 
buf2.append((char)c); } 1292 fEntityScanner.scanChar(NameType.REFERENCE); 1293 fStringBuffer3.append((char)c); 1294 } 1295 } while (digit); 1296 } else { 1297 reportFatalError("HexdigitRequiredInCharRef", null); 1298 } 1299 } 1300 1301 // scan decimal value 1302 else { 1303 fStringBuffer3.clear(); 1304 boolean digit = true; 1305 1306 int c = fEntityScanner.peekChar(); 1307 digit = c >= '0' && c <= '9'; 1308 if (digit) { 1309 if (buf2 != null) { buf2.append((char)c); } 1310 fEntityScanner.scanChar(NameType.REFERENCE); 1311 fStringBuffer3.append((char)c); 1312 1313 do { 1314 c = fEntityScanner.peekChar(); 1315 digit = c >= '0' && c <= '9'; 1316 if (digit) { 1317 if (buf2 != null) { buf2.append((char)c); } 1318 fEntityScanner.scanChar(NameType.REFERENCE); 1319 fStringBuffer3.append((char)c); 1320 } 1321 } while (digit); 1322 } else { 1323 reportFatalError("DigitRequiredInCharRef", null); 1324 } 1325 } 1326 1327 // end 1328 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1329 reportFatalError("SemicolonRequiredInCharRef", null); 1330 } 1331 if (buf2 != null) { buf2.append(';'); } 1332 1333 // convert string to number 1334 int value = -1; 1335 try { 1336 value = Integer.parseInt(fStringBuffer3.toString(), 1337 hex ? 16 : 10); 1338 1339 // character reference must be a valid XML character 1340 if (isInvalid(value)) { 1341 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1342 if (hex) errorBuf.append('x'); 1343 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1344 reportFatalError("InvalidCharRef", 1345 new Object[]{errorBuf.toString()}); 1346 } 1347 } catch (NumberFormatException e) { 1348 // Conversion failed, let -1 value drop through. 1349 // If we end up here, the character reference was invalid. 
1350 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1351 if (hex) errorBuf.append('x'); 1352 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1353 reportFatalError("InvalidCharRef", 1354 new Object[]{errorBuf.toString()}); 1355 } 1356 1357 // append corresponding chars to the given buffer 1358 if (!XMLChar.isSupplemental(value)) { 1359 buf.append((char) value); 1360 } else { 1361 // character is supplemental, split it into surrogate chars 1362 buf.append(XMLChar.highSurrogate(value)); 1363 buf.append(XMLChar.lowSurrogate(value)); 1364 } 1365 1366 // char refs notification code 1367 if (fNotifyCharRefs && value != -1) { 1368 String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString(); 1369 if (!fScanningAttribute) { 1370 fCharRefLiteral = literal; 1371 } 1372 } 1373 1374 if (fEntityScanner.fCurrentEntity.isGE) { 1375 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen); 1376 } 1377 return value; 1378 } 1379 // returns true if the given character is not 1380 // valid with respect to the version of 1381 // XML understood by this scanner. 1382 protected boolean isInvalid(int value) { 1383 return (XMLChar.isInvalid(value)); 1384 } // isInvalid(int): boolean 1385 1386 // returns true if the given character is not 1387 // valid or may not be used outside a character reference 1388 // with respect to the version of XML understood by this scanner. 1389 protected boolean isInvalidLiteral(int value) { 1390 return (XMLChar.isInvalid(value)); 1391 } // isInvalidLiteral(int): boolean 1392 1393 // returns true if the given character is 1394 // a valid nameChar with respect to the version of 1395 // XML understood by this scanner. 
1396 protected boolean isValidNameChar(int value) { 1397 return (XMLChar.isName(value)); 1398 } // isValidNameChar(int): boolean 1399 1400 // returns true if the given character is 1401 // a valid NCName character with respect to the version of 1402 // XML understood by this scanner. 1403 protected boolean isValidNCName(int value) { 1404 return (XMLChar.isNCName(value)); 1405 } // isValidNCName(int): boolean 1406 1407 // returns true if the given character is 1408 // a valid nameStartChar with respect to the version of 1409 // XML understood by this scanner. 1410 protected boolean isValidNameStartChar(int value) { 1411 return (XMLChar.isNameStart(value)); 1412 } // isValidNameStartChar(int): boolean 1413 1414 // returns true if the given character is 1415 // a valid high surrogate for a nameStartChar 1416 // with respect to the version of XML understood 1417 // by this scanner. 1418 protected boolean isValidNameStartHighSurrogate(int value) { 1419 return false; 1420 } // isValidNameStartHighSurrogate(int): boolean 1421 1422 protected boolean versionSupported(String version ) { 1423 return version.equals("1.0") || version.equals("1.1"); 1424 } // version Supported 1425 1426 /** 1427 * Scans surrogates and append them to the specified buffer. 1428 * <p> 1429 * <strong>Note:</strong> This assumes the current char has already been 1430 * identified as a high surrogate. 1431 * 1432 * @param buf The StringBuffer to append the read surrogates to. 1433 * @return True if it succeeded. 
1434 */ 1435 protected boolean scanSurrogates(XMLStringBuffer buf) 1436 throws IOException, XNIException { 1437 1438 int high = fEntityScanner.scanChar(null); 1439 int low = fEntityScanner.peekChar(); 1440 if (!XMLChar.isLowSurrogate(low)) { 1441 reportFatalError("InvalidCharInContent", 1442 new Object[] {Integer.toString(high, 16)}); 1443 return false; 1444 } 1445 fEntityScanner.scanChar(null); 1446 1447 // convert surrogates to supplemental character 1448 int c = XMLChar.supplemental((char)high, (char)low); 1449 1450 // supplemental character must be a valid XML character 1451 if (isInvalid(c)) { 1452 reportFatalError("InvalidCharInContent", 1453 new Object[]{Integer.toString(c, 16)}); 1454 return false; 1455 } 1456 1457 // fill in the buffer 1458 buf.append((char)high); 1459 buf.append((char)low); 1460 1461 return true; 1462 1463 } // scanSurrogates():boolean 1464 1465 1466 /** 1467 * Convenience function used in all XML scanners. 1468 */ 1469 protected void reportFatalError(String msgId, Object[] args) 1470 throws XNIException { 1471 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN, 1472 msgId, args, 1473 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1474 } 1475 1476 // private methods 1477 private void init() { 1478 // initialize scanner 1479 fEntityScanner = null; 1480 // initialize vars 1481 fEntityDepth = 0; 1482 fReportEntity = true; 1483 fResourceIdentifier.clear(); 1484 1485 if(!fAttributeCacheInitDone){ 1486 for(int i = 0; i < initialCacheCount; i++){ 1487 attributeValueCache.add(new XMLString()); 1488 stringBufferCache.add(new XMLStringBuffer()); 1489 } 1490 fAttributeCacheInitDone = true; 1491 } 1492 fStringBufferIndex = 0; 1493 fAttributeCacheUsedCount = 0; 1494 1495 } 1496 1497 XMLStringBuffer getStringBuffer(){ 1498 if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){ 1499 return stringBufferCache.get(fStringBufferIndex++); 1500 }else{ 1501 XMLStringBuffer tmpObj = new 
XMLStringBuffer(); 1502 fStringBufferIndex++; 1503 stringBufferCache.add(tmpObj); 1504 return tmpObj; 1505 } 1506 } 1507 1508 /** 1509 * Add the count of the content buffer and check if the accumulated 1510 * value exceeds the limit 1511 * @param isPEDecl a flag to indicate whether the entity is parameter 1512 * @param entityName entity name 1513 * @param buffer content buffer 1514 */ 1515 void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) { 1516 checkEntityLimit(isPEDecl, entityName, buffer.length); 1517 } 1518 1519 /** 1520 * Add the count and check limit 1521 * @param isPEDecl a flag to indicate whether the entity is parameter 1522 * @param entityName entity name 1523 * @param len length of the buffer 1524 */ 1525 void checkEntityLimit(boolean isPEDecl, String entityName, int len) { 1526 if (fLimitAnalyzer == null) { 1527 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 1528 } 1529 if (isPEDecl) { 1530 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len); 1531 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1532 fSecurityManager.debugPrint(fLimitAnalyzer); 1533 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName, 1534 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1535 fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1536 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)}); 1537 } 1538 } else { 1539 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len); 1540 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1541 fSecurityManager.debugPrint(fLimitAnalyzer); 1542 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName, 1543 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1544 
fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1545 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)}); 1546 } 1547 } 1548 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1549 fSecurityManager.debugPrint(fLimitAnalyzer); 1550 reportFatalError("TotalEntitySizeLimit", 1551 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1552 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1553 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 1554 } 1555 } 1556 } // class XMLScanner