1 /* 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 54 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 55 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 56 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 57 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 58 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.State; 59 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 60 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 61 import javax.xml.XMLConstants; 62 import javax.xml.stream.XMLStreamConstants; 63 import javax.xml.stream.events.XMLEvent; 64 65 /** 66 * 67 * This class is responsible for scanning the structure and content 68 * of document fragments. 69 * 70 * This class has been modified as per the new design which is more suited to 71 * efficiently build pull parser. Lot of improvements have been done and 72 * the code has been added to support stax functionality/features. 
73 * 74 * @author Neeraj Bajaj SUN Microsystems 75 * @author K.Venugopal SUN Microsystems 76 * @author Glenn Marcy, IBM 77 * @author Andy Clark, IBM 78 * @author Arnaud Le Hors, IBM 79 * @author Eric Ye, IBM 80 * @author Sunitha Reddy, SUN Microsystems 81 * 82 */ 83 public class XMLDocumentFragmentScannerImpl 84 extends XMLScanner 85 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 86 87 // 88 // Constants 89 // 90 91 protected int fElementAttributeLimit; 92 93 /** External subset resolver. **/ 94 protected ExternalSubsetResolver fExternalSubsetResolver; 95 96 // scanner states 97 98 //XXX this should be divided into more states. 99 /** Scanner state: start of markup. */ 100 protected static final int SCANNER_STATE_START_OF_MARKUP = 21; 101 102 /** Scanner state: content. */ 103 protected static final int SCANNER_STATE_CONTENT = 22; 104 105 /** Scanner state: processing instruction. */ 106 protected static final int SCANNER_STATE_PI = 23; 107 108 /** Scanner state: DOCTYPE. */ 109 protected static final int SCANNER_STATE_DOCTYPE = 24; 110 111 /** Scanner state: XML Declaration */ 112 protected static final int SCANNER_STATE_XML_DECL = 25; 113 114 /** Scanner state: root element. */ 115 protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; 116 117 /** Scanner state: comment. */ 118 protected static final int SCANNER_STATE_COMMENT = 27; 119 120 /** Scanner state: reference. */ 121 protected static final int SCANNER_STATE_REFERENCE = 28; 122 123 // <book type="hard"> reading attribute name 'type' 124 protected static final int SCANNER_STATE_ATTRIBUTE = 29; 125 126 // <book type="hard"> //reading attribute value. 127 protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; 128 129 /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ 130 //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 131 132 /** Scanner state: end of input. 
*/ 133 protected static final int SCANNER_STATE_END_OF_INPUT = 33; 134 135 /** Scanner state: terminated. */ 136 protected static final int SCANNER_STATE_TERMINATED = 34; 137 138 /** Scanner state: CDATA section. */ 139 protected static final int SCANNER_STATE_CDATA = 35; 140 141 /** Scanner state: Text declaration. */ 142 protected static final int SCANNER_STATE_TEXT_DECL = 36; 143 144 /** Scanner state: Text declaration. */ 145 protected static final int SCANNER_STATE_CHARACTER_DATA = 37; 146 147 //<book type="hard">foo</book> 148 protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; 149 150 //<book type="hard">foo</book> reading </book> 151 protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; 152 153 protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; 154 protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; 155 156 // feature identifiers 157 158 159 /** Feature identifier: notify built-in refereces. */ 160 protected static final String NOTIFY_BUILTIN_REFS = 161 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; 162 163 /** Property identifier: entity resolver. */ 164 protected static final String ENTITY_RESOLVER = 165 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 166 167 /** Feature identifier: standard uri conformant */ 168 protected static final String STANDARD_URI_CONFORMANT = 169 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 170 171 /** Property identifier: Security property manager. */ 172 private static final String XML_SECURITY_PROPERTY_MANAGER = 173 Constants.XML_SECURITY_PROPERTY_MANAGER; 174 175 /** access external dtd: file protocol 176 * For DOM/SAX, the secure feature is set to true by default 177 */ 178 final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 179 180 // recognized features and properties 181 182 /** Recognized features. 
*/ 183 private static final String[] RECOGNIZED_FEATURES = { 184 NAMESPACES, 185 VALIDATION, 186 NOTIFY_BUILTIN_REFS, 187 NOTIFY_CHAR_REFS, 188 Constants.STAX_REPORT_CDATA_EVENT 189 }; 190 191 /** Feature defaults. */ 192 private static final Boolean[] FEATURE_DEFAULTS = { 193 Boolean.TRUE, 194 null, 195 Boolean.FALSE, 196 Boolean.FALSE, 197 Boolean.TRUE 198 }; 199 200 /** Recognized properties. */ 201 private static final String[] RECOGNIZED_PROPERTIES = { 202 SYMBOL_TABLE, 203 ERROR_REPORTER, 204 ENTITY_MANAGER, 205 XML_SECURITY_PROPERTY_MANAGER 206 }; 207 208 /** Property defaults. */ 209 private static final Object[] PROPERTY_DEFAULTS = { 210 null, 211 null, 212 null, 213 EXTERNAL_ACCESS_DEFAULT 214 }; 215 216 private static final char [] cdata = {'[','C','D','A','T','A','['}; 217 static final char [] xmlDecl = {'<','?','x','m','l'}; 218 private static final char [] endTag = {'<','/'}; 219 // debugging 220 221 /** Debug scanner state. */ 222 private static final boolean DEBUG_SCANNER_STATE = false; 223 224 /** Debug driver. */ 225 private static final boolean DEBUG_DISPATCHER = false; 226 227 /** Debug content driver scanning. */ 228 protected static final boolean DEBUG_START_END_ELEMENT = false; 229 230 231 /** Debug driver next */ 232 protected static final boolean DEBUG_NEXT = false ; 233 234 /** Debug driver next */ 235 protected static final boolean DEBUG = false; 236 protected static final boolean DEBUG_COALESCE = false; 237 // 238 // Data 239 // 240 241 // protected data 242 243 /** Document handler. */ 244 protected XMLDocumentHandler fDocumentHandler; 245 protected int fScannerLastState ; 246 247 /** Entity Storage */ 248 protected XMLEntityStorage fEntityStore; 249 250 /** Entity stack. */ 251 protected int[] fEntityStack = new int[4]; 252 253 /** Markup depth. 
*/ 254 protected int fMarkupDepth; 255 256 //is the element empty 257 protected boolean fEmptyElement ; 258 259 //track if we are reading attributes, this is usefule while 260 //there is a callback 261 protected boolean fReadingAttributes = false; 262 263 /** Scanner state. */ 264 protected int fScannerState; 265 266 /** SubScanner state: inside scanContent method. */ 267 protected boolean fInScanContent = false; 268 protected boolean fLastSectionWasCData = false; 269 protected boolean fLastSectionWasEntityReference = false; 270 protected boolean fLastSectionWasCharacterData = false; 271 272 /** has external dtd */ 273 protected boolean fHasExternalDTD; 274 275 /** Standalone. */ 276 protected boolean fStandaloneSet; 277 protected boolean fStandalone; 278 protected String fVersion; 279 280 // element information 281 282 /** Current element. */ 283 protected QName fCurrentElement; 284 285 /** Element stack. */ 286 protected ElementStack fElementStack = new ElementStack(); 287 protected ElementStack2 fElementStack2 = new ElementStack2(); 288 289 // other info 290 291 /** Document system identifier. 292 * REVISIT: So what's this used for? - NG 293 * protected String fDocumentSystemId; 294 ******/ 295 296 protected String fPITarget ; 297 298 //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values 299 protected XMLString fPIData = new XMLString(); 300 301 // features 302 303 304 /** Notify built-in references. */ 305 protected boolean fNotifyBuiltInRefs = false; 306 307 //STAX related properties 308 //defaultValues. 309 protected boolean fSupportDTD = true; 310 protected boolean fReplaceEntityReferences = true; 311 protected boolean fSupportExternalEntities = false; 312 protected boolean fReportCdataEvent = false ; 313 protected boolean fIsCoalesce = false ; 314 protected String fDeclaredEncoding = null; 315 /** Xerces Feature: Disallow doctype declaration. 
*/ 316 protected boolean fDisallowDoctype = false; 317 318 /** 319 * comma-delimited list of protocols that are allowed for the purpose 320 * of accessing external dtd or entity references 321 */ 322 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 323 324 /** 325 * standard uri conformant (strict uri). 326 * http://apache.org/xml/features/standard-uri-conformant 327 */ 328 protected boolean fStrictURI; 329 330 // drivers 331 332 /** Active driver. */ 333 protected Driver fDriver; 334 335 /** Content driver. */ 336 protected Driver fContentDriver = createContentDriver(); 337 338 // temporary variables 339 340 /** Element QName. */ 341 protected QName fElementQName = new QName(); 342 343 /** Attribute QName. */ 344 protected QName fAttributeQName = new QName(); 345 346 /** 347 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 348 * implements Iterator interface so we can directly give Attributes in the form of 349 * iterator. 350 */ 351 protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); 352 353 354 /** String. */ 355 protected XMLString fTempString = new XMLString(); 356 357 /** String. */ 358 protected XMLString fTempString2 = new XMLString(); 359 360 /** Array of 3 strings. */ 361 private String[] fStrings = new String[3]; 362 363 /** Making the buffer accesible to derived class -- String buffer. */ 364 protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 365 366 /** Making the buffer accesible to derived class -- String buffer. */ 367 protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 368 369 /** stores character data. */ 370 /** Making the buffer accesible to derived class -- stores PI data */ 371 protected XMLStringBuffer fContentBuffer = new XMLStringBuffer(); 372 373 /** Single character array. 
*/
    private final char[] fSingleChar = new char[1];
    /** Name of the current entity; starts out as null. */
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    /** DTD grammar helper; null until one is assigned. */
    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;


    // Skip-element algorithm: fixed-size bookkeeping for element raw names.
    // NOTE(review): how these limits are applied is decided by code outside this
    // section; only the array sizes below are established here.
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    // element array of length ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    // pointer location where the last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    // 2D array of pointer info, sized MAX_DEPTH_LIMIT x MAX_POINTER_AT_A_DEPTH
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor; performs no work of its own. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     * <p>
     * Registers this scanner as the entity manager's entity handler and then
     * starts the pseudo-entity named "$fragment$" on the given source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
     */
    public void setInputSource(XMLInputSource inputSource) throws IOException {
        fEntityManager.setEntityHandler(this);
        // NOTE(review): the meaning of the two boolean arguments (false, true) is
        // defined by XMLEntityManager.startEntity, which is not visible here.
        fEntityManager.startEntity("$fragment$", inputSource, false, true);
        // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
    } // setInputSource(XMLInputSource)

    /**
     * Scans a document.
433 * 434 * @param complete True if the scanner should scan the document 435 * completely, pushing all events to the registered 436 * document handler. A value of false indicates that 437 * that the scanner should only scan the next portion 438 * of the document and return. A scanner instance is 439 * permitted to completely scan a document if it does 440 * not support this "pull" scanning model. 441 * 442 * @return True if there is more to scan, false otherwise. 443 */ 444 public boolean scanDocument(boolean complete) 445 throws IOException, XNIException { 446 447 // keep dispatching "events" 448 fEntityManager.setEntityHandler(this); 449 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 450 451 int event = next(); 452 do { 453 switch (event) { 454 case XMLStreamConstants.START_DOCUMENT : 455 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 456 break; 457 case XMLStreamConstants.START_ELEMENT : 458 //System.out.println(" in scann element"); 459 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 460 break; 461 case XMLStreamConstants.CHARACTERS : 462 fDocumentHandler.characters(getCharacterData(),null); 463 break; 464 case XMLStreamConstants.SPACE: 465 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
466 //System.out.println("in the space"); 467 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 468 break; 469 case XMLStreamConstants.ENTITY_REFERENCE : 470 //entity reference callback are given in startEntity 471 break; 472 case XMLStreamConstants.PROCESSING_INSTRUCTION : 473 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 474 break; 475 case XMLStreamConstants.COMMENT : 476 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 477 fDocumentHandler.comment(getCharacterData(),null); 478 break; 479 case XMLStreamConstants.DTD : 480 //all DTD related callbacks are handled in DTDScanner. 481 //1. Stax doesn't define DTD states as it does for XML Document. 482 //therefore we don't need to take care of anything here. So Just break; 483 break; 484 case XMLStreamConstants.CDATA: 485 fDocumentHandler.startCDATA(null); 486 //xxx: check if CDATA values comes from getCharacterData() function 487 fDocumentHandler.characters(getCharacterData(),null); 488 fDocumentHandler.endCDATA(null); 489 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 490 break; 491 case XMLStreamConstants.NOTATION_DECLARATION : 492 break; 493 case XMLStreamConstants.ENTITY_DECLARATION : 494 break; 495 case XMLStreamConstants.NAMESPACE : 496 break; 497 case XMLStreamConstants.ATTRIBUTE : 498 break; 499 case XMLStreamConstants.END_ELEMENT : 500 //do not give callback here. 501 //this callback is given in scanEndElement function. 
502 //fDocumentHandler.endElement(getElementQName(),null); 503 break; 504 default : 505 throw new InternalError("processing event: " + event); 506 507 } 508 //System.out.println("here in before calling next"); 509 event = next(); 510 //System.out.println("here in after calling next"); 511 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 512 513 if(event == XMLStreamConstants.END_DOCUMENT) { 514 fDocumentHandler.endDocument(null); 515 return false; 516 } 517 518 return true; 519 520 } // scanDocument(boolean):boolean 521 522 523 524 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 525 if(fScannerLastState == XMLEvent.END_ELEMENT){ 526 fElementQName.setValues(fElementStack.getLastPoppedElement()); 527 } 528 return fElementQName ; 529 } 530 531 /** return the next state on the input 532 * @return int 533 */ 534 535 public int next() throws IOException, XNIException { 536 return fDriver.next(); 537 } 538 539 // 540 // XMLComponent methods 541 // 542 543 /** 544 * Resets the component. The component can query the component manager 545 * about any features and properties that affect the operation of the 546 * component. 547 * 548 * @param componentManager The component manager. 549 * 550 * @throws SAXException Thrown by component on initialization error. 551 * For example, if a feature or property is 552 * required for the operation of the component, the 553 * component manager may throw a 554 * SAXNotRecognizedException or a 555 * SAXNotSupportedException. 
*/

    public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {

        // superclass state first; resetCommon() at the end relies on fields set below
        super.reset(componentManager);

        // other settings
        // fDocumentSystemId = null;

        // sax features
        //fAttributes.setNamespaces(fNamespaces);

        // xerces features (each lookup supplies its own fallback value)
        fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true);
        fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null);
        fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false);

        // the entity resolver is only kept when it can also resolve external subsets
        Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null);
        fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ?
                (ExternalSubsetResolver) resolver : null;

        //attribute
        fReadingAttributes = false;
        //xxx: external entities are supported in Xerces
        // it would be good to define feature for this case
        fSupportExternalEntities = true;
        fReplaceEntityReferences = true;
        fIsCoalesce = false;

        // setup Driver
        setScannerState(SCANNER_STATE_CONTENT);
        setDriver(fContentDriver);

        // JAXP 1.5 features and properties
        // NOTE(review): the lookup passes null as its default and the result is
        // dereferenced immediately, so this throws NullPointerException if the
        // configuration does not supply a security property manager - confirm
        // that every configuration sets it.
        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
                componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

        fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);

        resetCommon();
        //fEntityManager.test();
    } // reset(XMLComponentManager)


    /**
     * Resets the scanner from a StAX property manager: reads the
     * namespace-awareness, entity-replacement, external-entity, CDATA-reporting
     * and coalescing settings, then the security-related properties, and
     * finishes with resetCommon().
     */
    public void reset(PropertyManager propertyManager){

        super.reset(propertyManager);

        // other settings
        // fDocumentSystemId = null;
        // NOTE(review): the Boolean properties unboxed directly below are assumed
        // to be present; a missing value would fail on booleanValue().
        fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue();
        fNotifyBuiltInRefs = false ;

        //fElementStack2.clear();
        //fReplaceEntityReferences = true;
        //fSupportExternalEntities = true;
        Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES);
        fReplaceEntityReferences = bo.booleanValue();
        bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
        fSupportExternalEntities = bo.booleanValue();
        // the next two properties are optional: the fields keep their value when unset
        Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ;
        if(cdata != null)
            fReportCdataEvent = cdata.booleanValue() ;
        Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ;
        if(coalesce != null)
            fIsCoalesce = coalesce.booleanValue();
        // coalescing switches CDATA event reporting off
        fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ;
        //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true,
        //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application
        fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences;
        // setup Driver
        //we dont need to do this -- nb.
        //setScannerState(SCANNER_STATE_CONTENT);
        //setDriver(fContentDriver);
        //fEntityManager.test();

        // JAXP 1.5 features and properties
        // NOTE(review): spm is dereferenced without a null check - confirm the
        // property manager always carries this property.
        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
                propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

        fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER);
        resetCommon();
    } // reset(PropertyManager)

    /**
     * State reset shared by both reset() variants. Reads fSecurityManager, so
     * callers assign that field before calling this method.
     */
    void resetCommon() {
        // initialize vars
        fMarkupDepth = 0;
        fCurrentElement = null;
        fElementStack.clear();
        fHasExternalDTD = false;
        fStandaloneSet = false;
        fStandalone = false;
        fInScanContent = false;
        //skipping algorithm
        fShouldSkip = false;
        fAdd = false;
        fSkip = false;

        fEntityStore = fEntityManager.getEntityStore();
        dtdGrammarUtil = null;

        // without a security manager the attribute limit is stored as 0
        if (fSecurityManager != null) {
            fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT);
        } else {
            fElementAttributeLimit = 0;
        }
        // a fresh limit analyzer per reset, shared with the entity manager
        fLimitAnalyzer = new XMLLimitAnalyzer();
        fEntityManager.setLimitAnalyzer(fLimitAnalyzer);
    }

    /**
     * Returns a list of feature identifiers that are recognized by
     * this component. The returned array is a copy of the internal list.
     */
    public String[] getRecognizedFeatures() {
        return (String[])(RECOGNIZED_FEATURES.clone());
    } // getRecognizedFeatures():String[]

    /**
     * Sets the state of a feature. This method is called by the component
     * manager any time after reset when a feature changes state.
     * <p>
     * <strong>Note:</strong> Components should silently ignore features
     * that do not affect the operation of the component.
     *
     * @param featureId The feature identifier.
     * @param state     The state of the feature.
687 * 688 * @throws SAXNotRecognizedException The component should not throw 689 * this exception. 690 * @throws SAXNotSupportedException The component should not throw 691 * this exception. 692 */ 693 public void setFeature(String featureId, boolean state) 694 throws XMLConfigurationException { 695 696 super.setFeature(featureId, state); 697 698 // Xerces properties 699 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 700 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 701 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 702 fNotifyBuiltInRefs = state; 703 } 704 } 705 706 } // setFeature(String,boolean) 707 708 /** 709 * Returns a list of property identifiers that are recognized by 710 * this component. This method may return null if no properties 711 * are recognized by this component. 712 */ 713 public String[] getRecognizedProperties() { 714 return (String[])(RECOGNIZED_PROPERTIES.clone()); 715 } // getRecognizedProperties():String[] 716 717 /** 718 * Sets the value of a property. This method is called by the component 719 * manager any time after reset when a property changes value. 720 * <p> 721 * <strong>Note:</strong> Components should silently ignore properties 722 * that do not affect the operation of the component. 723 * 724 * @param propertyId The property identifier. 725 * @param value The value of the property. 726 * 727 * @throws SAXNotRecognizedException The component should not throw 728 * this exception. 729 * @throws SAXNotSupportedException The component should not throw 730 * this exception. 
731 */ 732 public void setProperty(String propertyId, Object value) 733 throws XMLConfigurationException { 734 735 super.setProperty(propertyId, value); 736 737 // Xerces properties 738 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 739 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 740 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 741 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 742 fEntityManager = (XMLEntityManager)value; 743 return; 744 } 745 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 746 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 747 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 748 (ExternalSubsetResolver) value : null; 749 return; 750 } 751 } 752 753 754 // Xerces properties 755 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 756 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 757 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 758 fEntityManager = (XMLEntityManager)value; 759 } 760 return; 761 } 762 763 //JAXP 1.5 properties 764 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 765 { 766 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 767 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 768 } 769 770 } // setProperty(String,Object) 771 772 /** 773 * Returns the default state for a feature, or null if this 774 * component does not want to report a default value for this 775 * feature. 776 * 777 * @param featureId The feature identifier. 
778 * 779 * @since Xerces 2.2.0 780 */ 781 public Boolean getFeatureDefault(String featureId) { 782 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 783 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 784 return FEATURE_DEFAULTS[i]; 785 } 786 } 787 return null; 788 } // getFeatureDefault(String):Boolean 789 790 /** 791 * Returns the default state for a property, or null if this 792 * component does not want to report a default value for this 793 * property. 794 * 795 * @param propertyId The property identifier. 796 * 797 * @since Xerces 2.2.0 798 */ 799 public Object getPropertyDefault(String propertyId) { 800 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 801 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 802 return PROPERTY_DEFAULTS[i]; 803 } 804 } 805 return null; 806 } // getPropertyDefault(String):Object 807 808 // 809 // XMLDocumentSource methods 810 // 811 812 /** 813 * setDocumentHandler 814 * 815 * @param documentHandler 816 */ 817 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 818 fDocumentHandler = documentHandler; 819 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 820 } // setDocumentHandler(XMLDocumentHandler) 821 822 823 /** Returns the document handler */ 824 public XMLDocumentHandler getDocumentHandler(){ 825 return fDocumentHandler; 826 } 827 828 // 829 // XMLEntityHandler methods 830 // 831 832 /** 833 * This method notifies of the start of an entity. The DTD has the 834 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 835 * general entities are just specified by their name. 836 * 837 * @param name The name of the entity. 838 * @param identifier The resource identifier. 839 * @param encoding The auto-detected IANA encoding name of the entity 840 * stream. This value will be null in those situations 841 * where the entity encoding is not auto-detected (e.g. 842 * internal entities or a document entity that is 843 * parsed from a java.io.Reader). 
844 * @param augs Additional information that may include infoset augmentations 845 * 846 * @throws XNIException Thrown by handler to signal an error. 847 */ 848 public void startEntity(String name, 849 XMLResourceIdentifier identifier, 850 String encoding, Augmentations augs) throws XNIException { 851 852 // keep track of this entity before fEntityDepth is increased 853 if (fEntityDepth == fEntityStack.length) { 854 int[] entityarray = new int[fEntityStack.length * 2]; 855 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 856 fEntityStack = entityarray; 857 } 858 fEntityStack[fEntityDepth] = fMarkupDepth; 859 860 super.startEntity(name, identifier, encoding, augs); 861 862 // WFC: entity declared in external subset in standalone doc 863 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 864 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 865 new Object[]{name}); 866 } 867 868 /** we are not calling the handlers yet.. */ 869 // call handler 870 if (fDocumentHandler != null && !fScanningAttribute) { 871 if (!name.equals("[xml]")) { 872 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 873 } 874 } 875 876 } // startEntity(String,XMLResourceIdentifier,String) 877 878 /** 879 * This method notifies the end of an entity. The DTD has the pseudo-name 880 * of "[dtd]" parameter entity names start with '%'; and general entities 881 * are just specified by their name. 882 * 883 * @param name The name of the entity. 884 * @param augs Additional information that may include infoset augmentations 885 * 886 * @throws XNIException Thrown by handler to signal an error. 
887 */ 888 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 889 890 /** 891 * // flush possible pending output buffer - see scanContent 892 * if (fInScanContent && fStringBuffer.length != 0 893 * && fDocumentHandler != null) { 894 * fDocumentHandler.characters(fStringBuffer, null); 895 * fStringBuffer.length = 0; // make sure we know it's been flushed 896 * } 897 */ 898 super.endEntity(name, augs); 899 900 // make sure markup is properly balanced 901 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 902 reportFatalError("MarkupEntityMismatch", null); 903 } 904 905 /**/ 906 // call handler 907 if (fDocumentHandler != null && !fScanningAttribute) { 908 if (!name.equals("[xml]")) { 909 fDocumentHandler.endGeneralEntity(name, augs); 910 } 911 } 912 913 914 } // endEntity(String) 915 916 // 917 // Protected methods 918 // 919 920 // Driver factory methods 921 922 /** Creates a content Driver. */ 923 protected Driver createContentDriver() { 924 return new FragmentContentDriver(); 925 } // createContentDriver():Driver 926 927 // scanning methods 928 929 /** 930 * Scans an XML or text declaration. 931 * <p> 932 * <pre> 933 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 934 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 935 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 936 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 937 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 938 * | ('"' ('yes' | 'no') '"')) 939 * 940 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 941 * </pre> 942 * 943 * @param scanningTextDecl True if a text declaration is to 944 * be scanned instead of an XML 945 * declaration. 
946 */ 947 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 948 throws IOException, XNIException { 949 950 // scan decl 951 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 952 fMarkupDepth--; 953 954 // pseudo-attribute values 955 String version = fStrings[0]; 956 String encoding = fStrings[1]; 957 String standalone = fStrings[2]; 958 fDeclaredEncoding = encoding; 959 // set standalone 960 fStandaloneSet = standalone != null; 961 fStandalone = fStandaloneSet && standalone.equals("yes"); 962 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 963 //but this information is only related with Document Entity. 964 fEntityManager.setStandalone(fStandalone); 965 966 967 // call handler 968 if (fDocumentHandler != null) { 969 if (scanningTextDecl) { 970 fDocumentHandler.textDecl(version, encoding, null); 971 } else { 972 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 973 } 974 } 975 976 if(version != null){ 977 fEntityScanner.setVersion(version); 978 fEntityScanner.setXMLVersion(version); 979 } 980 // set encoding on reader, only if encoding was not specified by the application explicitly 981 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 982 fEntityScanner.setEncoding(encoding); 983 } 984 985 } // scanXMLDeclOrTextDecl(boolean) 986 987 public String getPITarget(){ 988 return fPITarget ; 989 } 990 991 public XMLStringBuffer getPIData(){ 992 return fContentBuffer ; 993 } 994 995 //XXX: why not this function behave as per the state of the parser? 996 public XMLString getCharacterData(){ 997 if(fUsebuffer){ 998 return fContentBuffer ; 999 }else{ 1000 return fTempString; 1001 } 1002 1003 } 1004 1005 1006 /** 1007 * Scans a processing data. This is needed to handle the situation 1008 * where a document starts with a processing instruction whose 1009 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1010 * 1011 * @param target The PI target 1012 * @param data The XMLStringBuffer to fill in with the data 1013 */ 1014 protected void scanPIData(String target, XMLStringBuffer data) 1015 throws IOException, XNIException { 1016 1017 super.scanPIData(target, data); 1018 1019 //set the PI target and values 1020 fPITarget = target ; 1021 1022 fMarkupDepth--; 1023 1024 } // scanPIData(String) 1025 1026 /** 1027 * Scans a comment. 1028 * <p> 1029 * <pre> 1030 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1031 * </pre> 1032 * <p> 1033 * <strong>Note:</strong> Called after scanning past '<!--' 1034 */ 1035 protected void scanComment() throws IOException, XNIException { 1036 fContentBuffer.clear(); 1037 scanComment(fContentBuffer); 1038 //getTextCharacters can also be called for reading comments 1039 fUsebuffer = true; 1040 fMarkupDepth--; 1041 1042 } // scanComment() 1043 1044 //xxx value returned by this function may not remain valid if another event is scanned. 1045 public String getComment(){ 1046 return fContentBuffer.toString(); 1047 } 1048 1049 void addElement(String rawname){ 1050 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1051 //storing element raw name in a linear list of array 1052 fElementArray[fElementPointer] = rawname ; 1053 //storing elemnetPointer for particular element depth 1054 1055 if(DEBUG_SKIP_ALGORITHM){ 1056 StringBuffer sb = new StringBuffer() ; 1057 sb.append(" Storing element information ") ; 1058 sb.append(" fElementPointer = " + fElementPointer) ; 1059 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1060 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1061 System.out.println(sb.toString()) ; 1062 } 1063 1064 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1065 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1066 short column = storePointerForADepth(fElementPointer); 1067 if(column > 0){ 1068 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1069 //identity comparison shouldn't take much time and we can rely on this 1070 //since its guaranteed to have same object id for same string. 1071 if(rawname == fElementArray[pointer]){ 1072 fShouldSkip = true ; 1073 fLastPointerLocation = pointer ; 1074 //reset the things and return. 1075 resetPointer((short)fElementStack.fDepth , column) ; 1076 fElementArray[fElementPointer] = null ; 1077 return ; 1078 }else{ 1079 fShouldSkip = false ; 1080 } 1081 } 1082 } 1083 fElementPointer++ ; 1084 } 1085 } 1086 1087 1088 void resetPointer(short depth, short column){ 1089 fPointerInfo[depth] [column] = (short)0; 1090 } 1091 1092 //returns column information at which pointer was stored. 1093 short storePointerForADepth(short elementPointer){ 1094 short depth = (short) fElementStack.fDepth ; 1095 1096 //Stores element pointer locations at particular depth , only 4 pointer locations 1097 //are stored at particular depth for now. 1098 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1099 1100 if(canStore(depth, i)){ 1101 fPointerInfo[depth][i] = elementPointer ; 1102 if(DEBUG_SKIP_ALGORITHM){ 1103 StringBuffer sb = new StringBuffer() ; 1104 sb.append(" Pointer information ") ; 1105 sb.append(" fElementPointer = " + fElementPointer) ; 1106 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1107 sb.append(" column = " + i ) ; 1108 System.out.println(sb.toString()) ; 1109 } 1110 return i; 1111 } 1112 //else 1113 //pointer was not stored because we reached the limit 1114 } 1115 return -1 ; 1116 } 1117 1118 boolean canStore(short depth, short column){ 1119 //colum = 0 , means first element at particular depth 1120 //column = 1, means second element at particular depth 1121 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1122 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1123 } 1124 1125 1126 short getElementPointer(short depth, short column){ 1127 //colum = 0 , means first element at particular depth 1128 //column = 1, means second element at particular depth 1129 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1130 return fPointerInfo[depth][column] ; 1131 } 1132 1133 //this function assumes that string passed is not null and skips 1134 //the following string from the buffer this makes sure 1135 boolean skipFromTheBuffer(String rawname) throws IOException{ 1136 if(fEntityScanner.skipString(rawname)){ 1137 char c = (char)fEntityScanner.peekChar() ; 1138 //If the start element was completely skipped we should encounter either ' '(space), 1139 //or '/' (in case of empty element) or '>' 1140 if( c == ' ' || c == '/' || c == '>'){ 1141 fElementRawname = rawname ; 1142 return true ; 1143 } else{ 1144 return false; 1145 } 1146 } else 1147 return false ; 1148 } 1149 1150 boolean skipQElement(String rawname) throws IOException{ 1151 1152 final int c = fEntityScanner.getChar(rawname.length()); 1153 //if this character is still valid element name -- this means string can't match 1154 if(XMLChar.isName(c)){ 1155 return false; 1156 }else{ 1157 return fEntityScanner.skipString(rawname); 1158 } 1159 } 1160 1161 protected boolean skipElement() throws IOException { 1162 1163 if(!fShouldSkip) return false ; 1164 1165 if(fLastPointerLocation != 0){ 1166 //Look at the next element stored in the array list.. we might just get a match. 1167 String rawname = fElementArray[fLastPointerLocation + 1] ; 1168 if(rawname != null && skipFromTheBuffer(rawname)){ 1169 fLastPointerLocation++ ; 1170 if(DEBUG_SKIP_ALGORITHM){ 1171 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1172 } 1173 return true ; 1174 } else{ 1175 //reset it back to zero... we haven't got the correct subset yet. 
1176 fLastPointerLocation = 0 ; 1177 1178 } 1179 } 1180 //xxx: we can put some logic here as from what column it should start looking 1181 //for now we always start at 0 1182 //fallback to tolerant algorithm, it would look for differnt element stored at different 1183 //depth and get us the pointer location. 1184 return fShouldSkip && skipElement((short)0); 1185 1186 } 1187 1188 //start of the column at which it should try searching 1189 boolean skipElement(short column) throws IOException { 1190 short depth = (short)fElementStack.fDepth ; 1191 1192 if(depth > MAX_DEPTH_LIMIT){ 1193 return fShouldSkip = false ; 1194 } 1195 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1196 short pointer = getElementPointer(depth , i ) ; 1197 1198 if(pointer == 0){ 1199 return fShouldSkip = false ; 1200 } 1201 1202 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1203 if(DEBUG_SKIP_ALGORITHM){ 1204 System.out.println(); 1205 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1206 System.out.println(); 1207 } 1208 fLastPointerLocation = pointer ; 1209 return fShouldSkip = true ; 1210 } 1211 } 1212 return fShouldSkip = false ; 1213 } 1214 1215 /** 1216 * Scans a start element. This method will handle the binding of 1217 * namespace information and notifying the handler of the start 1218 * of the element. 1219 * <p> 1220 * <pre> 1221 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1222 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1223 * </pre> 1224 * <p> 1225 * <strong>Note:</strong> This method assumes that the leading 1226 * '<' character has been consumed. 1227 * <p> 1228 * <strong>Note:</strong> This method uses the fElementQName and 1229 * fAttributes variables. The contents of these variables will be 1230 * destroyed. The caller should copy important information out of 1231 * these variables before calling this method. 
1232 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1233 * 1234 * @return True if element is empty. (i.e. It matches 1235 * production [44]. 1236 */ 1237 // fElementQName will have the details of element just read.. 1238 // fAttributes will have the details of all the attributes. 1239 protected boolean scanStartElement() 1240 throws IOException, XNIException { 1241 1242 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1243 //when skipping is true and no more elements should be added 1244 if(fSkip && !fAdd){ 1245 //get the stored element -- if everything goes right this should match the 1246 //token in the buffer 1247 1248 QName name = fElementStack.getNext(); 1249 1250 if(DEBUG_SKIP_ALGORITHM){ 1251 System.out.println("Trying to skip String = " + name.rawname); 1252 } 1253 1254 //Be conservative -- if skipping fails -- stop. 1255 fSkip = fEntityScanner.skipString(name.rawname); 1256 1257 if(fSkip){ 1258 if(DEBUG_SKIP_ALGORITHM){ 1259 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1260 } 1261 fElementStack.push(); 1262 fElementQName = name; 1263 }else{ 1264 //if skipping fails reposition the stack or fallback to normal way of processing 1265 fElementStack.reposition(); 1266 if(DEBUG_SKIP_ALGORITHM){ 1267 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1268 } 1269 } 1270 } 1271 1272 //we are still at the stage of adding elements 1273 //the elements were not matched or 1274 //fSkip is not set to true 1275 if(!fSkip || fAdd){ 1276 //get the next element from the stack 1277 fElementQName = fElementStack.nextElement(); 1278 // name 1279 if (fNamespaces) { 1280 fEntityScanner.scanQName(fElementQName); 1281 } else { 1282 String name = fEntityScanner.scanName(); 1283 fElementQName.setValues(null, name, name, null); 1284 } 1285 1286 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1287 if(DEBUG_SKIP_ALGORITHM){ 1288 if(fAdd){ 1289 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1290 } 1291 } 1292 1293 } 1294 1295 //when the elements are being added , we need to check if we are set for skipping the elements 1296 if(fAdd){ 1297 //this sets the value of fAdd variable 1298 fElementStack.matchElement(fElementQName); 1299 } 1300 1301 1302 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1303 fCurrentElement = fElementQName; 1304 1305 String rawname = fElementQName.rawname; 1306 1307 fEmptyElement = false; 1308 1309 fAttributes.removeAllAttributes(); 1310 1311 checkDepth(rawname); 1312 if(!seekCloseOfStartTag()){ 1313 fReadingAttributes = true; 1314 fAttributeCacheUsedCount =0; 1315 fStringBufferIndex =0; 1316 fAddDefaultAttr = true; 1317 do { 1318 scanAttribute(fAttributes); 1319 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1320 fAttributes.getLength() > fElementAttributeLimit){ 1321 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1322 "ElementAttributeLimit", 1323 new Object[]{rawname, fElementAttributeLimit }, 1324 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1325 } 1326 1327 } while (!seekCloseOfStartTag()); 1328 fReadingAttributes=false; 1329 } 1330 1331 if (fEmptyElement) { 1332 //decrease the markup depth.. 1333 fMarkupDepth--; 1334 1335 // check that this element was opened in the same entity 1336 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1337 reportFatalError("ElementEntityMismatch", 1338 new Object[]{fCurrentElement.rawname}); 1339 } 1340 // call handler 1341 if (fDocumentHandler != null) { 1342 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1343 } 1344 1345 //We should not be popping out the context here in endELement becaause the namespace context is still 1346 //valid when parser is at the endElement state. 
1347 //if (fNamespaces) { 1348 // fNamespaceContext.popContext(); 1349 //} 1350 1351 //pop the element off the stack.. 1352 fElementStack.popElement(); 1353 1354 } else { 1355 1356 if(dtdGrammarUtil != null) 1357 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1358 if(fDocumentHandler != null){ 1359 //complete element and attributes are traversed in this function so we can send a callback 1360 //here. 1361 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1362 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1363 } 1364 } 1365 1366 1367 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1368 return fEmptyElement; 1369 1370 } // scanStartElement():boolean 1371 1372 /** 1373 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1374 * Characters are consumed. 1375 */ 1376 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1377 // spaces 1378 boolean sawSpace = fEntityScanner.skipSpaces(); 1379 1380 // end tag? 1381 final int c = fEntityScanner.peekChar(); 1382 if (c == '>') { 1383 fEntityScanner.scanChar(); 1384 return true; 1385 } else if (c == '/') { 1386 fEntityScanner.scanChar(); 1387 if (!fEntityScanner.skipChar('>')) { 1388 reportFatalError("ElementUnterminated", 1389 new Object[]{fElementQName.rawname}); 1390 } 1391 fEmptyElement = true; 1392 return true; 1393 } else if (!isValidNameStartChar(c) || !sawSpace) { 1394 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1395 } 1396 1397 return false; 1398 } 1399 1400 public boolean hasAttributes(){ 1401 return fAttributes.getLength() > 0 ? true : false ; 1402 } 1403 1404 1405 /** 1406 * Scans an attribute. 1407 * <p> 1408 * <pre> 1409 * [41] Attribute ::= Name Eq AttValue 1410 * </pre> 1411 * <p> 1412 * <strong>Note:</strong> This method assumes that the next 1413 * character on the stream is the first character of the attribute 1414 * name. 
1415 * <p> 1416 * <strong>Note:</strong> This method uses the fAttributeQName and 1417 * fQName variables. The contents of these variables will be 1418 * destroyed. 1419 * 1420 * @param attributes The attributes list for the scanned attribute. 1421 */ 1422 1423 /** 1424 * protected void scanAttribute(AttributeIteratorImpl attributes) 1425 * throws IOException, XNIException { 1426 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1427 * 1428 * 1429 * // name 1430 * if (fNamespaces) { 1431 * fEntityScanner.scanQName(fAttributeQName); 1432 * } 1433 * else { 1434 * String name = fEntityScanner.scanName(); 1435 * fAttributeQName.setValues(null, name, name, null); 1436 * } 1437 * 1438 * // equals 1439 * fEntityScanner.skipSpaces(); 1440 * if (!fEntityScanner.skipChar('=')) { 1441 * reportFatalError("EqRequiredInAttribute", 1442 * new Object[]{fAttributeQName.rawname}); 1443 * } 1444 * fEntityScanner.skipSpaces(); 1445 * 1446 * 1447 * // content 1448 * int oldLen = attributes.getLength(); 1449 */ 1450 /**xxx there is one check of duplicate attribute that has been removed. 
1451 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1452 * 1453 * // WFC: Unique Att Spec 1454 * if (oldLen == attributes.getLength()) { 1455 * reportFatalError("AttributeNotUnique", 1456 * new Object[]{fCurrentElement.rawname, 1457 * fAttributeQName.rawname}); 1458 * } 1459 */ 1460 1461 /* 1462 //REVISIT: one more case needs to be included: external PE and standalone is no 1463 boolean isVC = fHasExternalDTD && !fStandalone; 1464 scanAttributeValue(fTempString, fTempString2, 1465 fAttributeQName.rawname, attributes, 1466 oldLen, isVC); 1467 1468 //attributes.setValue(oldLen, fTempString.toString()); 1469 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1470 //attributes.setSpecified(oldLen, true); 1471 1472 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1473 fAttributes.addAttribute(attribute); 1474 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1475 } // scanAttribute(XMLAttributes) 1476 1477 */ 1478 1479 /** return the attribute iterator implementation */ 1480 public XMLAttributesIteratorImpl getAttributeIterator(){ 1481 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1482 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1483 fAddDefaultAttr = false; 1484 } 1485 return fAttributes; 1486 } 1487 1488 /** return if standalone is set */ 1489 public boolean standaloneSet(){ 1490 return fStandaloneSet; 1491 } 1492 /** return if the doucment is standalone */ 1493 public boolean isStandAlone(){ 1494 return fStandalone ; 1495 } 1496 /** 1497 * Scans an attribute name value pair. 1498 * <p> 1499 * <pre> 1500 * [41] Attribute ::= Name Eq AttValue 1501 * </pre> 1502 * <p> 1503 * <strong>Note:</strong> This method assumes that the next 1504 * character on the stream is the first character of the attribute 1505 * name. 
1506 * <p> 1507 * <strong>Note:</strong> This method uses the fAttributeQName and 1508 * fQName variables. The contents of these variables will be 1509 * destroyed. 1510 * 1511 * @param attributes The attributes list for the scanned attribute. 1512 */ 1513 1514 protected void scanAttribute(XMLAttributes attributes) 1515 throws IOException, XNIException { 1516 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1517 1518 // name 1519 if (fNamespaces) { 1520 fEntityScanner.scanQName(fAttributeQName); 1521 } else { 1522 String name = fEntityScanner.scanName(); 1523 fAttributeQName.setValues(null, name, name, null); 1524 } 1525 1526 // equals 1527 fEntityScanner.skipSpaces(); 1528 if (!fEntityScanner.skipChar('=')) { 1529 reportFatalError("EqRequiredInAttribute", 1530 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1531 } 1532 fEntityScanner.skipSpaces(); 1533 1534 int attIndex = 0 ; 1535 //REVISIT: one more case needs to be included: external PE and standalone is no 1536 boolean isVC = fHasExternalDTD && !fStandalone; 1537 //fTempString would store attribute value 1538 ///fTempString2 would store attribute non-normalized value 1539 1540 //this function doesn't use 'attIndex'. We are adding the attribute later 1541 //after we have figured out that current attribute is not namespace declaration 1542 //since scanAttributeValue doesn't use attIndex parameter therefore we 1543 //can safely add the attribute later.. 1544 XMLString tmpStr = getString(); 1545 1546 scanAttributeValue(tmpStr, fTempString2, 1547 fAttributeQName.rawname, attributes, 1548 attIndex, isVC); 1549 1550 // content 1551 int oldLen = attributes.getLength(); 1552 //if the attribute name already exists.. 
new value is replaced with old value 1553 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1554 1555 // WFC: Unique Att Spec 1556 //attributes count will be same if the current attribute name already exists for this element name. 1557 //this means there are two duplicate attributes. 1558 if (oldLen == attributes.getLength()) { 1559 reportFatalError("AttributeNotUnique", 1560 new Object[]{fCurrentElement.rawname, 1561 fAttributeQName.rawname}); 1562 } 1563 1564 //tmpString contains attribute value 1565 //we are passing null as the attribute value 1566 attributes.setValue(attIndex, null, tmpStr); 1567 1568 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1569 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1570 attributes.setSpecified(attIndex, true); 1571 1572 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1573 1574 } // scanAttribute(XMLAttributes) 1575 1576 /** 1577 * Scans element content. 1578 * 1579 * @return Returns the next character on the stream. 1580 */ 1581 //CHANGED: 1582 //EARLIER: scanContent() 1583 //NOW: scanContent(XMLStringBuffer) 1584 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1585 //this function appends the data to the buffer. 1586 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1587 //set the fTempString length to 0 before passing it on to scanContent 1588 //scanContent sets the correct co-ordinates as per the content read 1589 fTempString.length = 0; 1590 int c = fEntityScanner.scanContent(fTempString); 1591 content.append(fTempString); 1592 fTempString.length = 0; 1593 if (c == '\r') { 1594 // happens when there is the character reference 1595 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1596 fEntityScanner.scanChar(); 1597 content.append((char)c); 1598 c = -1; 1599 } else if (c == ']') { 1600 //fStringBuffer.clear(); 1601 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1602 content.append((char)fEntityScanner.scanChar()); 1603 // remember where we are in case we get an endEntity before we 1604 // could flush the buffer out - this happens when we're parsing an 1605 // entity which ends with a ] 1606 fInScanContent = true; 1607 // 1608 // We work on a single character basis to handle cases such as: 1609 // ']]]>' which we might otherwise miss. 1610 // 1611 if (fEntityScanner.skipChar(']')) { 1612 content.append(']'); 1613 while (fEntityScanner.skipChar(']')) { 1614 content.append(']'); 1615 } 1616 if (fEntityScanner.skipChar('>')) { 1617 reportFatalError("CDEndInContent", null); 1618 } 1619 } 1620 fInScanContent = false; 1621 c = -1; 1622 } 1623 if (fDocumentHandler != null && content.length > 0) { 1624 //fDocumentHandler.characters(content, null); 1625 } 1626 return c; 1627 1628 } // scanContent():int 1629 1630 1631 /** 1632 * Scans a CDATA section. 1633 * <p> 1634 * <strong>Note:</strong> This method uses the fTempString and 1635 * fStringBuffer variables. 1636 * 1637 * @param complete True if the CDATA section is to be scanned 1638 * completely. 1639 * 1640 * @return True if CDATA is completely scanned. 1641 */ 1642 //CHANGED: 1643 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1644 throws IOException, XNIException { 1645 1646 // call handler 1647 if (fDocumentHandler != null) { 1648 //fDocumentHandler.startCDATA(null); 1649 } 1650 1651 while (true) { 1652 //scanData will fill the contentBuffer 1653 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1654 break ; 1655 /** We dont need all this code if we pass ']]>' as delimeter.. 
1656 * int brackets = 2; 1657 * while (fEntityScanner.skipChar(']')) { 1658 * brackets++; 1659 * } 1660 * 1661 * //When we find more than 2 square brackets 1662 * if (fDocumentHandler != null && brackets > 2) { 1663 * //we dont need to clear the buffer.. 1664 * //contentBuffer.clear(); 1665 * for (int i = 2; i < brackets; i++) { 1666 * contentBuffer.append(']'); 1667 * } 1668 * fDocumentHandler.characters(contentBuffer, null); 1669 * } 1670 * 1671 * if (fEntityScanner.skipChar('>')) { 1672 * break; 1673 * } 1674 * if (fDocumentHandler != null) { 1675 * //we dont need to clear the buffer now.. 1676 * //contentBuffer.clear(); 1677 * contentBuffer.append("]]"); 1678 * fDocumentHandler.characters(contentBuffer, null); 1679 * } 1680 **/ 1681 } else { 1682 int c = fEntityScanner.peekChar(); 1683 if (c != -1 && isInvalidLiteral(c)) { 1684 if (XMLChar.isHighSurrogate(c)) { 1685 //contentBuffer.clear(); 1686 //scan surrogates if any.... 1687 scanSurrogates(contentBuffer); 1688 } else { 1689 reportFatalError("InvalidCharInCDSect", 1690 new Object[]{Integer.toString(c,16)}); 1691 fEntityScanner.scanChar(); 1692 } 1693 } 1694 //by this time we have also read surrogate contents if any... 1695 if (fDocumentHandler != null) { 1696 //fDocumentHandler.characters(contentBuffer, null); 1697 } 1698 } 1699 } 1700 fMarkupDepth--; 1701 1702 if (fDocumentHandler != null && contentBuffer.length > 0) { 1703 //fDocumentHandler.characters(contentBuffer, null); 1704 } 1705 1706 // call handler 1707 if (fDocumentHandler != null) { 1708 //fDocumentHandler.endCDATA(null); 1709 } 1710 1711 return true; 1712 1713 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1714 1715 /** 1716 * Scans an end element. 1717 * <p> 1718 * <pre> 1719 * [42] ETag ::= '</' Name S? '>' 1720 * </pre> 1721 * <p> 1722 * <strong>Note:</strong> This method uses the fElementQName variable. 1723 * The contents of this variable will be destroyed. 
The caller should 1724 * copy the needed information out of this variable before calling 1725 * this method. 1726 * 1727 * @return The element depth. 1728 */ 1729 protected int scanEndElement() throws IOException, XNIException { 1730 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1731 1732 // pop context 1733 QName endElementName = fElementStack.popElement(); 1734 1735 String rawname = endElementName.rawname; 1736 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1737 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1738 //In scanners most of the time is consumed on checks done for XML characters, we can 1739 // optimize on it and avoid the checks done for endElement, 1740 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1741 1742 // this should work both for namespace processing true or false... 1743 1744 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1745 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1746 1747 if (!fEntityScanner.skipString(endElementName.rawname)) { 1748 reportFatalError("ETagRequired", new Object[]{rawname}); 1749 } 1750 1751 // end 1752 fEntityScanner.skipSpaces(); 1753 if (!fEntityScanner.skipChar('>')) { 1754 reportFatalError("ETagUnterminated", 1755 new Object[]{rawname}); 1756 } 1757 fMarkupDepth--; 1758 1759 //we have increased the depth for two markup "<" characters 1760 fMarkupDepth--; 1761 1762 // check that this element was opened in the same entity 1763 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1764 reportFatalError("ElementEntityMismatch", 1765 new Object[]{rawname}); 1766 } 1767 1768 //We should not be popping out the context here in endELement becaause the namespace context is still 1769 //valid when parser is at the endElement state. 
1770 1771 //if (fNamespaces) { 1772 // fNamespaceContext.popContext(); 1773 //} 1774 1775 // call handler 1776 if (fDocumentHandler != null ) { 1777 //end element is scanned in this function so we can send a callback 1778 //here. 1779 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1780 1781 fDocumentHandler.endElement(endElementName, null); 1782 } 1783 if(dtdGrammarUtil != null) 1784 dtdGrammarUtil.endElement(endElementName); 1785 1786 return fMarkupDepth; 1787 1788 } // scanEndElement():int 1789 1790 /** 1791 * Scans a character reference. 1792 * <p> 1793 * <pre> 1794 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1795 * </pre> 1796 */ 1797 protected void scanCharReference() 1798 throws IOException, XNIException { 1799 1800 fStringBuffer2.clear(); 1801 int ch = scanCharReferenceValue(fStringBuffer2, null); 1802 fMarkupDepth--; 1803 if (ch != -1) { 1804 // call handler 1805 1806 if (fDocumentHandler != null) { 1807 if (fNotifyCharRefs) { 1808 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1809 } 1810 Augmentations augs = null; 1811 if (fValidation && ch <= 0x20) { 1812 if (fTempAugmentations != null) { 1813 fTempAugmentations.removeAllItems(); 1814 } 1815 else { 1816 fTempAugmentations = new AugmentationsImpl(); 1817 } 1818 augs = fTempAugmentations; 1819 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1820 } 1821 //xxx: How do we deal with this - how to return charReferenceValues 1822 //now this is being commented because this is taken care in scanDocument() 1823 //fDocumentHandler.characters(fStringBuffer2, null); 1824 if (fNotifyCharRefs) { 1825 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1826 } 1827 } 1828 } 1829 1830 } // scanCharReference() 1831 1832 1833 /** 1834 * Scans an entity reference. 1835 * 1836 * @return returns true if the new entity is started. If it was built-in entity 1837 * 'false' is returned. 1838 * @throws IOException Thrown if i/o error occurs. 
1839 * @throws XNIException Thrown if handler throws exception upon 1840 * notification. 1841 */ 1842 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1843 String name = fEntityScanner.scanName(); 1844 if (name == null) { 1845 reportFatalError("NameRequiredInReference", null); 1846 return; 1847 } 1848 if (!fEntityScanner.skipChar(';')) { 1849 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1850 } 1851 if (fEntityStore.isUnparsedEntity(name)) { 1852 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1853 } 1854 fMarkupDepth--; 1855 fCurrentEntityName = name; 1856 1857 // handle built-in entities 1858 if (name == fAmpSymbol) { 1859 handleCharacter('&', fAmpSymbol, content); 1860 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1861 return ; 1862 } else if (name == fLtSymbol) { 1863 handleCharacter('<', fLtSymbol, content); 1864 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1865 return ; 1866 } else if (name == fGtSymbol) { 1867 handleCharacter('>', fGtSymbol, content); 1868 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1869 return ; 1870 } else if (name == fQuotSymbol) { 1871 handleCharacter('"', fQuotSymbol, content); 1872 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1873 return ; 1874 } else if (name == fAposSymbol) { 1875 handleCharacter('\'', fAposSymbol, content); 1876 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1877 return ; 1878 } 1879 1880 //1. if the entity is external and support to external entities is not required 1881 // 2. or entities should not be replaced 1882 //3. or if it is built in entity reference. 
1883 boolean isEE = fEntityStore.isExternalEntity(name); 1884 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1885 fScannerState = SCANNER_STATE_REFERENCE; 1886 return ; 1887 } 1888 // start general entity 1889 if (!fEntityStore.isDeclaredEntity(name)) { 1890 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1891 if (!fSupportDTD && fReplaceEntityReferences) { 1892 reportFatalError("EntityNotDeclared", new Object[]{name}); 1893 return; 1894 } 1895 //REVISIT: one more case needs to be included: external PE and standalone is no 1896 if ( fHasExternalDTD && !fStandalone) { 1897 if (fValidation) 1898 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1899 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1900 } else 1901 reportFatalError("EntityNotDeclared", new Object[]{name}); 1902 } 1903 //we are starting the entity even if the entity was not declared 1904 //if that was the case it its taken care in XMLEntityManager.startEntity() 1905 //we immediately call the endEntity. Application gets to know if there was 1906 //any entity that was not declared. 1907 fEntityManager.startEntity(name, false); 1908 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1909 //setScannerState(SCANNER_STATE_CONTENT); 1910 //return true ; 1911 } // scanEntityReference() 1912 1913 // utility methods 1914 1915 /** 1916 * Check if the depth exceeds the maxElementDepth limit 1917 * @param elementName name of the current element 1918 */ 1919 void checkDepth(String elementName) { 1920 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1921 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1922 fSecurityManager.debugPrint(fLimitAnalyzer); 1923 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1924 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1925 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1926 "maxElementDepth"}); 1927 } 1928 } 1929 1930 /** 1931 * Calls document handler with a single character resulting from 1932 * built-in entity resolution. 1933 * 1934 * @param c 1935 * @param entity built-in name 1936 * @param XMLStringBuffer append the character to buffer 1937 * 1938 * we really dont need to call this function -- this function is only required when 1939 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1940 * calling this function to hanlde built-in entity reference. 1941 * 1942 */ 1943 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1944 foundBuiltInRefs = true; 1945 content.append(c); 1946 if (fDocumentHandler != null) { 1947 fSingleChar[0] = c; 1948 if (fNotifyBuiltInRefs) { 1949 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1950 } 1951 fTempString.setValues(fSingleChar, 0, 1); 1952 //fDocumentHandler.characters(fTempString, null); 1953 1954 if (fNotifyBuiltInRefs) { 1955 fDocumentHandler.endGeneralEntity(entity, null); 1956 } 1957 } 1958 } // handleCharacter(char) 1959 1960 // helper methods 1961 1962 /** 1963 * Sets the scanner state. 1964 * 1965 * @param state The new scanner state. 
1966 */ 1967 protected final void setScannerState(int state) { 1968 1969 fScannerState = state; 1970 if (DEBUG_SCANNER_STATE) { 1971 System.out.print("### setScannerState: "); 1972 //System.out.print(fScannerState); 1973 System.out.print(getScannerStateName(state)); 1974 System.out.println(); 1975 } 1976 1977 } // setScannerState(int) 1978 1979 1980 /** 1981 * Sets the Driver. 1982 * 1983 * @param Driver The new Driver. 1984 */ 1985 protected final void setDriver(Driver driver) { 1986 fDriver = driver; 1987 if (DEBUG_DISPATCHER) { 1988 System.out.print("%%% setDriver: "); 1989 System.out.print(getDriverName(driver)); 1990 System.out.println(); 1991 } 1992 } 1993 1994 // 1995 // Private methods 1996 // 1997 1998 /** Returns the scanner state name. */ 1999 protected String getScannerStateName(int state) { 2000 2001 switch (state) { 2002 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 2003 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 2004 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 2005 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 2006 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 2007 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 2008 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 2009 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 2010 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 2011 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 2012 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 2013 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 2014 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2015 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2016 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2017 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 2018 } 
2019 2020 return "??? ("+state+')'; 2021 2022 } // getScannerStateName(int):String 2023 public String getEntityName(){ 2024 //return the cached name 2025 return fCurrentEntityName; 2026 } 2027 2028 /** Returns the driver name. */ 2029 public String getDriverName(Driver driver) { 2030 2031 if (DEBUG_DISPATCHER) { 2032 if (driver != null) { 2033 String name = driver.getClass().getName(); 2034 int index = name.lastIndexOf('.'); 2035 if (index != -1) { 2036 name = name.substring(index + 1); 2037 index = name.lastIndexOf('$'); 2038 if (index != -1) { 2039 name = name.substring(index + 1); 2040 } 2041 } 2042 return name; 2043 } 2044 } 2045 return "null"; 2046 2047 } // getDriverName():String 2048 2049 /** 2050 * Check the protocol used in the systemId against allowed protocols 2051 * 2052 * @param systemId the Id of the URI 2053 * @param allowedProtocols a list of allowed protocols separated by comma 2054 * @return the name of the protocol if rejected, null otherwise 2055 */ 2056 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2057 String baseSystemId = fEntityScanner.getBaseSystemId(); 2058 String expandedSystemId = fEntityManager.expandSystemId(systemId, baseSystemId,fStrictURI); 2059 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2060 } 2061 2062 // 2063 // Classes 2064 // 2065 2066 /** 2067 * @author Neeraj Bajaj, Sun Microsystems. 2068 */ 2069 protected static final class Element { 2070 2071 // 2072 // Data 2073 // 2074 2075 /** Symbol. */ 2076 public QName qname; 2077 2078 //raw name stored as characters 2079 public char[] fRawname; 2080 2081 /** The next Element entry. */ 2082 public Element next; 2083 2084 // 2085 // Constructors 2086 // 2087 2088 /** 2089 * Constructs a new Element from the given QName and next Element 2090 * reference. 
2091 */ 2092 public Element(QName qname, Element next) { 2093 this.qname.setValues(qname); 2094 this.fRawname = qname.rawname.toCharArray(); 2095 this.next = next; 2096 } 2097 2098 } // class Element 2099 2100 /** 2101 * Element stack. 2102 * 2103 * @author Neeraj Bajaj, Sun Microsystems. 2104 */ 2105 protected class ElementStack2 { 2106 2107 // 2108 // Data 2109 // 2110 2111 /** The stack data. */ 2112 protected QName [] fQName = new QName[20]; 2113 2114 //Element depth 2115 protected int fDepth; 2116 //total number of elements 2117 protected int fCount; 2118 //current position 2119 protected int fPosition; 2120 //Mark refers to the position 2121 protected int fMark; 2122 2123 protected int fLastDepth ; 2124 2125 // 2126 // Constructors 2127 // 2128 2129 /** Default constructor. */ 2130 public ElementStack2() { 2131 for (int i = 0; i < fQName.length; i++) { 2132 fQName[i] = new QName(); 2133 } 2134 fMark = fPosition = 1; 2135 } // <init>() 2136 2137 public void resize(){ 2138 /** 2139 * int length = fElements.length; 2140 * Element [] temp = new Element[length * 2]; 2141 * System.arraycopy(fElements, 0, temp, 0, length); 2142 * fElements = temp; 2143 */ 2144 //resize QNames 2145 int oldLength = fQName.length; 2146 QName [] tmp = new QName[oldLength * 2]; 2147 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2148 fQName = tmp; 2149 2150 for (int i = oldLength; i < fQName.length; i++) { 2151 fQName[i] = new QName(); 2152 } 2153 2154 } 2155 2156 2157 // 2158 // Public methods 2159 // 2160 2161 /** Check if the element scanned during the start element 2162 *matches the stored element. 2163 * 2164 *@return true if the match suceeds. 
2165 */ 2166 public boolean matchElement(QName element) { 2167 //last depth is the depth when last elemnt was pushed 2168 //if last depth is greater than current depth 2169 if(DEBUG_SKIP_ALGORITHM){ 2170 System.out.println("fLastDepth = " + fLastDepth); 2171 System.out.println("fDepth = " + fDepth); 2172 } 2173 boolean match = false; 2174 if(fLastDepth > fDepth && fDepth <= 2){ 2175 if(DEBUG_SKIP_ALGORITHM){ 2176 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2177 } 2178 if(element.rawname == fQName[fDepth].rawname){ 2179 fAdd = false; 2180 //mark this position 2181 //decrease the depth by 1 as arrays are 0 based 2182 fMark = fDepth - 1; 2183 //we found the match and from next element skipping will start, add 1 2184 fPosition = fMark + 1 ; 2185 match = true; 2186 //Once we get match decrease the count -- this was increased by nextElement() 2187 --fCount; 2188 if(DEBUG_SKIP_ALGORITHM){ 2189 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2190 System.out.println("fMark = " + fMark); 2191 System.out.println("fPosition = " + fPosition); 2192 System.out.println("fDepth = " + fDepth); 2193 System.out.println("fCount = " + fCount); 2194 } 2195 }else{ 2196 fAdd = true; 2197 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2198 } 2199 } 2200 //store the last depth 2201 fLastDepth = fDepth++; 2202 return match; 2203 } // pushElement(QName):QName 2204 2205 /** 2206 * This function doesn't increase depth. The function in this function is 2207 *broken down into two functions for efficiency. <@see>matchElement</see>. 2208 * This function just returns the pointer to the object and its values are set. 
2209 * 2210 *@return QName reference to the next element in the list 2211 */ 2212 public QName nextElement() { 2213 2214 //if number of elements becomes equal to the length of array -- stop the skipping 2215 if (fCount == fQName.length) { 2216 fShouldSkip = false; 2217 fAdd = false; 2218 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2219 //xxx: this is not correct, we are returning the last element 2220 //this wont make any difference since flag has been set to 'false' 2221 return fQName[--fCount]; 2222 } 2223 if(DEBUG_SKIP_ALGORITHM){ 2224 System.out.println("fCount = " + fCount); 2225 } 2226 return fQName[fCount++]; 2227 2228 } 2229 2230 /** Note that this function is considerably different than nextElement() 2231 * This function just returns the previously stored elements 2232 */ 2233 public QName getNext(){ 2234 //when position reaches number of elements in the list.. 2235 //set the position back to mark, making it a circular linked list. 2236 if(fPosition == fCount){ 2237 fPosition = fMark; 2238 } 2239 return fQName[fPosition++]; 2240 } 2241 2242 /** returns the current depth 2243 */ 2244 public int popElement(){ 2245 return fDepth--; 2246 } 2247 2248 2249 /** Clears the stack without throwing away existing QName objects. */ 2250 public void clear() { 2251 fLastDepth = 0; 2252 fDepth = 0; 2253 fCount = 0 ; 2254 fPosition = fMark = 1; 2255 } // clear() 2256 2257 } // class ElementStack 2258 2259 /** 2260 * Element stack. This stack operates without synchronization, error 2261 * checking, and it re-uses objects instead of throwing popped items 2262 * away. 2263 * 2264 * @author Andy Clark, IBM 2265 */ 2266 protected class ElementStack { 2267 2268 // 2269 // Data 2270 // 2271 2272 /** The stack data. 
*/
        protected QName[] fElements;
        // Per-depth index into fElements of the element opened at that depth.
        // NOTE(review): allocated with 20 entries and never grown in the code
        // visible here, while fElements can be doubled -- confirm that fDepth
        // always stays below fInt.length.
        protected int [] fInt = new int[20];


        //Element depth
        protected int fDepth;
        //total number of elements
        protected int fCount;
        //current position
        protected int fPosition;
        //Mark refers to the position
        protected int fMark;

        // depth recorded by the previous matchElement() call
        protected int fLastDepth ;

        //
        // Constructors
        //

        /** Default constructor: allocates 20 slots, each holding a reusable QName. */
        public ElementStack() {
            fElements = new QName[20];
            for (int i = 0; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        } // <init>()

        //
        // Public methods
        //

        /**
         * Pushes an element on the stack, doubling the array first when it
         * is full.
         * <p>
         * <strong>Note:</strong> The QName values are copied into the
         * stack. In other words, the caller does <em>not</em> orphan
         * the element to the stack. Also, the QName object returned
         * is <em>not</em> orphaned to the caller. It should be
         * considered read-only.
         *
         * @param element The element to push onto the stack.
         *
         * @return Returns the actual QName object that stores the
         *         copied values.
         */
        //XXX: THIS FUNCTION IS NOT USED
        public QName pushElement(QName element) {
            if (fDepth == fElements.length) {
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                // only the newly added upper half needs fresh QName objects
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }
            fElements[fDepth].setValues(element);
            return fElements[fDepth++];
        } // pushElement(QName):QName


        /** Note that this function is considerably different than nextElement()
         * This function just returns the previously stored elements.
         * It does not advance fPosition; push() does that.
         */
        public QName getNext(){
            //when position reaches number of elements in the list..
            //set the position back to mark, making it a circular linked list.
            if(fPosition == fCount){
                fPosition = fMark;
            }
            //store the position of last opened tag at particular depth
            //fInt[++fDepth] = fPosition;
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
            }
            //return fElements[fPosition++];
            return fElements[fPosition];
        }

        /** This function should be called only when element was skipped successfully.
         * 1. Increase the depth - because element was successfully skipped.
         * 2. Store the position of the element token in array "last opened tag" at depth.
         * 3. Increase the position counter so as to point to the next element in the array.
         */
        public void push(){

            // pre-increment depth, store the current position there, then advance the position
            fInt[++fDepth] = fPosition++;
        }

        /** Check if the element scanned during the start element
         * matches the stored element.
         *
         * Besides the comparison this records, in fInt[fDepth], which array
         * slot belongs to the current depth, and it switches skipping off
         * (fSkip and fAdd set to false, stack repositioned) once fCount has
         * reached the length of fElements.
         *
         * @param element the element just scanned; its rawname is compared by reference
         * @return true if the match succeeds; always false when the array
         *         was found to be full
         */
        public boolean matchElement(QName element) {
            //last depth is the depth when last element was pushed
            //if last depth is greater than current depth
            //if(DEBUG_SKIP_ALGORITHM){
            //   System.out.println("Check if the element " + element.rawname + " matches");
            //   System.out.println("fLastDepth = " + fLastDepth);
            //   System.out.println("fDepth = " + fDepth);
            //}
            boolean match = false;
            // NOTE(review): the branch indexes fElements[fDepth - 1], so it
            // presumes fDepth >= 1 whenever fLastDepth > fDepth -- confirm against callers.
            if(fLastDepth > fDepth && fDepth <= 3){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                    System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
                }
                // reference comparison of the raw names, not equals()
                if(element.rawname == fElements[fDepth - 1].rawname){
                    fAdd = false;
                    //mark this position
                    //decrease the depth by 1 as arrays are 0 based
                    fMark = fDepth - 1;
                    //we found the match
                    fPosition = fMark;
                    match = true;
                    //Once we get match decrease the count -- this was increased by nextElement()
                    --fCount;
                    if(DEBUG_SKIP_ALGORITHM){
                        System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                        System.out.println("fMark = " + fMark);
                        System.out.println("fPosition = " + fPosition);
                        System.out.println("fDepth = " + fDepth);
                        System.out.println("fCount = " + fCount);
                        System.out.println("---------MATCH SUCEEDED-----------------");
                        System.out.println("");
                    }
                }else{
                    fAdd = true;
                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
                }
            }
            //store the position for the current depth
            //when we are adding the elements, when skipping
            //starts even then this should be tracked ie. when
            //calling getNext()
            if(match){
                //from next element skipping will start, add 1
                fInt[fDepth] = fPosition++;
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
                }
                //since fInt[fDepth] contains pointer to the element array which are 0 based.
                fInt[fDepth] = fCount - 1;
            }

            //if number of elements becomes equal to the length of array -- stop the skipping
            //xxx: should we do "fCount == fInt.length"
            if (fCount == fElements.length) {
                fSkip = false;
                fAdd = false;
                //reposition the stack -- it seems to be too complex document and there is no symmetry in structure
                reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                    System.out.println("REPOSITIONING THE STACK");
                    System.out.println("-----------SKIPPING STOPPED----------");
                    System.out.println("");
                }
                // note: fLastDepth is not updated on this early exit
                return false;
            }
            if(DEBUG_SKIP_ALGORITHM){
                if(match){
                    System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }else{
                    System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }
            }
            //store the last depth
            fLastDepth = fDepth;
            return match;
        } // matchElement(QName):QName


        /**
         * Returns the next element on the stack.
         *
         * While skipping (fSkip) the slot is taken at fCount and both fDepth
         * and fCount advance; otherwise the slot at fDepth is returned,
         * after doubling the array when it is full.
         *
         * @return Returns the actual QName object. Callee should
         * use this object to store the details of next element encountered.
         */
        public QName nextElement() {
            if(fSkip){
                fDepth++;
                //boundary checks are done in matchElement()
                return fElements[fCount++];
            } else if (fDepth == fElements.length) {
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }

            return fElements[fDepth++];

        } // nextElement():QName


        /**
         * Pops an element off of the stack by setting the values of
         * the specified QName.
         * <p>
         * <strong>Note:</strong> The object returned is <em>not</em>
         * orphaned to the caller.
Therefore, the caller should consider 2477 * the object to be read-only. 2478 */ 2479 public QName popElement() { 2480 //return the same object that was pushed -- this would avoid 2481 //setting the values for every end element. 2482 //STRONG: this object is read only -- this object reference shouldn't be stored. 2483 if(fSkip || fAdd ){ 2484 if(DEBUG_SKIP_ALGORITHM){ 2485 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2486 System.out.println(""); 2487 } 2488 return fElements[fInt[fDepth--]]; 2489 } else{ 2490 if(DEBUG_SKIP_ALGORITHM){ 2491 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2492 } 2493 return fElements[--fDepth] ; 2494 } 2495 //element.setValues(fElements[--fDepth]); 2496 } // popElement(QName) 2497 2498 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2499 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2500 *as per the depth. 2501 */ 2502 public void reposition(){ 2503 for( int i = 2 ; i <= fDepth ; i++){ 2504 fElements[i-1] = fElements[fInt[i]]; 2505 } 2506 if(DEBUG_SKIP_ALGORITHM){ 2507 for( int i = 0 ; i < fDepth ; i++){ 2508 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2509 } 2510 } 2511 } 2512 2513 /** Clears the stack without throwing away existing QName objects. */ 2514 public void clear() { 2515 fDepth = 0; 2516 fLastDepth = 0; 2517 fCount = 0 ; 2518 fPosition = fMark = 1; 2519 2520 } // clear() 2521 2522 /** 2523 * This function is as a result of optimization done for endElement -- 2524 * we dont need to set the value for every end element encouterd. 2525 * For Well formedness checks we can have the same QName object that was pushed. 
2526 * the values will be set only if application need to know about the endElement 2527 * -- neeraj.bajaj@sun.com 2528 */ 2529 2530 public QName getLastPoppedElement(){ 2531 return fElements[fDepth]; 2532 } 2533 } // class ElementStack 2534 2535 /** 2536 * Drives the parser to the next state/event on the input. Parser is guaranteed 2537 * to stop at the next state/event. 2538 * 2539 * Internally XML document is divided into several states. Each state represents 2540 * a sections of XML document. When this functions returns normally, it has read 2541 * the section of XML document and returns the state corresponding to section of 2542 * document which has been read. For optimizations, a particular driver 2543 * can read ahead of the section of document (state returned) just read and 2544 * can maintain a different internal state. 2545 * 2546 * 2547 * @author Neeraj Bajaj, Sun Microsystems 2548 */ 2549 protected interface Driver { 2550 2551 2552 /** 2553 * Drives the parser to the next state/event on the input. Parser is guaranteed 2554 * to stop at the next state/event. 2555 * 2556 * Internally XML document is divided into several states. Each state represents 2557 * a sections of XML document. When this functions returns normally, it has read 2558 * the section of XML document and returns the state corresponding to section of 2559 * document which has been read. For optimizations, a particular driver 2560 * can read ahead of the section of document (state returned) just read and 2561 * can maintain a different internal state. 2562 * 2563 * @return state representing the section of document just read. 2564 * 2565 * @throws IOException Thrown on i/o error. 2566 * @throws XNIException Thrown on parse error. 2567 */ 2568 2569 public int next() throws IOException, XNIException; 2570 2571 } // interface Driver 2572 2573 /** 2574 * Driver to handle content scanning. This driver is capable of reading 2575 * the fragment of XML document. 
When it has finished reading fragment 2576 * of XML documents, it can pass the job of reading to another driver. 2577 * 2578 * This class has been modified as per the new design which is more suited to 2579 * efficiently build pull parser. Lot of performance improvements have been done and 2580 * the code has been added to support stax functionality/features. 2581 * 2582 * @author Neeraj Bajaj, Sun Microsystems 2583 * 2584 * 2585 * @author Andy Clark, IBM 2586 * @author Eric Ye, IBM 2587 */ 2588 protected class FragmentContentDriver 2589 implements Driver { 2590 2591 // 2592 // Driver methods 2593 // 2594 private boolean fContinueDispatching = true; 2595 private boolean fScanningForMarkup = true; 2596 2597 /** 2598 * decides the appropriate state of the parser 2599 */ 2600 private void startOfMarkup() throws IOException { 2601 fMarkupDepth++; 2602 final int ch = fEntityScanner.peekChar(); 2603 2604 switch(ch){ 2605 case '?' :{ 2606 setScannerState(SCANNER_STATE_PI); 2607 fEntityScanner.skipChar(ch); 2608 break; 2609 } 2610 case '!' 
:{ 2611 fEntityScanner.skipChar(ch); 2612 if (fEntityScanner.skipChar('-')) { 2613 if (!fEntityScanner.skipChar('-')) { 2614 reportFatalError("InvalidCommentStart", 2615 null); 2616 } 2617 setScannerState(SCANNER_STATE_COMMENT); 2618 } else if (fEntityScanner.skipString(cdata)) { 2619 setScannerState(SCANNER_STATE_CDATA ); 2620 } else if (!scanForDoctypeHook()) { 2621 reportFatalError("MarkupNotRecognizedInContent", 2622 null); 2623 } 2624 break; 2625 } 2626 case '/' :{ 2627 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2628 fEntityScanner.skipChar(ch); 2629 break; 2630 } 2631 default :{ 2632 if (isValidNameStartChar(ch)) { 2633 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2634 } else { 2635 reportFatalError("MarkupNotRecognizedInContent", 2636 null); 2637 } 2638 } 2639 } 2640 2641 }//startOfMarkup 2642 2643 private void startOfContent() throws IOException { 2644 if (fEntityScanner.skipChar('<')) { 2645 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2646 } else if (fEntityScanner.skipChar('&')) { 2647 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2648 } else { 2649 //element content is there.. 2650 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2651 } 2652 }//startOfContent 2653 2654 2655 /** 2656 * 2657 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2658 * At any point of time when in doubt over the current state of the parser, the state should be 2659 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2660 * the parser to one of its sub state. 2661 * sub states are defined in the parser on the basis of different XML component like 2662 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2663 * These sub states help the parser to have fine control over the parsing. These are the 2664 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2665 * decided if paresr needs to stop at next milepost ?? 2666 * 2667 */ 2668 public void decideSubState() throws IOException { 2669 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2670 2671 switch (fScannerState) { 2672 2673 case SCANNER_STATE_CONTENT: { 2674 startOfContent() ; 2675 break; 2676 } 2677 2678 case SCANNER_STATE_START_OF_MARKUP: { 2679 startOfMarkup() ; 2680 break; 2681 } 2682 } 2683 } 2684 }//decideSubState 2685 2686 /** 2687 * Drives the parser to the next state/event on the input. Parser is guaranteed 2688 * to stop at the next state/event. Internally XML document 2689 * is divided into several states. Each state represents a sections of XML 2690 * document. When this functions returns normally, it has read the section 2691 * of XML document and returns the state corresponding to section of 2692 * document which has been read. For optimizations, a particular driver 2693 * can read ahead of the section of document (state returned) just read and 2694 * can maintain a different internal state. 2695 * 2696 * State returned corresponds to Stax states. 2697 * 2698 * @return state representing the section of document just read. 2699 * 2700 * @throws IOException Thrown on i/o error. 2701 * @throws XNIException Thrown on parse error. 2702 */ 2703 2704 public int next() throws IOException, XNIException { 2705 while (true) { 2706 try { 2707 if(DEBUG_NEXT){ 2708 System.out.println("NOW IN FragmentContentDriver"); 2709 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2710 } 2711 2712 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2713 //decideSubState. 
2714 2715 switch (fScannerState) { 2716 case SCANNER_STATE_CONTENT: { 2717 final int ch = fEntityScanner.peekChar(); 2718 if (ch == '<') { 2719 fEntityScanner.scanChar(); 2720 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2721 } else if (ch == '&') { 2722 fEntityScanner.scanChar(); 2723 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2724 break; 2725 } else { 2726 //element content is there.. 2727 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2728 break; 2729 } 2730 } 2731 2732 case SCANNER_STATE_START_OF_MARKUP: { 2733 startOfMarkup(); 2734 break; 2735 }//case: SCANNER_STATE_START_OF_MARKUP 2736 2737 }//end of switch 2738 //decideSubState() ; 2739 2740 //do some special handling if isCoalesce is set to true. 2741 if(fIsCoalesce){ 2742 fUsebuffer = true ; 2743 //if the last section was character data 2744 if(fLastSectionWasCharacterData){ 2745 2746 //if we dont encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2747 //return the last scanned charactrer data. 2748 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2749 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2750 fLastSectionWasCharacterData = false; 2751 return XMLEvent.CHARACTERS; 2752 } 2753 }//if last section was CDATA or ENTITY REFERENCE 2754 //xxx: there might be another entity reference or CDATA after this 2755 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2756 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2757 //and current state is not SCANNER_STATE_CHARACTER_DATA 2758 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2759 //this means there is nothing more to be coalesced. 2760 //return the CHARACTERS event. 
2761 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2762 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2763 2764 fLastSectionWasCData = false; 2765 fLastSectionWasEntityReference = false; 2766 return XMLEvent.CHARACTERS; 2767 } 2768 } 2769 } 2770 2771 2772 if(DEBUG_NEXT){ 2773 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2774 } 2775 2776 switch(fScannerState){ 2777 2778 case XMLEvent.START_DOCUMENT : 2779 return XMLEvent.START_DOCUMENT; 2780 2781 case SCANNER_STATE_START_ELEMENT_TAG :{ 2782 2783 //xxx this function returns true when element is empty.. can be linked to end element event. 2784 //returns true if the element is empty 2785 fEmptyElement = scanStartElement() ; 2786 //if the element is empty the next event is "end element" 2787 if(fEmptyElement){ 2788 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2789 }else{ 2790 //set the next possible state 2791 setScannerState(SCANNER_STATE_CONTENT); 2792 } 2793 return XMLEvent.START_ELEMENT ; 2794 } 2795 2796 case SCANNER_STATE_CHARACTER_DATA: { 2797 if(DEBUG_COALESCE){ 2798 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2799 System.out.println("fIsCoalesce = " + fIsCoalesce); 2800 } 2801 //if last section was either entity reference or cdata or character data we should be using buffer 2802 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2803 2804 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2805 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2806 fLastSectionWasEntityReference = false; 2807 fLastSectionWasCData = false; 2808 fLastSectionWasCharacterData = true ; 2809 fUsebuffer = true; 2810 }else{ 2811 //clear the buffer 2812 fContentBuffer.clear(); 2813 } 2814 2815 //set the fTempString length to 0 before passing it on to scanContent 2816 //scanContent sets the correct co-ordinates as per the content read 2817 fTempString.length = 0; 2818 int c = fEntityScanner.scanContent(fTempString); 2819 if(DEBUG){ 2820 System.out.println("fTempString = " + fTempString); 2821 } 2822 if(fEntityScanner.skipChar('<')){ 2823 //check if we have reached end of element 2824 if(fEntityScanner.skipChar('/')){ 2825 //increase the mark up depth 2826 fMarkupDepth++; 2827 fLastSectionWasCharacterData = false; 2828 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2829 //check if its start of new element 2830 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2831 fMarkupDepth++; 2832 fLastSectionWasCharacterData = false; 2833 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2834 }else{ 2835 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2836 //there can be cdata ahead if coalesce is true we should call again 2837 if(fIsCoalesce){ 2838 fUsebuffer = true; 2839 fLastSectionWasCharacterData = true; 2840 fContentBuffer.append(fTempString); 2841 fTempString.length = 0; 2842 continue; 2843 } 2844 } 2845 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2846 if(fUsebuffer){ 2847 fContentBuffer.append(fTempString); 2848 fTempString.length = 0; 2849 } 2850 if(DEBUG){ 2851 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2852 } 2853 //check limit before returning event 2854 checkLimit(fContentBuffer); 2855 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2856 if(DEBUG)System.out.println("Return 
SPACE EVENT"); 2857 return XMLEvent.SPACE; 2858 }else 2859 return XMLEvent.CHARACTERS; 2860 2861 } else{ 2862 fUsebuffer = true ; 2863 if(DEBUG){ 2864 System.out.println("fContentBuffer = " + fContentBuffer); 2865 System.out.println("fTempString = " + fTempString); 2866 } 2867 fContentBuffer.append(fTempString); 2868 fTempString.length = 0; 2869 } 2870 if (c == '\r') { 2871 if(DEBUG){ 2872 System.out.println("'\r' character found"); 2873 } 2874 // happens when there is the character reference 2875 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2876 fEntityScanner.scanChar(); 2877 fUsebuffer = true; 2878 fContentBuffer.append((char)c); 2879 c = -1 ; 2880 } else if (c == ']') { 2881 //fStringBuffer.clear(); 2882 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2883 fUsebuffer = true; 2884 fContentBuffer.append((char)fEntityScanner.scanChar()); 2885 // remember where we are in case we get an endEntity before we 2886 // could flush the buffer out - this happens when we're parsing an 2887 // entity which ends with a ] 2888 fInScanContent = true; 2889 2890 // We work on a single character basis to handle cases such as: 2891 // ']]]>' which we might otherwise miss. 2892 // 2893 if (fEntityScanner.skipChar(']')) { 2894 fContentBuffer.append(']'); 2895 while (fEntityScanner.skipChar(']')) { 2896 fContentBuffer.append(']'); 2897 } 2898 if (fEntityScanner.skipChar('>')) { 2899 reportFatalError("CDEndInContent", null); 2900 } 2901 } 2902 c = -1 ; 2903 fInScanContent = false; 2904 } 2905 2906 do{ 2907 //xxx: we should be using only one buffer.. 2908 // we need not to grow the buffer only when isCoalesce() is not true; 2909 2910 if (c == '<') { 2911 fEntityScanner.scanChar(); 2912 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2913 break; 2914 }//xxx what should be the behavior if entity reference is present in the content ? 
2915 else if (c == '&') { 2916 fEntityScanner.scanChar(); 2917 setScannerState(SCANNER_STATE_REFERENCE); 2918 break; 2919 }///xxx since this part is also characters, it should be merged... 2920 else if (c != -1 && isInvalidLiteral(c)) { 2921 if (XMLChar.isHighSurrogate(c)) { 2922 // special case: surrogates 2923 scanSurrogates(fContentBuffer) ; 2924 setScannerState(SCANNER_STATE_CONTENT); 2925 } else { 2926 reportFatalError("InvalidCharInContent", 2927 new Object[] { 2928 Integer.toString(c, 16)}); 2929 fEntityScanner.scanChar(); 2930 } 2931 break; 2932 } 2933 //xxx: scanContent also gives character callback. 2934 c = scanContent(fContentBuffer) ; 2935 //we should not be iterating again if fIsCoalesce is not set to true 2936 2937 if(!fIsCoalesce){ 2938 setScannerState(SCANNER_STATE_CONTENT); 2939 break; 2940 } 2941 2942 }while(true); 2943 2944 //if (fDocumentHandler != null) { 2945 // fDocumentHandler.characters(fContentBuffer, null); 2946 //} 2947 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2948 //if fIsCoalesce is true there might be more data so call fDriver.next() 2949 if(fIsCoalesce){ 2950 fLastSectionWasCharacterData = true ; 2951 continue; 2952 }else{ 2953 //check limit before returning event 2954 checkLimit(fContentBuffer); 2955 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2956 if(DEBUG)System.out.println("Return SPACE EVENT"); 2957 return XMLEvent.SPACE; 2958 } else 2959 return XMLEvent.CHARACTERS ; 2960 } 2961 } 2962 2963 case SCANNER_STATE_END_ELEMENT_TAG :{ 2964 if(fEmptyElement){ 2965 //set it back to false. 2966 fEmptyElement = false; 2967 setScannerState(SCANNER_STATE_CONTENT); 2968 //check the case when there is comment after single element document 2969 //<foo/> and some comment after this 2970 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2971 2972 } else if(scanEndElement() == 0) { 2973 //It is last element of the document 2974 if (elementDepthIsZeroHook()) { 2975 //if element depth is zero , it indicates the end of the document 2976 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2977 //xxx understand this point once again.. 2978 return XMLEvent.END_ELEMENT ; 2979 } 2980 2981 } 2982 setScannerState(SCANNER_STATE_CONTENT); 2983 return XMLEvent.END_ELEMENT ; 2984 } 2985 2986 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2987 scanComment(); 2988 setScannerState(SCANNER_STATE_CONTENT); 2989 return XMLEvent.COMMENT; 2990 //break; 2991 } 2992 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2993 //clear the buffer first 2994 fContentBuffer.clear() ; 2995 //xxx: which buffer should be passed. Ideally we shouldn't have 2996 //more than two buffers -- 2997 //xxx: where should we add the switch for buffering. 2998 scanPI(fContentBuffer); 2999 setScannerState(SCANNER_STATE_CONTENT); 3000 return XMLEvent.PROCESSING_INSTRUCTION; 3001 //break; 3002 } 3003 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 3004 //xxx: What if CDATA is the first event 3005 //<foo><![CDATA[hello<><>]]>append</foo> 3006 3007 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 3008 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3009 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3010 fLastSectionWasCData = true ; 3011 fLastSectionWasEntityReference = false; 3012 fLastSectionWasCharacterData = false; 3013 }//if we dont need to coalesce clear the buffer 3014 else{ 3015 fContentBuffer.clear(); 3016 } 3017 fUsebuffer = true; 3018 //CDATA section is completely read in all the case. 3019 scanCDATASection(fContentBuffer , true); 3020 setScannerState(SCANNER_STATE_CONTENT); 3021 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3022 //and just call fDispatche.next(). Since we have set the scanner state to 3023 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3024 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3025 //2. Check if application has set for reporting CDATA event 3026 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3027 //return the cdata event as characters. 3028 if(fIsCoalesce){ 3029 fLastSectionWasCData = true ; 3030 //there might be more data to coalesce. 3031 continue; 3032 }else if(fReportCdataEvent){ 3033 return XMLEvent.CDATA; 3034 } else{ 3035 return XMLEvent.CHARACTERS; 3036 } 3037 } 3038 3039 case SCANNER_STATE_REFERENCE :{ 3040 fMarkupDepth++; 3041 foundBuiltInRefs = false; 3042 3043 //we should not clear the buffer only when the last state was either CDATA or 3044 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3045 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3046 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3047 //used when fIsCoalesce is set to true. 
3048 fLastSectionWasEntityReference = true ; 3049 fLastSectionWasCData = false; 3050 fLastSectionWasCharacterData = false; 3051 }//if we dont need to coalesce clear the buffer 3052 else{ 3053 fContentBuffer.clear(); 3054 } 3055 fUsebuffer = true ; 3056 //take care of character reference 3057 if (fEntityScanner.skipChar('#')) { 3058 scanCharReferenceValue(fContentBuffer, null); 3059 fMarkupDepth--; 3060 if(!fIsCoalesce){ 3061 setScannerState(SCANNER_STATE_CONTENT); 3062 return XMLEvent.CHARACTERS; 3063 } 3064 } else { 3065 // this function also starts new entity 3066 scanEntityReference(fContentBuffer); 3067 //if there was built-in entity reference & coalesce is not true 3068 //return CHARACTERS 3069 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3070 setScannerState(SCANNER_STATE_CONTENT); 3071 return XMLEvent.CHARACTERS; 3072 } 3073 3074 //if there was a text declaration, call next() it will be taken care. 3075 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3076 fLastSectionWasEntityReference = true ; 3077 continue; 3078 } 3079 3080 if(fScannerState == SCANNER_STATE_REFERENCE){ 3081 setScannerState(SCANNER_STATE_CONTENT); 3082 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3083 // Skip the entity reference, we don't care 3084 continue; 3085 } 3086 return XMLEvent.ENTITY_REFERENCE; 3087 } 3088 } 3089 //Wether it was character reference, entity reference or built-in entity 3090 //set the next possible state to SCANNER_STATE_CONTENT 3091 setScannerState(SCANNER_STATE_CONTENT); 3092 fLastSectionWasEntityReference = true ; 3093 continue; 3094 } 3095 3096 case SCANNER_STATE_TEXT_DECL: { 3097 // scan text decl 3098 if (fEntityScanner.skipString("<?xml")) { 3099 fMarkupDepth++; 3100 // NOTE: special case where entity starts with a PI 3101 // whose name starts with "xml" (e.g. 
"xmlfoo") 3102 if (isValidNameChar(fEntityScanner.peekChar())) { 3103 fStringBuffer.clear(); 3104 fStringBuffer.append("xml"); 3105 3106 if (fNamespaces) { 3107 while (isValidNCName(fEntityScanner.peekChar())) { 3108 fStringBuffer.append((char)fEntityScanner.scanChar()); 3109 } 3110 } else { 3111 while (isValidNameChar(fEntityScanner.peekChar())) { 3112 fStringBuffer.append((char)fEntityScanner.scanChar()); 3113 } 3114 } 3115 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3116 fContentBuffer.clear(); 3117 scanPIData(target, fContentBuffer); 3118 } 3119 3120 // standard text declaration 3121 else { 3122 //xxx: this function gives callback 3123 scanXMLDeclOrTextDecl(true); 3124 } 3125 } 3126 // now that we've straightened out the readers, we can read in chunks: 3127 fEntityManager.fCurrentEntity.mayReadChunks = true; 3128 setScannerState(SCANNER_STATE_CONTENT); 3129 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3130 //it seems we have to careful when to allow function issue a callback 3131 //and when to allow adapter issue a callback. 
continue;
                        }


                        // Root element: scanRootElementHook() decides whether the caller
                        // should stop after the start tag. Either way a START_ELEMENT
                        // event is returned.
                        case SCANNER_STATE_ROOT_ELEMENT: {
                            if (scanRootElementHook()) {
                                // Flag the element as empty; the scanner state is
                                // deliberately not changed on this path.
                                fEmptyElement = true;
                                //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
                                return XMLEvent.START_ELEMENT;
                            }
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.START_ELEMENT ;
                        }
                        // Character reference: the content buffer is reset, the reference
                        // value is scanned into it and reported as CHARACTERS.
                        case SCANNER_STATE_CHAR_REFERENCE : {
                            fContentBuffer.clear();
                            scanCharReferenceValue(fContentBuffer, null);
                            fMarkupDepth--;
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.CHARACTERS;
                        }
                        // Any state not handled by a case above is rejected.
                        default:
                            throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                    }//switch
                }
                // premature end of file: let the hook decide whether it is an
                // error, then signal the end to the caller with -1
                catch (EOFException e) {
                    endOfFileHook(e);
                    return -1;
                }
            } //while loop
        }//next

        /**
         * Adds the length of the content buffer to the size accumulated for
         * the current entity and reports a fatal error when a limit is
         * exceeded.
         * <p>
         * Nothing is counted or checked unless the limit analyzer is tracking
         * {@code fCurrentEntityName}. When it is, {@code buffer.length} is
         * added under {@code Limit.GENERAL_ENTITY_SIZE_LIMIT}, and then the
         * general entity size limit ("MaxEntitySizeLimit") and the total
         * entity size limit ("TotalEntitySizeLimit") are checked in that order.
         *
         * @param buffer content buffer; only its {@code length} field is read
         */
        protected void checkLimit(XMLStringBuffer buffer) {
            if (fLimitAnalyzer.isTracking(fCurrentEntityName)) {
                fLimitAnalyzer.addValue(Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntityName, buffer.length);
                if (fSecurityManager.isOverLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    // message arguments: entity name, accumulated value, limit, state literal
                    reportFatalError("MaxEntitySizeLimit", new Object[]{fCurrentEntityName,
                        fLimitAnalyzer.getValue(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.GENERAL_ENTITY_SIZE_LIMIT)});
                }
                // NOTE(review): this second check is not in an else branch, so it
                // also runs after the first error if reportFatalError returns
                // normally - reportFatalError is not visible here; confirm.
                if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    // message arguments: accumulated total, limit, state literal
                    reportFatalError("TotalEntitySizeLimit",
                        new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)});
                }
            }
        }

        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "&lt;!" has been scanned.
         * <p>
         * This default implementation scans nothing and returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. However, when scanning a full XML document, the
         * scanner must handle the trailing miscellaneous section of
         * the document after the end of the document's root element.
         * <p>
         * This default implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Scan for root element hook. This method is a hook for
         * subclasses to add code that handles scanning for the root
         * element. When scanning a document fragment, there is no
         * "root" element. However, when scanning a full XML document,
         * the scanner must handle the root element specially.
         * <p>
         * This default implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean scanRootElementHook()
        throws IOException, XNIException {
            return false;
        } // scanRootElementHook():boolean

        /**
         * End of file hook. This method is a hook for subclasses to
         * add code that handles the end of file. The end of file in
         * a document fragment is OK if the markup depth is zero.
         * However, when scanning a full XML document, an end of file
         * is always premature.
         * <p>
         * This implementation reports the fatal error "PrematureEOF" when
         * {@code fMarkupDepth} is non-zero and otherwise does nothing.
         *
         * @param e the end-of-file exception that was caught; it is not
         *          inspected by this implementation
         */
        protected void endOfFileHook(EOFException e)
        throws IOException, XNIException {

            // NOTE: An end of file is only an error if we were
            //       in the middle of scanning some markup. -Ac
            if (fMarkupDepth != 0) {
                reportFatalError("PrematureEOF", null);
            }

        } // endOfFileHook()

    } // class FragmentContentDriver

    /**
     * Debugging helper: prints the given string on standard output.
     *
     * @param str the text to print
     */
    static void pr(String str) {
        System.out.println(str) ;
    }

    // Set to true by refresh(int) and by the content driver whenever character
    // data has been appended to fContentBuffer; the content driver tests it to
    // decide whether the pending fTempString must be appended to that buffer too.
    protected boolean fUsebuffer ;

    /** this function gets an XMLString (which is used to store the attribute value) from the special pool
     *  maintained for attributes.
     *  fAttributeCacheUsedCount tracks the number of attributes that have been consumed from the pool.
     *  if all the attributes have been consumed, it adds a new XMLString in the pool and returns the same
     *  XMLString.
     *
     * @return XMLString XMLString used to store an attribute value.
     */

    protected XMLString getString(){
        // NOTE(review): the first condition alone allows an index that may not be
        // below attributeValueCache.size(); presumably the pool is pre-filled with
        // at least initialCacheCount entries - confirm where the pool is created.
        if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){
            // reuse the pooled entry at the current index and advance the counter
            return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++);
        } else{
            // pool exhausted: create a new entry, count it as used and add it to the pool
            XMLString str = new XMLString();
            fAttributeCacheUsedCount++;
            attributeValueCache.add(str);
            return str;
        }
    }

    /**
     * Implements XMLBufferListener interface.
     * Delegates to {@link #refresh(int)} with a position of 0.
     */

    public void refresh(){
        refresh(0);
    }

    /**
     * receives callbacks from {@link XMLEntityReader} when buffer
     * is being changed.
     * @param refreshPosition not read by this implementation
     */
    public void refresh(int refreshPosition){
        //If you are reading attributes and you got a callback
        //cache available attributes.
        if(fReadingAttributes){
            fAttributes.refresh();
        }
        if(fScannerState == SCANNER_STATE_CHARACTER_DATA){
            //since fTempString directly matches to the underlying main buffer
            //store the data into buffer
            fContentBuffer.append(fTempString);
            //clear the XMLString so that data can't be added again.
            fTempString.length = 0;
            // record that the character data now lives in fContentBuffer
            fUsebuffer = true;
        }
    }

} // class XMLDocumentFragmentScannerImpl