1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 54 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 55 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 56 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 57 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 58 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.State; 59 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 60 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 61 import javax.xml.XMLConstants; 62 import javax.xml.stream.XMLStreamConstants; 63 import javax.xml.stream.events.XMLEvent; 64 65 /** 66 * 67 * This class is responsible for scanning the structure and content 68 * of document fragments. 69 * 70 * This class has been modified as per the new design which is more suited to 71 * efficiently build pull parser. Lot of improvements have been done and 72 * the code has been added to support stax functionality/features. 
73 * 74 * @author Neeraj Bajaj SUN Microsystems 75 * @author K.Venugopal SUN Microsystems 76 * @author Glenn Marcy, IBM 77 * @author Andy Clark, IBM 78 * @author Arnaud Le Hors, IBM 79 * @author Eric Ye, IBM 80 * @author Sunitha Reddy, SUN Microsystems 81 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $ 82 * 83 */ 84 public class XMLDocumentFragmentScannerImpl 85 extends XMLScanner 86 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 87 88 // 89 // Constants 90 // 91 92 protected int fElementAttributeLimit; 93 94 /** External subset resolver. **/ 95 protected ExternalSubsetResolver fExternalSubsetResolver; 96 97 // scanner states 98 99 //XXX this should be divided into more states. 100 /** Scanner state: start of markup. */ 101 protected static final int SCANNER_STATE_START_OF_MARKUP = 21; 102 103 /** Scanner state: content. */ 104 protected static final int SCANNER_STATE_CONTENT = 22; 105 106 /** Scanner state: processing instruction. */ 107 protected static final int SCANNER_STATE_PI = 23; 108 109 /** Scanner state: DOCTYPE. */ 110 protected static final int SCANNER_STATE_DOCTYPE = 24; 111 112 /** Scanner state: XML Declaration */ 113 protected static final int SCANNER_STATE_XML_DECL = 25; 114 115 /** Scanner state: root element. */ 116 protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; 117 118 /** Scanner state: comment. */ 119 protected static final int SCANNER_STATE_COMMENT = 27; 120 121 /** Scanner state: reference. */ 122 protected static final int SCANNER_STATE_REFERENCE = 28; 123 124 // <book type="hard"> reading attribute name 'type' 125 protected static final int SCANNER_STATE_ATTRIBUTE = 29; 126 127 // <book type="hard"> //reading attribute value. 128 protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; 129 130 /** Scanner state: trailing misc. 
USED BY DOCUMENT_SCANNER_IMPL*/ 131 //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 132 133 /** Scanner state: end of input. */ 134 protected static final int SCANNER_STATE_END_OF_INPUT = 33; 135 136 /** Scanner state: terminated. */ 137 protected static final int SCANNER_STATE_TERMINATED = 34; 138 139 /** Scanner state: CDATA section. */ 140 protected static final int SCANNER_STATE_CDATA = 35; 141 142 /** Scanner state: Text declaration. */ 143 protected static final int SCANNER_STATE_TEXT_DECL = 36; 144 145 /** Scanner state: character data. */ 146 protected static final int SCANNER_STATE_CHARACTER_DATA = 37; 147 148 //<book type="hard">foo</book> 149 protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; 150 151 //<book type="hard">foo</book> reading </book> 152 protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; 153 154 protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; 155 protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; 156 157 // feature identifiers 158 159 160 /** Feature identifier: notify built-in references. */ 161 protected static final String NOTIFY_BUILTIN_REFS = 162 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; 163 164 /** Property identifier: entity resolver. */ 165 protected static final String ENTITY_RESOLVER = 166 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 167 168 /** Feature identifier: standard uri conformant */ 169 protected static final String STANDARD_URI_CONFORMANT = 170 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 171 172 /** Property identifier: Security property manager. 
*/ 173 private static final String XML_SECURITY_PROPERTY_MANAGER = 174 Constants.XML_SECURITY_PROPERTY_MANAGER; 175 176 /** access external dtd: file protocol 177 * For DOM/SAX, the secure feature is set to true by default 178 */ 179 final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 180 181 // recognized features and properties 182 183 /** Recognized features. */ 184 private static final String[] RECOGNIZED_FEATURES = { 185 NAMESPACES, 186 VALIDATION, 187 NOTIFY_BUILTIN_REFS, 188 NOTIFY_CHAR_REFS, 189 Constants.STAX_REPORT_CDATA_EVENT 190 }; 191 192 /** Feature defaults. */ 193 private static final Boolean[] FEATURE_DEFAULTS = { 194 Boolean.TRUE, 195 null, 196 Boolean.FALSE, 197 Boolean.FALSE, 198 Boolean.TRUE 199 }; 200 201 /** Recognized properties. */ 202 private static final String[] RECOGNIZED_PROPERTIES = { 203 SYMBOL_TABLE, 204 ERROR_REPORTER, 205 ENTITY_MANAGER, 206 XML_SECURITY_PROPERTY_MANAGER 207 }; 208 209 /** Property defaults. */ 210 private static final Object[] PROPERTY_DEFAULTS = { 211 null, 212 null, 213 null, 214 EXTERNAL_ACCESS_DEFAULT 215 }; 216 217 private static final char [] cdata = {'[','C','D','A','T','A','['}; 218 static final char [] xmlDecl = {'<','?','x','m','l'}; 219 private static final char [] endTag = {'<','/'}; 220 // debugging 221 222 /** Debug scanner state. */ 223 private static final boolean DEBUG_SCANNER_STATE = false; 224 225 /** Debug driver. */ 226 private static final boolean DEBUG_DISPATCHER = false; 227 228 /** Debug content driver scanning. */ 229 protected static final boolean DEBUG_START_END_ELEMENT = false; 230 231 232 /** Debug driver next */ 233 protected static final boolean DEBUG_NEXT = false ; 234 235 /** Debug driver next */ 236 protected static final boolean DEBUG = false; 237 protected static final boolean DEBUG_COALESCE = false; 238 // 239 // Data 240 // 241 242 // protected data 243 244 /** Document handler. 
*/ 245 protected XMLDocumentHandler fDocumentHandler; 246 protected int fScannerLastState ; 247 248 /** Entity Storage */ 249 protected XMLEntityStorage fEntityStore; 250 251 /** Entity stack. */ 252 protected int[] fEntityStack = new int[4]; 253 254 /** Markup depth. */ 255 protected int fMarkupDepth; 256 257 //is the element empty 258 protected boolean fEmptyElement ; 259 260 //track if we are reading attributes, this is usefule while 261 //there is a callback 262 protected boolean fReadingAttributes = false; 263 264 /** Scanner state. */ 265 protected int fScannerState; 266 267 /** SubScanner state: inside scanContent method. */ 268 protected boolean fInScanContent = false; 269 protected boolean fLastSectionWasCData = false; 270 protected boolean fLastSectionWasEntityReference = false; 271 protected boolean fLastSectionWasCharacterData = false; 272 273 /** has external dtd */ 274 protected boolean fHasExternalDTD; 275 276 /** Standalone. */ 277 protected boolean fStandaloneSet; 278 protected boolean fStandalone; 279 protected String fVersion; 280 281 // element information 282 283 /** Current element. */ 284 protected QName fCurrentElement; 285 286 /** Element stack. */ 287 protected ElementStack fElementStack = new ElementStack(); 288 protected ElementStack2 fElementStack2 = new ElementStack2(); 289 290 // other info 291 292 /** Document system identifier. 293 * REVISIT: So what's this used for? - NG 294 * protected String fDocumentSystemId; 295 ******/ 296 297 protected String fPITarget ; 298 299 //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values 300 protected XMLString fPIData = new XMLString(); 301 302 // features 303 304 305 /** Notify built-in references. */ 306 protected boolean fNotifyBuiltInRefs = false; 307 308 //STAX related properties 309 //defaultValues. 
310 protected boolean fSupportDTD = true; 311 protected boolean fReplaceEntityReferences = true; 312 protected boolean fSupportExternalEntities = false; 313 protected boolean fReportCdataEvent = false ; 314 protected boolean fIsCoalesce = false ; 315 protected String fDeclaredEncoding = null; 316 /** Xerces Feature: Disallow doctype declaration. */ 317 protected boolean fDisallowDoctype = false; 318 319 /** 320 * comma-delimited list of protocols that are allowed for the purpose 321 * of accessing external dtd or entity references 322 */ 323 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 324 325 /** 326 * standard uri conformant (strict uri). 327 * http://apache.org/xml/features/standard-uri-conformant 328 */ 329 protected boolean fStrictURI; 330 331 // drivers 332 333 /** Active driver. */ 334 protected Driver fDriver; 335 336 /** Content driver. */ 337 protected Driver fContentDriver = createContentDriver(); 338 339 // temporary variables 340 341 /** Element QName. */ 342 protected QName fElementQName = new QName(); 343 344 /** Attribute QName. */ 345 protected QName fAttributeQName = new QName(); 346 347 /** 348 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 349 * implements Iterator interface so we can directly give Attributes in the form of 350 * iterator. 351 */ 352 protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); 353 354 355 /** String. */ 356 protected XMLString fTempString = new XMLString(); 357 358 /** String. */ 359 protected XMLString fTempString2 = new XMLString(); 360 361 /** Array of 3 strings. */ 362 private String[] fStrings = new String[3]; 363 364 /** Making the buffer accesible to derived class -- String buffer. */ 365 protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 366 367 /** Making the buffer accesible to derived class -- String buffer. 
*/ 368 protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 369 370 /** stores character data. */ 371 /** Making the buffer accesible to derived class -- stores PI data */ 372 protected XMLStringBuffer fContentBuffer = new XMLStringBuffer(); 373 374 /** Single character array. */ 375 private final char[] fSingleChar = new char[1]; 376 private String fCurrentEntityName = null; 377 378 // New members 379 protected boolean fScanToEnd = false; 380 381 protected DTDGrammarUtil dtdGrammarUtil= null; 382 383 protected boolean fAddDefaultAttr = false; 384 385 protected boolean foundBuiltInRefs = false; 386 387 388 //skip element algorithm 389 static final short MAX_DEPTH_LIMIT = 5 ; 390 static final short ELEMENT_ARRAY_LENGTH = 200 ; 391 static final short MAX_POINTER_AT_A_DEPTH = 4 ; 392 static final boolean DEBUG_SKIP_ALGORITHM = false; 393 //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH 394 String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ; 395 //pointer location where last element was skipped 396 short fLastPointerLocation = 0 ; 397 short fElementPointer = 0 ; 398 //2D array to store pointer info 399 short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ; 400 protected String fElementRawname ; 401 protected boolean fShouldSkip = false; 402 protected boolean fAdd = false ; 403 protected boolean fSkip = false; 404 405 /** Reusable Augmentations. */ 406 private Augmentations fTempAugmentations = null; 407 // 408 // Constructors 409 // 410 411 /** Default constructor. */ 412 public XMLDocumentFragmentScannerImpl() { 413 } // <init>() 414 415 // 416 // XMLDocumentScanner methods 417 // 418 419 /** 420 * Sets the input source. 421 * 422 * @param inputSource The input source. 423 * 424 * @throws IOException Thrown on i/o error. 
425 */ 426 public void setInputSource(XMLInputSource inputSource) throws IOException { 427 fEntityManager.setEntityHandler(this); 428 fEntityManager.startEntity("$fragment$", inputSource, false, true); 429 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 430 } // setInputSource(XMLInputSource) 431 432 /** 433 * Scans a document. 434 * 435 * @param complete True if the scanner should scan the document 436 * completely, pushing all events to the registered 437 * document handler. A value of false indicates that 438 * that the scanner should only scan the next portion 439 * of the document and return. A scanner instance is 440 * permitted to completely scan a document if it does 441 * not support this "pull" scanning model. 442 * 443 * @return True if there is more to scan, false otherwise. 444 */ 445 public boolean scanDocument(boolean complete) 446 throws IOException, XNIException { 447 448 // keep dispatching "events" 449 fEntityManager.setEntityHandler(this); 450 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 451 452 int event = next(); 453 do { 454 switch (event) { 455 case XMLStreamConstants.START_DOCUMENT : 456 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 457 break; 458 case XMLStreamConstants.START_ELEMENT : 459 //System.out.println(" in scann element"); 460 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 461 break; 462 case XMLStreamConstants.CHARACTERS : 463 fDocumentHandler.characters(getCharacterData(),null); 464 break; 465 case XMLStreamConstants.SPACE: 466 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
467 //System.out.println("in the space"); 468 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 469 break; 470 case XMLStreamConstants.ENTITY_REFERENCE : 471 //entity reference callback are given in startEntity 472 break; 473 case XMLStreamConstants.PROCESSING_INSTRUCTION : 474 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 475 break; 476 case XMLStreamConstants.COMMENT : 477 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 478 fDocumentHandler.comment(getCharacterData(),null); 479 break; 480 case XMLStreamConstants.DTD : 481 //all DTD related callbacks are handled in DTDScanner. 482 //1. Stax doesn't define DTD states as it does for XML Document. 483 //therefore we don't need to take care of anything here. So Just break; 484 break; 485 case XMLStreamConstants.CDATA: 486 fDocumentHandler.startCDATA(null); 487 //xxx: check if CDATA values comes from getCharacterData() function 488 fDocumentHandler.characters(getCharacterData(),null); 489 fDocumentHandler.endCDATA(null); 490 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 491 break; 492 case XMLStreamConstants.NOTATION_DECLARATION : 493 break; 494 case XMLStreamConstants.ENTITY_DECLARATION : 495 break; 496 case XMLStreamConstants.NAMESPACE : 497 break; 498 case XMLStreamConstants.ATTRIBUTE : 499 break; 500 case XMLStreamConstants.END_ELEMENT : 501 //do not give callback here. 502 //this callback is given in scanEndElement function. 
503 //fDocumentHandler.endElement(getElementQName(),null); 504 break; 505 default : 506 throw new InternalError("processing event: " + event); 507 508 } 509 //System.out.println("here in before calling next"); 510 event = next(); 511 //System.out.println("here in after calling next"); 512 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 513 514 if(event == XMLStreamConstants.END_DOCUMENT) { 515 fDocumentHandler.endDocument(null); 516 return false; 517 } 518 519 return true; 520 521 } // scanDocument(boolean):boolean 522 523 524 525 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 526 if(fScannerLastState == XMLEvent.END_ELEMENT){ 527 fElementQName.setValues(fElementStack.getLastPoppedElement()); 528 } 529 return fElementQName ; 530 } 531 532 /** return the next state on the input 533 * @return int 534 */ 535 536 public int next() throws IOException, XNIException { 537 return fDriver.next(); 538 } 539 540 // 541 // XMLComponent methods 542 // 543 544 /** 545 * Resets the component. The component can query the component manager 546 * about any features and properties that affect the operation of the 547 * component. 548 * 549 * @param componentManager The component manager. 550 * 551 * @throws SAXException Thrown by component on initialization error. 552 * For example, if a feature or property is 553 * required for the operation of the component, the 554 * component manager may throw a 555 * SAXNotRecognizedException or a 556 * SAXNotSupportedException. 
557 */ 558 559 public void reset(XMLComponentManager componentManager) 560 throws XMLConfigurationException { 561 562 super.reset(componentManager); 563 564 // other settings 565 // fDocumentSystemId = null; 566 567 // sax features 568 //fAttributes.setNamespaces(fNamespaces); 569 570 // xerces features 571 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 572 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 573 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 574 575 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 576 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 577 (ExternalSubsetResolver) resolver : null; 578 579 //attribute 580 fReadingAttributes = false; 581 //xxx: external entities are supported in Xerces 582 // it would be good to define feature for this case 583 fSupportExternalEntities = true; 584 fReplaceEntityReferences = true; 585 fIsCoalesce = false; 586 587 // setup Driver 588 setScannerState(SCANNER_STATE_CONTENT); 589 setDriver(fContentDriver); 590 591 // JAXP 1.5 features and properties 592 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 593 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 594 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 595 596 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 597 598 resetCommon(); 599 //fEntityManager.test(); 600 } // reset(XMLComponentManager) 601 602 603 public void reset(PropertyManager propertyManager){ 604 605 super.reset(propertyManager); 606 607 // other settings 608 // fDocumentSystemId = null; 609 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 610 fNotifyBuiltInRefs = false ; 611 612 //fElementStack2.clear(); 613 //fReplaceEntityReferences = true; 614 
//fSupportExternalEntities = true; 615 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_REPLACING_ENTITY_REFERENCES); 616 fReplaceEntityReferences = bo.booleanValue(); 617 bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_SUPPORTING_EXTERNAL_ENTITIES); 618 fSupportExternalEntities = bo.booleanValue(); 619 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 620 if(cdata != null) 621 fReportCdataEvent = cdata.booleanValue() ; 622 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 623 if(coalesce != null) 624 fIsCoalesce = coalesce.booleanValue(); 625 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 626 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 627 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 628 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 629 // setup Driver 630 //we dont need to do this -- nb. 
631 //setScannerState(SCANNER_STATE_CONTENT); 632 //setDriver(fContentDriver); 633 //fEntityManager.test(); 634 635 // JAXP 1.5 features and properties 636 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 637 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 638 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 639 640 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 641 resetCommon(); 642 } // reset(XMLComponentManager) 643 644 void resetCommon() { 645 // initialize vars 646 fMarkupDepth = 0; 647 fCurrentElement = null; 648 fElementStack.clear(); 649 fHasExternalDTD = false; 650 fStandaloneSet = false; 651 fStandalone = false; 652 fInScanContent = false; 653 //skipping algorithm 654 fShouldSkip = false; 655 fAdd = false; 656 fSkip = false; 657 658 fEntityStore = fEntityManager.getEntityStore(); 659 dtdGrammarUtil = null; 660 661 if (fSecurityManager != null) { 662 fLimitAnalyzer = fSecurityManager.getLimitAnalyzer(); 663 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 664 } else { 665 fLimitAnalyzer = null; 666 fElementAttributeLimit = 0; 667 } 668 } 669 670 /** 671 * Returns a list of feature identifiers that are recognized by 672 * this component. This method may return null if no features 673 * are recognized by this component. 674 */ 675 public String[] getRecognizedFeatures() { 676 return (String[])(RECOGNIZED_FEATURES.clone()); 677 } // getRecognizedFeatures():String[] 678 679 /** 680 * Sets the state of a feature. This method is called by the component 681 * manager any time after reset when a feature changes state. 682 * <p> 683 * <strong>Note:</strong> Components should silently ignore features 684 * that do not affect the operation of the component. 685 * 686 * @param featureId The feature identifier. 687 * @param state The state of the feature. 
688 * 689 * @throws SAXNotRecognizedException The component should not throw 690 * this exception. 691 * @throws SAXNotSupportedException The component should not throw 692 * this exception. 693 */ 694 public void setFeature(String featureId, boolean state) 695 throws XMLConfigurationException { 696 697 super.setFeature(featureId, state); 698 699 // Xerces properties 700 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 701 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 702 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 703 fNotifyBuiltInRefs = state; 704 } 705 } 706 707 } // setFeature(String,boolean) 708 709 /** 710 * Returns a list of property identifiers that are recognized by 711 * this component. This method may return null if no properties 712 * are recognized by this component. 713 */ 714 public String[] getRecognizedProperties() { 715 return (String[])(RECOGNIZED_PROPERTIES.clone()); 716 } // getRecognizedProperties():String[] 717 718 /** 719 * Sets the value of a property. This method is called by the component 720 * manager any time after reset when a property changes value. 721 * <p> 722 * <strong>Note:</strong> Components should silently ignore properties 723 * that do not affect the operation of the component. 724 * 725 * @param propertyId The property identifier. 726 * @param value The value of the property. 727 * 728 * @throws SAXNotRecognizedException The component should not throw 729 * this exception. 730 * @throws SAXNotSupportedException The component should not throw 731 * this exception. 
732 */ 733 public void setProperty(String propertyId, Object value) 734 throws XMLConfigurationException { 735 736 super.setProperty(propertyId, value); 737 738 // Xerces properties 739 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 740 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 741 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 742 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 743 fEntityManager = (XMLEntityManager)value; 744 return; 745 } 746 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 747 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 748 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 749 (ExternalSubsetResolver) value : null; 750 return; 751 } 752 } 753 754 755 // Xerces properties 756 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 757 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 758 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 759 fEntityManager = (XMLEntityManager)value; 760 } 761 return; 762 } 763 764 //JAXP 1.5 properties 765 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 766 { 767 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 768 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 769 } 770 771 } // setProperty(String,Object) 772 773 /** 774 * Returns the default state for a feature, or null if this 775 * component does not want to report a default value for this 776 * feature. 777 * 778 * @param featureId The feature identifier. 
779 * 780 * @since Xerces 2.2.0 781 */ 782 public Boolean getFeatureDefault(String featureId) { 783 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 784 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 785 return FEATURE_DEFAULTS[i]; 786 } 787 } 788 return null; 789 } // getFeatureDefault(String):Boolean 790 791 /** 792 * Returns the default state for a property, or null if this 793 * component does not want to report a default value for this 794 * property. 795 * 796 * @param propertyId The property identifier. 797 * 798 * @since Xerces 2.2.0 799 */ 800 public Object getPropertyDefault(String propertyId) { 801 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 802 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 803 return PROPERTY_DEFAULTS[i]; 804 } 805 } 806 return null; 807 } // getPropertyDefault(String):Object 808 809 // 810 // XMLDocumentSource methods 811 // 812 813 /** 814 * setDocumentHandler 815 * 816 * @param documentHandler 817 */ 818 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 819 fDocumentHandler = documentHandler; 820 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 821 } // setDocumentHandler(XMLDocumentHandler) 822 823 824 /** Returns the document handler */ 825 public XMLDocumentHandler getDocumentHandler(){ 826 return fDocumentHandler; 827 } 828 829 // 830 // XMLEntityHandler methods 831 // 832 833 /** 834 * This method notifies of the start of an entity. The DTD has the 835 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 836 * general entities are just specified by their name. 837 * 838 * @param name The name of the entity. 839 * @param identifier The resource identifier. 840 * @param encoding The auto-detected IANA encoding name of the entity 841 * stream. This value will be null in those situations 842 * where the entity encoding is not auto-detected (e.g. 843 * internal entities or a document entity that is 844 * parsed from a java.io.Reader). 
845 * @param augs Additional information that may include infoset augmentations 846 * 847 * @throws XNIException Thrown by handler to signal an error. 848 */ 849 public void startEntity(String name, 850 XMLResourceIdentifier identifier, 851 String encoding, Augmentations augs) throws XNIException { 852 853 // keep track of this entity before fEntityDepth is increased 854 if (fEntityDepth == fEntityStack.length) { 855 int[] entityarray = new int[fEntityStack.length * 2]; 856 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 857 fEntityStack = entityarray; 858 } 859 fEntityStack[fEntityDepth] = fMarkupDepth; 860 861 super.startEntity(name, identifier, encoding, augs); 862 863 // WFC: entity declared in external subset in standalone doc 864 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 865 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 866 new Object[]{name}); 867 } 868 869 /** we are not calling the handlers yet.. */ 870 // call handler 871 if (fDocumentHandler != null && !fScanningAttribute) { 872 if (!name.equals("[xml]")) { 873 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 874 } 875 } 876 877 } // startEntity(String,XMLResourceIdentifier,String) 878 879 /** 880 * This method notifies the end of an entity. The DTD has the pseudo-name 881 * of "[dtd]" parameter entity names start with '%'; and general entities 882 * are just specified by their name. 883 * 884 * @param name The name of the entity. 885 * @param augs Additional information that may include infoset augmentations 886 * 887 * @throws XNIException Thrown by handler to signal an error. 
888 */ 889 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 890 891 /** 892 * // flush possible pending output buffer - see scanContent 893 * if (fInScanContent && fStringBuffer.length != 0 894 * && fDocumentHandler != null) { 895 * fDocumentHandler.characters(fStringBuffer, null); 896 * fStringBuffer.length = 0; // make sure we know it's been flushed 897 * } 898 */ 899 super.endEntity(name, augs); 900 901 // make sure markup is properly balanced 902 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 903 reportFatalError("MarkupEntityMismatch", null); 904 } 905 906 /**/ 907 // call handler 908 if (fDocumentHandler != null && !fScanningAttribute) { 909 if (!name.equals("[xml]")) { 910 fDocumentHandler.endGeneralEntity(name, augs); 911 } 912 } 913 914 915 } // endEntity(String) 916 917 // 918 // Protected methods 919 // 920 921 // Driver factory methods 922 923 /** Creates a content Driver. */ 924 protected Driver createContentDriver() { 925 return new FragmentContentDriver(); 926 } // createContentDriver():Driver 927 928 // scanning methods 929 930 /** 931 * Scans an XML or text declaration. 932 * <p> 933 * <pre> 934 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 935 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 936 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 937 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 938 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 939 * | ('"' ('yes' | 'no') '"')) 940 * 941 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 942 * </pre> 943 * 944 * @param scanningTextDecl True if a text declaration is to 945 * be scanned instead of an XML 946 * declaration. 
947 */ 948 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 949 throws IOException, XNIException { 950 951 // scan decl 952 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 953 fMarkupDepth--; 954 955 // pseudo-attribute values 956 String version = fStrings[0]; 957 String encoding = fStrings[1]; 958 String standalone = fStrings[2]; 959 fDeclaredEncoding = encoding; 960 // set standalone 961 fStandaloneSet = standalone != null; 962 fStandalone = fStandaloneSet && standalone.equals("yes"); 963 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 964 //but this information is only related with Document Entity. 965 fEntityManager.setStandalone(fStandalone); 966 967 968 // call handler 969 if (fDocumentHandler != null) { 970 if (scanningTextDecl) { 971 fDocumentHandler.textDecl(version, encoding, null); 972 } else { 973 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 974 } 975 } 976 977 if(version != null){ 978 fEntityScanner.setVersion(version); 979 fEntityScanner.setXMLVersion(version); 980 } 981 // set encoding on reader, only if encoding was not specified by the application explicitly 982 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 983 fEntityScanner.setEncoding(encoding); 984 } 985 986 } // scanXMLDeclOrTextDecl(boolean) 987 988 public String getPITarget(){ 989 return fPITarget ; 990 } 991 992 public XMLStringBuffer getPIData(){ 993 return fContentBuffer ; 994 } 995 996 //XXX: why not this function behave as per the state of the parser? 997 public XMLString getCharacterData(){ 998 if(fUsebuffer){ 999 return fContentBuffer ; 1000 }else{ 1001 return fTempString; 1002 } 1003 1004 } 1005 1006 1007 /** 1008 * Scans a processing data. This is needed to handle the situation 1009 * where a document starts with a processing instruction whose 1010 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1011 * 1012 * @param target The PI target 1013 * @param data The XMLStringBuffer to fill in with the data 1014 */ 1015 protected void scanPIData(String target, XMLStringBuffer data) 1016 throws IOException, XNIException { 1017 1018 super.scanPIData(target, data); 1019 1020 //set the PI target and values 1021 fPITarget = target ; 1022 1023 fMarkupDepth--; 1024 1025 } // scanPIData(String) 1026 1027 /** 1028 * Scans a comment. 1029 * <p> 1030 * <pre> 1031 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1032 * </pre> 1033 * <p> 1034 * <strong>Note:</strong> Called after scanning past '<!--' 1035 */ 1036 protected void scanComment() throws IOException, XNIException { 1037 fContentBuffer.clear(); 1038 scanComment(fContentBuffer); 1039 //getTextCharacters can also be called for reading comments 1040 fUsebuffer = true; 1041 fMarkupDepth--; 1042 1043 } // scanComment() 1044 1045 //xxx value returned by this function may not remain valid if another event is scanned. 1046 public String getComment(){ 1047 return fContentBuffer.toString(); 1048 } 1049 1050 void addElement(String rawname){ 1051 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1052 //storing element raw name in a linear list of array 1053 fElementArray[fElementPointer] = rawname ; 1054 //storing elemnetPointer for particular element depth 1055 1056 if(DEBUG_SKIP_ALGORITHM){ 1057 StringBuffer sb = new StringBuffer() ; 1058 sb.append(" Storing element information ") ; 1059 sb.append(" fElementPointer = " + fElementPointer) ; 1060 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1061 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1062 System.out.println(sb.toString()) ; 1063 } 1064 1065 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1066 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1067 short column = storePointerForADepth(fElementPointer); 1068 if(column > 0){ 1069 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1070 //identity comparison shouldn't take much time and we can rely on this 1071 //since its guaranteed to have same object id for same string. 1072 if(rawname == fElementArray[pointer]){ 1073 fShouldSkip = true ; 1074 fLastPointerLocation = pointer ; 1075 //reset the things and return. 1076 resetPointer((short)fElementStack.fDepth , column) ; 1077 fElementArray[fElementPointer] = null ; 1078 return ; 1079 }else{ 1080 fShouldSkip = false ; 1081 } 1082 } 1083 } 1084 fElementPointer++ ; 1085 } 1086 } 1087 1088 1089 void resetPointer(short depth, short column){ 1090 fPointerInfo[depth] [column] = (short)0; 1091 } 1092 1093 //returns column information at which pointer was stored. 1094 short storePointerForADepth(short elementPointer){ 1095 short depth = (short) fElementStack.fDepth ; 1096 1097 //Stores element pointer locations at particular depth , only 4 pointer locations 1098 //are stored at particular depth for now. 1099 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1100 1101 if(canStore(depth, i)){ 1102 fPointerInfo[depth][i] = elementPointer ; 1103 if(DEBUG_SKIP_ALGORITHM){ 1104 StringBuffer sb = new StringBuffer() ; 1105 sb.append(" Pointer information ") ; 1106 sb.append(" fElementPointer = " + fElementPointer) ; 1107 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1108 sb.append(" column = " + i ) ; 1109 System.out.println(sb.toString()) ; 1110 } 1111 return i; 1112 } 1113 //else 1114 //pointer was not stored because we reached the limit 1115 } 1116 return -1 ; 1117 } 1118 1119 boolean canStore(short depth, short column){ 1120 //colum = 0 , means first element at particular depth 1121 //column = 1, means second element at particular depth 1122 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1123 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1124 } 1125 1126 1127 short getElementPointer(short depth, short column){ 1128 //colum = 0 , means first element at particular depth 1129 //column = 1, means second element at particular depth 1130 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1131 return fPointerInfo[depth][column] ; 1132 } 1133 1134 //this function assumes that string passed is not null and skips 1135 //the following string from the buffer this makes sure 1136 boolean skipFromTheBuffer(String rawname) throws IOException{ 1137 if(fEntityScanner.skipString(rawname)){ 1138 char c = (char)fEntityScanner.peekChar() ; 1139 //If the start element was completely skipped we should encounter either ' '(space), 1140 //or '/' (in case of empty element) or '>' 1141 if( c == ' ' || c == '/' || c == '>'){ 1142 fElementRawname = rawname ; 1143 return true ; 1144 } else{ 1145 return false; 1146 } 1147 } else 1148 return false ; 1149 } 1150 1151 boolean skipQElement(String rawname) throws IOException{ 1152 1153 final int c = fEntityScanner.getChar(rawname.length()); 1154 //if this character is still valid element name -- this means string can't match 1155 if(XMLChar.isName(c)){ 1156 return false; 1157 }else{ 1158 return fEntityScanner.skipString(rawname); 1159 } 1160 } 1161 1162 protected boolean skipElement() throws IOException { 1163 1164 if(!fShouldSkip) return false ; 1165 1166 if(fLastPointerLocation != 0){ 1167 //Look at the next element stored in the array list.. we might just get a match. 1168 String rawname = fElementArray[fLastPointerLocation + 1] ; 1169 if(rawname != null && skipFromTheBuffer(rawname)){ 1170 fLastPointerLocation++ ; 1171 if(DEBUG_SKIP_ALGORITHM){ 1172 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1173 } 1174 return true ; 1175 } else{ 1176 //reset it back to zero... we haven't got the correct subset yet. 
1177 fLastPointerLocation = 0 ; 1178 1179 } 1180 } 1181 //xxx: we can put some logic here as from what column it should start looking 1182 //for now we always start at 0 1183 //fallback to tolerant algorithm, it would look for differnt element stored at different 1184 //depth and get us the pointer location. 1185 return fShouldSkip && skipElement((short)0); 1186 1187 } 1188 1189 //start of the column at which it should try searching 1190 boolean skipElement(short column) throws IOException { 1191 short depth = (short)fElementStack.fDepth ; 1192 1193 if(depth > MAX_DEPTH_LIMIT){ 1194 return fShouldSkip = false ; 1195 } 1196 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1197 short pointer = getElementPointer(depth , i ) ; 1198 1199 if(pointer == 0){ 1200 return fShouldSkip = false ; 1201 } 1202 1203 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1204 if(DEBUG_SKIP_ALGORITHM){ 1205 System.out.println(); 1206 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1207 System.out.println(); 1208 } 1209 fLastPointerLocation = pointer ; 1210 return fShouldSkip = true ; 1211 } 1212 } 1213 return fShouldSkip = false ; 1214 } 1215 1216 /** 1217 * Scans a start element. This method will handle the binding of 1218 * namespace information and notifying the handler of the start 1219 * of the element. 1220 * <p> 1221 * <pre> 1222 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1223 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1224 * </pre> 1225 * <p> 1226 * <strong>Note:</strong> This method assumes that the leading 1227 * '<' character has been consumed. 1228 * <p> 1229 * <strong>Note:</strong> This method uses the fElementQName and 1230 * fAttributes variables. The contents of these variables will be 1231 * destroyed. The caller should copy important information out of 1232 * these variables before calling this method. 
1233 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1234 * 1235 * @return True if element is empty. (i.e. It matches 1236 * production [44]. 1237 */ 1238 // fElementQName will have the details of element just read.. 1239 // fAttributes will have the details of all the attributes. 1240 protected boolean scanStartElement() 1241 throws IOException, XNIException { 1242 1243 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1244 //when skipping is true and no more elements should be added 1245 if(fSkip && !fAdd){ 1246 //get the stored element -- if everything goes right this should match the 1247 //token in the buffer 1248 1249 QName name = fElementStack.getNext(); 1250 1251 if(DEBUG_SKIP_ALGORITHM){ 1252 System.out.println("Trying to skip String = " + name.rawname); 1253 } 1254 1255 //Be conservative -- if skipping fails -- stop. 1256 fSkip = fEntityScanner.skipString(name.rawname); 1257 1258 if(fSkip){ 1259 if(DEBUG_SKIP_ALGORITHM){ 1260 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1261 } 1262 fElementStack.push(); 1263 fElementQName = name; 1264 }else{ 1265 //if skipping fails reposition the stack or fallback to normal way of processing 1266 fElementStack.reposition(); 1267 if(DEBUG_SKIP_ALGORITHM){ 1268 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1269 } 1270 } 1271 } 1272 1273 //we are still at the stage of adding elements 1274 //the elements were not matched or 1275 //fSkip is not set to true 1276 if(!fSkip || fAdd){ 1277 //get the next element from the stack 1278 fElementQName = fElementStack.nextElement(); 1279 // name 1280 if (fNamespaces) { 1281 fEntityScanner.scanQName(fElementQName); 1282 } else { 1283 String name = fEntityScanner.scanName(); 1284 fElementQName.setValues(null, name, name, null); 1285 } 1286 1287 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1288 if(DEBUG_SKIP_ALGORITHM){ 1289 if(fAdd){ 1290 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1291 } 1292 } 1293 1294 } 1295 1296 //when the elements are being added , we need to check if we are set for skipping the elements 1297 if(fAdd){ 1298 //this sets the value of fAdd variable 1299 fElementStack.matchElement(fElementQName); 1300 } 1301 1302 1303 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1304 fCurrentElement = fElementQName; 1305 1306 String rawname = fElementQName.rawname; 1307 1308 fEmptyElement = false; 1309 1310 fAttributes.removeAllAttributes(); 1311 1312 if(!seekCloseOfStartTag()){ 1313 fReadingAttributes = true; 1314 fAttributeCacheUsedCount =0; 1315 fStringBufferIndex =0; 1316 fAddDefaultAttr = true; 1317 do { 1318 scanAttribute(fAttributes); 1319 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1320 fAttributes.getLength() > fElementAttributeLimit){ 1321 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1322 "ElementAttributeLimit", 1323 new Object[]{rawname, fElementAttributeLimit }, 1324 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1325 } 1326 1327 } while (!seekCloseOfStartTag()); 1328 fReadingAttributes=false; 1329 } 1330 1331 if (fEmptyElement) { 1332 //decrease the markup depth.. 1333 fMarkupDepth--; 1334 1335 // check that this element was opened in the same entity 1336 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1337 reportFatalError("ElementEntityMismatch", 1338 new Object[]{fCurrentElement.rawname}); 1339 } 1340 // call handler 1341 if (fDocumentHandler != null) { 1342 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1343 } 1344 1345 //We should not be popping out the context here in endELement becaause the namespace context is still 1346 //valid when parser is at the endElement state. 
1347 //if (fNamespaces) { 1348 // fNamespaceContext.popContext(); 1349 //} 1350 1351 //pop the element off the stack.. 1352 fElementStack.popElement(); 1353 1354 } else { 1355 1356 if(dtdGrammarUtil != null) 1357 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1358 if(fDocumentHandler != null){ 1359 //complete element and attributes are traversed in this function so we can send a callback 1360 //here. 1361 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1362 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1363 } 1364 } 1365 1366 1367 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1368 return fEmptyElement; 1369 1370 } // scanStartElement():boolean 1371 1372 /** 1373 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1374 * Characters are consumed. 1375 */ 1376 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1377 // spaces 1378 boolean sawSpace = fEntityScanner.skipSpaces(); 1379 1380 // end tag? 1381 final int c = fEntityScanner.peekChar(); 1382 if (c == '>') { 1383 fEntityScanner.scanChar(); 1384 return true; 1385 } else if (c == '/') { 1386 fEntityScanner.scanChar(); 1387 if (!fEntityScanner.skipChar('>')) { 1388 reportFatalError("ElementUnterminated", 1389 new Object[]{fElementQName.rawname}); 1390 } 1391 fEmptyElement = true; 1392 return true; 1393 } else if (!isValidNameStartChar(c) || !sawSpace) { 1394 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1395 } 1396 1397 return false; 1398 } 1399 1400 public boolean hasAttributes(){ 1401 return fAttributes.getLength() > 0 ? true : false ; 1402 } 1403 1404 1405 /** 1406 * Scans an attribute. 1407 * <p> 1408 * <pre> 1409 * [41] Attribute ::= Name Eq AttValue 1410 * </pre> 1411 * <p> 1412 * <strong>Note:</strong> This method assumes that the next 1413 * character on the stream is the first character of the attribute 1414 * name. 
1415 * <p> 1416 * <strong>Note:</strong> This method uses the fAttributeQName and 1417 * fQName variables. The contents of these variables will be 1418 * destroyed. 1419 * 1420 * @param attributes The attributes list for the scanned attribute. 1421 */ 1422 1423 /** 1424 * protected void scanAttribute(AttributeIteratorImpl attributes) 1425 * throws IOException, XNIException { 1426 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1427 * 1428 * 1429 * // name 1430 * if (fNamespaces) { 1431 * fEntityScanner.scanQName(fAttributeQName); 1432 * } 1433 * else { 1434 * String name = fEntityScanner.scanName(); 1435 * fAttributeQName.setValues(null, name, name, null); 1436 * } 1437 * 1438 * // equals 1439 * fEntityScanner.skipSpaces(); 1440 * if (!fEntityScanner.skipChar('=')) { 1441 * reportFatalError("EqRequiredInAttribute", 1442 * new Object[]{fAttributeQName.rawname}); 1443 * } 1444 * fEntityScanner.skipSpaces(); 1445 * 1446 * 1447 * // content 1448 * int oldLen = attributes.getLength(); 1449 */ 1450 /**xxx there is one check of duplicate attribute that has been removed. 
1451 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1452 * 1453 * // WFC: Unique Att Spec 1454 * if (oldLen == attributes.getLength()) { 1455 * reportFatalError("AttributeNotUnique", 1456 * new Object[]{fCurrentElement.rawname, 1457 * fAttributeQName.rawname}); 1458 * } 1459 */ 1460 1461 /* 1462 //REVISIT: one more case needs to be included: external PE and standalone is no 1463 boolean isVC = fHasExternalDTD && !fStandalone; 1464 scanAttributeValue(fTempString, fTempString2, 1465 fAttributeQName.rawname, attributes, 1466 oldLen, isVC); 1467 1468 //attributes.setValue(oldLen, fTempString.toString()); 1469 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1470 //attributes.setSpecified(oldLen, true); 1471 1472 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1473 fAttributes.addAttribute(attribute); 1474 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1475 } // scanAttribute(XMLAttributes) 1476 1477 */ 1478 1479 /** return the attribute iterator implementation */ 1480 public XMLAttributesIteratorImpl getAttributeIterator(){ 1481 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1482 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1483 fAddDefaultAttr = false; 1484 } 1485 return fAttributes; 1486 } 1487 1488 /** return if standalone is set */ 1489 public boolean standaloneSet(){ 1490 return fStandaloneSet; 1491 } 1492 /** return if the doucment is standalone */ 1493 public boolean isStandAlone(){ 1494 return fStandalone ; 1495 } 1496 /** 1497 * Scans an attribute name value pair. 1498 * <p> 1499 * <pre> 1500 * [41] Attribute ::= Name Eq AttValue 1501 * </pre> 1502 * <p> 1503 * <strong>Note:</strong> This method assumes that the next 1504 * character on the stream is the first character of the attribute 1505 * name. 
1506 * <p> 1507 * <strong>Note:</strong> This method uses the fAttributeQName and 1508 * fQName variables. The contents of these variables will be 1509 * destroyed. 1510 * 1511 * @param attributes The attributes list for the scanned attribute. 1512 */ 1513 1514 protected void scanAttribute(XMLAttributes attributes) 1515 throws IOException, XNIException { 1516 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1517 1518 // name 1519 if (fNamespaces) { 1520 fEntityScanner.scanQName(fAttributeQName); 1521 } else { 1522 String name = fEntityScanner.scanName(); 1523 fAttributeQName.setValues(null, name, name, null); 1524 } 1525 1526 // equals 1527 fEntityScanner.skipSpaces(); 1528 if (!fEntityScanner.skipChar('=')) { 1529 reportFatalError("EqRequiredInAttribute", 1530 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1531 } 1532 fEntityScanner.skipSpaces(); 1533 1534 int attIndex = 0 ; 1535 //REVISIT: one more case needs to be included: external PE and standalone is no 1536 boolean isVC = fHasExternalDTD && !fStandalone; 1537 //fTempString would store attribute value 1538 ///fTempString2 would store attribute non-normalized value 1539 1540 //this function doesn't use 'attIndex'. We are adding the attribute later 1541 //after we have figured out that current attribute is not namespace declaration 1542 //since scanAttributeValue doesn't use attIndex parameter therefore we 1543 //can safely add the attribute later.. 1544 XMLString tmpStr = getString(); 1545 1546 scanAttributeValue(tmpStr, fTempString2, 1547 fAttributeQName.rawname, attributes, 1548 attIndex, isVC); 1549 1550 // content 1551 int oldLen = attributes.getLength(); 1552 //if the attribute name already exists.. 
new value is replaced with old value 1553 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1554 1555 // WFC: Unique Att Spec 1556 //attributes count will be same if the current attribute name already exists for this element name. 1557 //this means there are two duplicate attributes. 1558 if (oldLen == attributes.getLength()) { 1559 reportFatalError("AttributeNotUnique", 1560 new Object[]{fCurrentElement.rawname, 1561 fAttributeQName.rawname}); 1562 } 1563 1564 //tmpString contains attribute value 1565 //we are passing null as the attribute value 1566 attributes.setValue(attIndex, null, tmpStr); 1567 1568 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1569 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1570 attributes.setSpecified(attIndex, true); 1571 1572 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1573 1574 } // scanAttribute(XMLAttributes) 1575 1576 /** 1577 * Scans element content. 1578 * 1579 * @return Returns the next character on the stream. 1580 */ 1581 //CHANGED: 1582 //EARLIER: scanContent() 1583 //NOW: scanContent(XMLStringBuffer) 1584 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1585 //this function appends the data to the buffer. 1586 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1587 //set the fTempString length to 0 before passing it on to scanContent 1588 //scanContent sets the correct co-ordinates as per the content read 1589 fTempString.length = 0; 1590 int c = fEntityScanner.scanContent(fTempString); 1591 content.append(fTempString); 1592 fTempString.length = 0; 1593 if (c == '\r') { 1594 // happens when there is the character reference 1595 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1596 fEntityScanner.scanChar(); 1597 content.append((char)c); 1598 c = -1; 1599 } else if (c == ']') { 1600 //fStringBuffer.clear(); 1601 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1602 content.append((char)fEntityScanner.scanChar()); 1603 // remember where we are in case we get an endEntity before we 1604 // could flush the buffer out - this happens when we're parsing an 1605 // entity which ends with a ] 1606 fInScanContent = true; 1607 // 1608 // We work on a single character basis to handle cases such as: 1609 // ']]]>' which we might otherwise miss. 1610 // 1611 if (fEntityScanner.skipChar(']')) { 1612 content.append(']'); 1613 while (fEntityScanner.skipChar(']')) { 1614 content.append(']'); 1615 } 1616 if (fEntityScanner.skipChar('>')) { 1617 reportFatalError("CDEndInContent", null); 1618 } 1619 } 1620 fInScanContent = false; 1621 c = -1; 1622 } 1623 if (fDocumentHandler != null && content.length > 0) { 1624 //fDocumentHandler.characters(content, null); 1625 } 1626 return c; 1627 1628 } // scanContent():int 1629 1630 1631 /** 1632 * Scans a CDATA section. 1633 * <p> 1634 * <strong>Note:</strong> This method uses the fTempString and 1635 * fStringBuffer variables. 1636 * 1637 * @param complete True if the CDATA section is to be scanned 1638 * completely. 1639 * 1640 * @return True if CDATA is completely scanned. 1641 */ 1642 //CHANGED: 1643 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1644 throws IOException, XNIException { 1645 1646 // call handler 1647 if (fDocumentHandler != null) { 1648 //fDocumentHandler.startCDATA(null); 1649 } 1650 1651 while (true) { 1652 //scanData will fill the contentBuffer 1653 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1654 break ; 1655 /** We dont need all this code if we pass ']]>' as delimeter.. 
1656 * int brackets = 2; 1657 * while (fEntityScanner.skipChar(']')) { 1658 * brackets++; 1659 * } 1660 * 1661 * //When we find more than 2 square brackets 1662 * if (fDocumentHandler != null && brackets > 2) { 1663 * //we dont need to clear the buffer.. 1664 * //contentBuffer.clear(); 1665 * for (int i = 2; i < brackets; i++) { 1666 * contentBuffer.append(']'); 1667 * } 1668 * fDocumentHandler.characters(contentBuffer, null); 1669 * } 1670 * 1671 * if (fEntityScanner.skipChar('>')) { 1672 * break; 1673 * } 1674 * if (fDocumentHandler != null) { 1675 * //we dont need to clear the buffer now.. 1676 * //contentBuffer.clear(); 1677 * contentBuffer.append("]]"); 1678 * fDocumentHandler.characters(contentBuffer, null); 1679 * } 1680 **/ 1681 } else { 1682 int c = fEntityScanner.peekChar(); 1683 if (c != -1 && isInvalidLiteral(c)) { 1684 if (XMLChar.isHighSurrogate(c)) { 1685 //contentBuffer.clear(); 1686 //scan surrogates if any.... 1687 scanSurrogates(contentBuffer); 1688 } else { 1689 reportFatalError("InvalidCharInCDSect", 1690 new Object[]{Integer.toString(c,16)}); 1691 fEntityScanner.scanChar(); 1692 } 1693 } 1694 //by this time we have also read surrogate contents if any... 1695 if (fDocumentHandler != null) { 1696 //fDocumentHandler.characters(contentBuffer, null); 1697 } 1698 } 1699 } 1700 fMarkupDepth--; 1701 1702 if (fDocumentHandler != null && contentBuffer.length > 0) { 1703 //fDocumentHandler.characters(contentBuffer, null); 1704 } 1705 1706 // call handler 1707 if (fDocumentHandler != null) { 1708 //fDocumentHandler.endCDATA(null); 1709 } 1710 1711 return true; 1712 1713 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1714 1715 /** 1716 * Scans an end element. 1717 * <p> 1718 * <pre> 1719 * [42] ETag ::= '</' Name S? '>' 1720 * </pre> 1721 * <p> 1722 * <strong>Note:</strong> This method uses the fElementQName variable. 1723 * The contents of this variable will be destroyed. 
The caller should 1724 * copy the needed information out of this variable before calling 1725 * this method. 1726 * 1727 * @return The element depth. 1728 */ 1729 protected int scanEndElement() throws IOException, XNIException { 1730 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1731 1732 // pop context 1733 QName endElementName = fElementStack.popElement(); 1734 1735 String rawname = endElementName.rawname; 1736 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1737 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1738 //In scanners most of the time is consumed on checks done for XML characters, we can 1739 // optimize on it and avoid the checks done for endElement, 1740 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1741 1742 // this should work both for namespace processing true or false... 1743 1744 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1745 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1746 1747 if (!fEntityScanner.skipString(endElementName.rawname)) { 1748 reportFatalError("ETagRequired", new Object[]{rawname}); 1749 } 1750 1751 // end 1752 fEntityScanner.skipSpaces(); 1753 if (!fEntityScanner.skipChar('>')) { 1754 reportFatalError("ETagUnterminated", 1755 new Object[]{rawname}); 1756 } 1757 fMarkupDepth--; 1758 1759 //we have increased the depth for two markup "<" characters 1760 fMarkupDepth--; 1761 1762 // check that this element was opened in the same entity 1763 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1764 reportFatalError("ElementEntityMismatch", 1765 new Object[]{rawname}); 1766 } 1767 1768 //We should not be popping out the context here in endELement becaause the namespace context is still 1769 //valid when parser is at the endElement state. 
1770 1771 //if (fNamespaces) { 1772 // fNamespaceContext.popContext(); 1773 //} 1774 1775 // call handler 1776 if (fDocumentHandler != null ) { 1777 //end element is scanned in this function so we can send a callback 1778 //here. 1779 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1780 1781 fDocumentHandler.endElement(endElementName, null); 1782 } 1783 if(dtdGrammarUtil != null) 1784 dtdGrammarUtil.endElement(endElementName); 1785 1786 return fMarkupDepth; 1787 1788 } // scanEndElement():int 1789 1790 /** 1791 * Scans a character reference. 1792 * <p> 1793 * <pre> 1794 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1795 * </pre> 1796 */ 1797 protected void scanCharReference() 1798 throws IOException, XNIException { 1799 1800 fStringBuffer2.clear(); 1801 int ch = scanCharReferenceValue(fStringBuffer2, null); 1802 fMarkupDepth--; 1803 if (ch != -1) { 1804 // call handler 1805 1806 if (fDocumentHandler != null) { 1807 if (fNotifyCharRefs) { 1808 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1809 } 1810 Augmentations augs = null; 1811 if (fValidation && ch <= 0x20) { 1812 if (fTempAugmentations != null) { 1813 fTempAugmentations.removeAllItems(); 1814 } 1815 else { 1816 fTempAugmentations = new AugmentationsImpl(); 1817 } 1818 augs = fTempAugmentations; 1819 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1820 } 1821 //xxx: How do we deal with this - how to return charReferenceValues 1822 //now this is being commented because this is taken care in scanDocument() 1823 //fDocumentHandler.characters(fStringBuffer2, null); 1824 if (fNotifyCharRefs) { 1825 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1826 } 1827 } 1828 } 1829 1830 } // scanCharReference() 1831 1832 1833 /** 1834 * Scans an entity reference. 1835 * 1836 * @return returns true if the new entity is started. If it was built-in entity 1837 * 'false' is returned. 1838 * @throws IOException Thrown if i/o error occurs. 
1839 * @throws XNIException Thrown if handler throws exception upon 1840 * notification. 1841 */ 1842 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1843 String name = fEntityScanner.scanName(); 1844 if (name == null) { 1845 reportFatalError("NameRequiredInReference", null); 1846 return; 1847 } 1848 if (!fEntityScanner.skipChar(';')) { 1849 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1850 } 1851 if (fEntityStore.isUnparsedEntity(name)) { 1852 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1853 } 1854 fMarkupDepth--; 1855 fCurrentEntityName = name; 1856 1857 // handle built-in entities 1858 if (name == fAmpSymbol) { 1859 handleCharacter('&', fAmpSymbol, content); 1860 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1861 return ; 1862 } else if (name == fLtSymbol) { 1863 handleCharacter('<', fLtSymbol, content); 1864 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1865 return ; 1866 } else if (name == fGtSymbol) { 1867 handleCharacter('>', fGtSymbol, content); 1868 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1869 return ; 1870 } else if (name == fQuotSymbol) { 1871 handleCharacter('"', fQuotSymbol, content); 1872 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1873 return ; 1874 } else if (name == fAposSymbol) { 1875 handleCharacter('\'', fAposSymbol, content); 1876 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1877 return ; 1878 } 1879 1880 //1. if the entity is external and support to external entities is not required 1881 // 2. or entities should not be replaced 1882 //3. or if it is built in entity reference. 
1883 boolean isEE = fEntityStore.isExternalEntity(name); 1884 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1885 fScannerState = SCANNER_STATE_REFERENCE; 1886 return ; 1887 } 1888 // start general entity 1889 if (!fEntityStore.isDeclaredEntity(name)) { 1890 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1891 if (!fSupportDTD && fReplaceEntityReferences) { 1892 reportFatalError("EntityNotDeclared", new Object[]{name}); 1893 return; 1894 } 1895 //REVISIT: one more case needs to be included: external PE and standalone is no 1896 if ( fHasExternalDTD && !fStandalone) { 1897 if (fValidation) 1898 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1899 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1900 } else 1901 reportFatalError("EntityNotDeclared", new Object[]{name}); 1902 } 1903 //we are starting the entity even if the entity was not declared 1904 //if that was the case it its taken care in XMLEntityManager.startEntity() 1905 //we immediately call the endEntity. Application gets to know if there was 1906 //any entity that was not declared. 1907 fEntityManager.startEntity(name, false); 1908 //set the scaner state to content.. parser will automatically revive itself at any point of time. 1909 //setScannerState(SCANNER_STATE_CONTENT); 1910 //return true ; 1911 } // scanEntityReference() 1912 1913 // utility methods 1914 1915 /** 1916 * Calls document handler with a single character resulting from 1917 * built-in entity resolution. 1918 * 1919 * @param c 1920 * @param entity built-in name 1921 * @param XMLStringBuffer append the character to buffer 1922 * 1923 * we really dont need to call this function -- this function is only required when 1924 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1925 * calling this function to hanlde built-in entity reference. 
1926 * 1927 */ 1928 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1929 foundBuiltInRefs = true; 1930 content.append(c); 1931 if (fDocumentHandler != null) { 1932 fSingleChar[0] = c; 1933 if (fNotifyBuiltInRefs) { 1934 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1935 } 1936 fTempString.setValues(fSingleChar, 0, 1); 1937 //fDocumentHandler.characters(fTempString, null); 1938 1939 if (fNotifyBuiltInRefs) { 1940 fDocumentHandler.endGeneralEntity(entity, null); 1941 } 1942 } 1943 } // handleCharacter(char) 1944 1945 // helper methods 1946 1947 /** 1948 * Sets the scanner state. 1949 * 1950 * @param state The new scanner state. 1951 */ 1952 protected final void setScannerState(int state) { 1953 1954 fScannerState = state; 1955 if (DEBUG_SCANNER_STATE) { 1956 System.out.print("### setScannerState: "); 1957 //System.out.print(fScannerState); 1958 System.out.print(getScannerStateName(state)); 1959 System.out.println(); 1960 } 1961 1962 } // setScannerState(int) 1963 1964 1965 /** 1966 * Sets the Driver. 1967 * 1968 * @param Driver The new Driver. 1969 */ 1970 protected final void setDriver(Driver driver) { 1971 fDriver = driver; 1972 if (DEBUG_DISPATCHER) { 1973 System.out.print("%%% setDriver: "); 1974 System.out.print(getDriverName(driver)); 1975 System.out.println(); 1976 } 1977 } 1978 1979 // 1980 // Private methods 1981 // 1982 1983 /** Returns the scanner state name. 
*/ 1984 protected String getScannerStateName(int state) { 1985 1986 switch (state) { 1987 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1988 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1989 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1990 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1991 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1992 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1993 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1994 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1995 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1996 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1997 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1998 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1999 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2000 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2001 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2002 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 2003 } 2004 2005 return "??? ("+state+')'; 2006 2007 } // getScannerStateName(int):String 2008 public String getEntityName(){ 2009 //return the cached name 2010 return fCurrentEntityName; 2011 } 2012 2013 /** Returns the driver name. 
*/ 2014 public String getDriverName(Driver driver) { 2015 2016 if (DEBUG_DISPATCHER) { 2017 if (driver != null) { 2018 String name = driver.getClass().getName(); 2019 int index = name.lastIndexOf('.'); 2020 if (index != -1) { 2021 name = name.substring(index + 1); 2022 index = name.lastIndexOf('$'); 2023 if (index != -1) { 2024 name = name.substring(index + 1); 2025 } 2026 } 2027 return name; 2028 } 2029 } 2030 return "null"; 2031 2032 } // getDriverName():String 2033 2034 /** 2035 * Check the protocol used in the systemId against allowed protocols 2036 * 2037 * @param systemId the Id of the URI 2038 * @param allowedProtocols a list of allowed protocols separated by comma 2039 * @return the name of the protocol if rejected, null otherwise 2040 */ 2041 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2042 String baseSystemId = fEntityScanner.getBaseSystemId(); 2043 String expandedSystemId = fEntityManager.expandSystemId(systemId, baseSystemId,fStrictURI); 2044 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2045 } 2046 2047 // 2048 // Classes 2049 // 2050 2051 /** 2052 * @author Neeraj Bajaj, Sun Microsystems. 2053 */ 2054 protected static final class Element { 2055 2056 // 2057 // Data 2058 // 2059 2060 /** Symbol. */ 2061 public QName qname; 2062 2063 //raw name stored as characters 2064 public char[] fRawname; 2065 2066 /** The next Element entry. */ 2067 public Element next; 2068 2069 // 2070 // Constructors 2071 // 2072 2073 /** 2074 * Constructs a new Element from the given QName and next Element 2075 * reference. 2076 */ 2077 public Element(QName qname, Element next) { 2078 this.qname.setValues(qname); 2079 this.fRawname = qname.rawname.toCharArray(); 2080 this.next = next; 2081 } 2082 2083 } // class Element 2084 2085 /** 2086 * Element stack. 2087 * 2088 * @author Neeraj Bajaj, Sun Microsystems. 
2089 */ 2090 protected class ElementStack2 { 2091 2092 // 2093 // Data 2094 // 2095 2096 /** The stack data. */ 2097 protected QName [] fQName = new QName[20]; 2098 2099 //Element depth 2100 protected int fDepth; 2101 //total number of elements 2102 protected int fCount; 2103 //current position 2104 protected int fPosition; 2105 //Mark refers to the position 2106 protected int fMark; 2107 2108 protected int fLastDepth ; 2109 2110 // 2111 // Constructors 2112 // 2113 2114 /** Default constructor. */ 2115 public ElementStack2() { 2116 for (int i = 0; i < fQName.length; i++) { 2117 fQName[i] = new QName(); 2118 } 2119 fMark = fPosition = 1; 2120 } // <init>() 2121 2122 public void resize(){ 2123 /** 2124 * int length = fElements.length; 2125 * Element [] temp = new Element[length * 2]; 2126 * System.arraycopy(fElements, 0, temp, 0, length); 2127 * fElements = temp; 2128 */ 2129 //resize QNames 2130 int oldLength = fQName.length; 2131 QName [] tmp = new QName[oldLength * 2]; 2132 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2133 fQName = tmp; 2134 2135 for (int i = oldLength; i < fQName.length; i++) { 2136 fQName[i] = new QName(); 2137 } 2138 2139 } 2140 2141 2142 // 2143 // Public methods 2144 // 2145 2146 /** Check if the element scanned during the start element 2147 *matches the stored element. 2148 * 2149 *@return true if the match suceeds. 
2150 */ 2151 public boolean matchElement(QName element) { 2152 //last depth is the depth when last elemnt was pushed 2153 //if last depth is greater than current depth 2154 if(DEBUG_SKIP_ALGORITHM){ 2155 System.out.println("fLastDepth = " + fLastDepth); 2156 System.out.println("fDepth = " + fDepth); 2157 } 2158 boolean match = false; 2159 if(fLastDepth > fDepth && fDepth <= 2){ 2160 if(DEBUG_SKIP_ALGORITHM){ 2161 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2162 } 2163 if(element.rawname == fQName[fDepth].rawname){ 2164 fAdd = false; 2165 //mark this position 2166 //decrease the depth by 1 as arrays are 0 based 2167 fMark = fDepth - 1; 2168 //we found the match and from next element skipping will start, add 1 2169 fPosition = fMark + 1 ; 2170 match = true; 2171 //Once we get match decrease the count -- this was increased by nextElement() 2172 --fCount; 2173 if(DEBUG_SKIP_ALGORITHM){ 2174 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2175 System.out.println("fMark = " + fMark); 2176 System.out.println("fPosition = " + fPosition); 2177 System.out.println("fDepth = " + fDepth); 2178 System.out.println("fCount = " + fCount); 2179 } 2180 }else{ 2181 fAdd = true; 2182 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2183 } 2184 } 2185 //store the last depth 2186 fLastDepth = fDepth++; 2187 return match; 2188 } // pushElement(QName):QName 2189 2190 /** 2191 * This function doesn't increase depth. The function in this function is 2192 *broken down into two functions for efficiency. <@see>matchElement</see>. 2193 * This function just returns the pointer to the object and its values are set. 
2194 * 2195 *@return QName reference to the next element in the list 2196 */ 2197 public QName nextElement() { 2198 2199 //if number of elements becomes equal to the length of array -- stop the skipping 2200 if (fCount == fQName.length) { 2201 fShouldSkip = false; 2202 fAdd = false; 2203 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2204 //xxx: this is not correct, we are returning the last element 2205 //this wont make any difference since flag has been set to 'false' 2206 return fQName[--fCount]; 2207 } 2208 if(DEBUG_SKIP_ALGORITHM){ 2209 System.out.println("fCount = " + fCount); 2210 } 2211 return fQName[fCount++]; 2212 2213 } 2214 2215 /** Note that this function is considerably different than nextElement() 2216 * This function just returns the previously stored elements 2217 */ 2218 public QName getNext(){ 2219 //when position reaches number of elements in the list.. 2220 //set the position back to mark, making it a circular linked list. 2221 if(fPosition == fCount){ 2222 fPosition = fMark; 2223 } 2224 return fQName[fPosition++]; 2225 } 2226 2227 /** returns the current depth 2228 */ 2229 public int popElement(){ 2230 return fDepth--; 2231 } 2232 2233 2234 /** Clears the stack without throwing away existing QName objects. */ 2235 public void clear() { 2236 fLastDepth = 0; 2237 fDepth = 0; 2238 fCount = 0 ; 2239 fPosition = fMark = 1; 2240 } // clear() 2241 2242 } // class ElementStack 2243 2244 /** 2245 * Element stack. This stack operates without synchronization, error 2246 * checking, and it re-uses objects instead of throwing popped items 2247 * away. 2248 * 2249 * @author Andy Clark, IBM 2250 */ 2251 protected class ElementStack { 2252 2253 // 2254 // Data 2255 // 2256 2257 /** The stack data. 
*/
    protected QName[] fElements;
    // For each depth, the index into fElements of the element opened at that
    // depth (written by push() and matchElement()).
    // NOTE(review): this array has a fixed length of 20 and is never grown in
    // the code visible here, while fElements is doubled on demand -- confirm
    // that fDepth cannot index past it.
    protected int [] fInt = new int[20];


    //Element depth
    protected int fDepth;
    //total number of elements
    protected int fCount;
    //current position
    protected int fPosition;
    //Mark refers to the position
    protected int fMark;

    // depth recorded at the end of the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor: allocates 20 slots, each holding a fresh QName. */
    public ElementStack() {
        fElements = new QName[20];
        for (int i = 0; i < fElements.length; i++) {
            fElements[i] = new QName();
        }
    } // <init>()

    //
    // Public methods
    //

    /**
     * Pushes an element on the stack.
     * <p>
     * <strong>Note:</strong> The QName values are copied into the
     * stack. In other words, the caller does <em>not</em> orphan
     * the element to the stack. Also, the QName object returned
     * is <em>not</em> orphaned to the caller. It should be
     * considered read-only.
     *
     * @param element The element to push onto the stack.
     *
     * @return Returns the actual QName object that stores the
     *         copied values.
     */
    //XXX: THIS FUNCTION IS NOT USED
    public QName pushElement(QName element) {
        // grow the array (doubling it) when the depth has reached its length
        if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            // the new upper half needs QName objects of its own
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }
        fElements[fDepth].setValues(element);
        return fElements[fDepth++];
    } // pushElement(QName):QName


    /**
     * Returns the previously stored element at the current position.
     * Note that this function is considerably different from nextElement():
     * it does not advance the position (push() does that).
     */
    public QName getNext(){
        //when position reaches number of elements in the list..
        //set the position back to mark, making it a circular linked list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        //store the position of last opened tag at particular depth
        //fInt[++fDepth] = fPosition;
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
        }
        //return fElements[fPosition++];
        return fElements[fPosition];
    }

    /**
     * This function should be called only when an element was skipped successfully.
     * 1. Increase the depth - because the element was successfully skipped.
     * 2. Store the position of the element token in the array of "last opened tags" at that depth.
     * 3. Increase the position counter so that it points to the next element in the array.
     */
    public void push(){

        fInt[++fDepth] = fPosition++;
    }

    /**
     * Checks whether the element scanned during the start element
     * matches the stored element, and records in fInt the array position
     * belonging to the current depth.
     *
     * @param element the element that was just scanned
     * @return true if the match succeeds; false otherwise, including the
     *         case where the element array has filled up and skipping is
     *         switched off.
     */
    public boolean matchElement(QName element) {
        //last depth is the depth when the last element was pushed
        //a comparison is only made if last depth is greater than current depth
        //(and the current depth is at most 3)
        //if(DEBUG_SKIP_ALGORITHM){
        //   System.out.println("Check if the element " + element.rawname + " matches");
        //   System.out.println("fLastDepth = " + fLastDepth);
        //   System.out.println("fDepth = " + fDepth);
        //}
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 3){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
            }
            // raw names are compared by reference, not with equals()
            if(element.rawname == fElements[fDepth - 1].rawname){
                fAdd = false;
                //mark this position
                //decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                //we found the match
                fPosition = fMark;
                match = true;
                //Once we get a match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                    System.out.println("---------MATCH SUCEEDED-----------------");
                    System.out.println("");
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        //store the position for the current depth
        //when we are adding the elements, when skipping
        //starts even then this should be tracked ie. when
        //calling getNext()
        if(match){
            //from next element skipping will start, add 1
            fInt[fDepth] = fPosition++;
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
            }
            //since fInt[fDepth] contains a pointer into the element array, which is 0 based.
            fInt[fDepth] = fCount - 1;
        }

        //if number of elements becomes equal to the length of array -- stop the skipping
        //xxx: should we do "fCount == fInt.length"
        if (fCount == fElements.length) {
            fSkip = false;
            fAdd = false;
            //reposition the stack -- it seems to be too complex a document and there is no symmetry in structure
            reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                System.out.println("REPOSITIONING THE STACK");
                System.out.println("-----------SKIPPING STOPPED----------");
                System.out.println("");
            }
            // note: fLastDepth is not updated on this path
            return false;
        }
        if(DEBUG_SKIP_ALGORITHM){
            if(match){
                System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }else{
                System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }
        }
        //store the last depth
        fLastDepth = fDepth;
        return match;
    } // matchElement(QName):boolean


    /**
     * Returns the next element on the stack.
     * While fSkip is set, the depth is increased and the slot at fCount is
     * handed out (fCount is then advanced). Otherwise the array is doubled
     * when fDepth has reached its length, and the slot at fDepth is handed
     * out (fDepth is then advanced).
     *
     * @return Returns the actual QName object. Callee should
     * use this object to store the details of next element encountered.
     */
    public QName nextElement() {
        if(fSkip){
            fDepth++;
            //boundary checks are done in matchElement()
            return fElements[fCount++];
        } else if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            // the new upper half needs QName objects of its own
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }

        return fElements[fDepth++];

    } // nextElement():QName


    /**
     * Pops an element off of the stack by setting the values of
     * the specified QName.
     * <p>
     * <strong>Note:</strong> The object returned is <em>not</em>
     * orphaned to the caller.
Therefore, the caller should consider 2462 * the object to be read-only. 2463 */ 2464 public QName popElement() { 2465 //return the same object that was pushed -- this would avoid 2466 //setting the values for every end element. 2467 //STRONG: this object is read only -- this object reference shouldn't be stored. 2468 if(fSkip || fAdd ){ 2469 if(DEBUG_SKIP_ALGORITHM){ 2470 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2471 System.out.println(""); 2472 } 2473 return fElements[fInt[fDepth--]]; 2474 } else{ 2475 if(DEBUG_SKIP_ALGORITHM){ 2476 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2477 } 2478 return fElements[--fDepth] ; 2479 } 2480 //element.setValues(fElements[--fDepth]); 2481 } // popElement(QName) 2482 2483 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2484 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2485 *as per the depth. 2486 */ 2487 public void reposition(){ 2488 for( int i = 2 ; i <= fDepth ; i++){ 2489 fElements[i-1] = fElements[fInt[i]]; 2490 } 2491 if(DEBUG_SKIP_ALGORITHM){ 2492 for( int i = 0 ; i < fDepth ; i++){ 2493 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2494 } 2495 } 2496 } 2497 2498 /** Clears the stack without throwing away existing QName objects. */ 2499 public void clear() { 2500 fDepth = 0; 2501 fLastDepth = 0; 2502 fCount = 0 ; 2503 fPosition = fMark = 1; 2504 2505 } // clear() 2506 2507 /** 2508 * This function is as a result of optimization done for endElement -- 2509 * we dont need to set the value for every end element encouterd. 2510 * For Well formedness checks we can have the same QName object that was pushed. 
2511 * the values will be set only if application need to know about the endElement 2512 * -- neeraj.bajaj@sun.com 2513 */ 2514 2515 public QName getLastPoppedElement(){ 2516 return fElements[fDepth]; 2517 } 2518 } // class ElementStack 2519 2520 /** 2521 * Drives the parser to the next state/event on the input. Parser is guaranteed 2522 * to stop at the next state/event. 2523 * 2524 * Internally XML document is divided into several states. Each state represents 2525 * a sections of XML document. When this functions returns normally, it has read 2526 * the section of XML document and returns the state corresponding to section of 2527 * document which has been read. For optimizations, a particular driver 2528 * can read ahead of the section of document (state returned) just read and 2529 * can maintain a different internal state. 2530 * 2531 * 2532 * @author Neeraj Bajaj, Sun Microsystems 2533 */ 2534 protected interface Driver { 2535 2536 2537 /** 2538 * Drives the parser to the next state/event on the input. Parser is guaranteed 2539 * to stop at the next state/event. 2540 * 2541 * Internally XML document is divided into several states. Each state represents 2542 * a sections of XML document. When this functions returns normally, it has read 2543 * the section of XML document and returns the state corresponding to section of 2544 * document which has been read. For optimizations, a particular driver 2545 * can read ahead of the section of document (state returned) just read and 2546 * can maintain a different internal state. 2547 * 2548 * @return state representing the section of document just read. 2549 * 2550 * @throws IOException Thrown on i/o error. 2551 * @throws XNIException Thrown on parse error. 2552 */ 2553 2554 public int next() throws IOException, XNIException; 2555 2556 } // interface Driver 2557 2558 /** 2559 * Driver to handle content scanning. This driver is capable of reading 2560 * the fragment of XML document. 
When it has finished reading fragment 2561 * of XML documents, it can pass the job of reading to another driver. 2562 * 2563 * This class has been modified as per the new design which is more suited to 2564 * efficiently build pull parser. Lot of performance improvements have been done and 2565 * the code has been added to support stax functionality/features. 2566 * 2567 * @author Neeraj Bajaj, Sun Microsystems 2568 * 2569 * 2570 * @author Andy Clark, IBM 2571 * @author Eric Ye, IBM 2572 */ 2573 protected class FragmentContentDriver 2574 implements Driver { 2575 2576 // 2577 // Driver methods 2578 // 2579 private boolean fContinueDispatching = true; 2580 private boolean fScanningForMarkup = true; 2581 2582 /** 2583 * decides the appropriate state of the parser 2584 */ 2585 private void startOfMarkup() throws IOException { 2586 fMarkupDepth++; 2587 final int ch = fEntityScanner.peekChar(); 2588 2589 switch(ch){ 2590 case '?' :{ 2591 setScannerState(SCANNER_STATE_PI); 2592 fEntityScanner.skipChar(ch); 2593 break; 2594 } 2595 case '!' 
:{ 2596 fEntityScanner.skipChar(ch); 2597 if (fEntityScanner.skipChar('-')) { 2598 if (!fEntityScanner.skipChar('-')) { 2599 reportFatalError("InvalidCommentStart", 2600 null); 2601 } 2602 setScannerState(SCANNER_STATE_COMMENT); 2603 } else if (fEntityScanner.skipString(cdata)) { 2604 setScannerState(SCANNER_STATE_CDATA ); 2605 } else if (!scanForDoctypeHook()) { 2606 reportFatalError("MarkupNotRecognizedInContent", 2607 null); 2608 } 2609 break; 2610 } 2611 case '/' :{ 2612 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2613 fEntityScanner.skipChar(ch); 2614 break; 2615 } 2616 default :{ 2617 if (isValidNameStartChar(ch)) { 2618 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2619 } else { 2620 reportFatalError("MarkupNotRecognizedInContent", 2621 null); 2622 } 2623 } 2624 } 2625 2626 }//startOfMarkup 2627 2628 private void startOfContent() throws IOException { 2629 if (fEntityScanner.skipChar('<')) { 2630 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2631 } else if (fEntityScanner.skipChar('&')) { 2632 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2633 } else { 2634 //element content is there.. 2635 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2636 } 2637 }//startOfContent 2638 2639 2640 /** 2641 * 2642 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2643 * At any point of time when in doubt over the current state of the parser, the state should be 2644 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2645 * the parser to one of its sub state. 2646 * sub states are defined in the parser on the basis of different XML component like 2647 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2648 * These sub states help the parser to have fine control over the parsing. These are the 2649 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2650 * decided if paresr needs to stop at next milepost ?? 2651 * 2652 */ 2653 public void decideSubState() throws IOException { 2654 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2655 2656 switch (fScannerState) { 2657 2658 case SCANNER_STATE_CONTENT: { 2659 startOfContent() ; 2660 break; 2661 } 2662 2663 case SCANNER_STATE_START_OF_MARKUP: { 2664 startOfMarkup() ; 2665 break; 2666 } 2667 } 2668 } 2669 }//decideSubState 2670 2671 /** 2672 * Drives the parser to the next state/event on the input. Parser is guaranteed 2673 * to stop at the next state/event. Internally XML document 2674 * is divided into several states. Each state represents a sections of XML 2675 * document. When this functions returns normally, it has read the section 2676 * of XML document and returns the state corresponding to section of 2677 * document which has been read. For optimizations, a particular driver 2678 * can read ahead of the section of document (state returned) just read and 2679 * can maintain a different internal state. 2680 * 2681 * State returned corresponds to Stax states. 2682 * 2683 * @return state representing the section of document just read. 2684 * 2685 * @throws IOException Thrown on i/o error. 2686 * @throws XNIException Thrown on parse error. 2687 */ 2688 2689 public int next() throws IOException, XNIException { 2690 while (true) { 2691 try { 2692 if(DEBUG_NEXT){ 2693 System.out.println("NOW IN FragmentContentDriver"); 2694 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2695 } 2696 2697 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2698 //decideSubState. 
2699 2700 switch (fScannerState) { 2701 case SCANNER_STATE_CONTENT: { 2702 final int ch = fEntityScanner.peekChar(); 2703 if (ch == '<') { 2704 fEntityScanner.scanChar(); 2705 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2706 } else if (ch == '&') { 2707 fEntityScanner.scanChar(); 2708 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2709 break; 2710 } else { 2711 //element content is there.. 2712 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2713 break; 2714 } 2715 } 2716 2717 case SCANNER_STATE_START_OF_MARKUP: { 2718 startOfMarkup(); 2719 break; 2720 }//case: SCANNER_STATE_START_OF_MARKUP 2721 2722 }//end of switch 2723 //decideSubState() ; 2724 2725 //do some special handling if isCoalesce is set to true. 2726 if(fIsCoalesce){ 2727 fUsebuffer = true ; 2728 //if the last section was character data 2729 if(fLastSectionWasCharacterData){ 2730 2731 //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2732 //return the last scanned charactrer data. 2733 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2734 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2735 fLastSectionWasCharacterData = false; 2736 return XMLEvent.CHARACTERS; 2737 } 2738 }//if last section was CDATA or ENTITY REFERENCE 2739 //xxx: there might be another entity reference or CDATA after this 2740 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2741 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2742 //and current state is not SCANNER_STATE_CHARACTER_DATA 2743 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2744 //this means there is nothing more to be coalesced. 2745 //return the CHARACTERS event. 
2746 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2747 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2748 2749 fLastSectionWasCData = false; 2750 fLastSectionWasEntityReference = false; 2751 return XMLEvent.CHARACTERS; 2752 } 2753 } 2754 } 2755 2756 2757 if(DEBUG_NEXT){ 2758 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2759 } 2760 2761 switch(fScannerState){ 2762 2763 case XMLEvent.START_DOCUMENT : 2764 return XMLEvent.START_DOCUMENT; 2765 2766 case SCANNER_STATE_START_ELEMENT_TAG :{ 2767 2768 //xxx this function returns true when element is empty.. can be linked to end element event. 2769 //returns true if the element is empty 2770 fEmptyElement = scanStartElement() ; 2771 //if the element is empty the next event is "end element" 2772 if(fEmptyElement){ 2773 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2774 }else{ 2775 //set the next possible state 2776 setScannerState(SCANNER_STATE_CONTENT); 2777 } 2778 return XMLEvent.START_ELEMENT ; 2779 } 2780 2781 case SCANNER_STATE_CHARACTER_DATA: { 2782 if(DEBUG_COALESCE){ 2783 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2784 System.out.println("fIsCoalesce = " + fIsCoalesce); 2785 } 2786 //if last section was either entity reference or cdata or character data we should be using buffer 2787 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2788 2789 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2790 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2791 fLastSectionWasEntityReference = false; 2792 fLastSectionWasCData = false; 2793 fLastSectionWasCharacterData = true ; 2794 fUsebuffer = true; 2795 }else{ 2796 //clear the buffer 2797 fContentBuffer.clear(); 2798 } 2799 2800 //set the fTempString length to 0 before passing it on to scanContent 2801 //scanContent sets the correct co-ordinates as per the content read 2802 fTempString.length = 0; 2803 int c = fEntityScanner.scanContent(fTempString); 2804 if(DEBUG){ 2805 System.out.println("fTempString = " + fTempString); 2806 } 2807 if(fEntityScanner.skipChar('<')){ 2808 //check if we have reached end of element 2809 if(fEntityScanner.skipChar('/')){ 2810 //increase the mark up depth 2811 fMarkupDepth++; 2812 fLastSectionWasCharacterData = false; 2813 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2814 //check if its start of new element 2815 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2816 fMarkupDepth++; 2817 fLastSectionWasCharacterData = false; 2818 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2819 }else{ 2820 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2821 //there can be cdata ahead if coalesce is true we should call again 2822 if(fIsCoalesce){ 2823 fUsebuffer = true; 2824 fLastSectionWasCharacterData = true; 2825 fContentBuffer.append(fTempString); 2826 fTempString.length = 0; 2827 continue; 2828 } 2829 } 2830 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2831 if(fUsebuffer){ 2832 fContentBuffer.append(fTempString); 2833 fTempString.length = 0; 2834 } 2835 if(DEBUG){ 2836 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2837 } 2838 //check limit before returning event 2839 checkLimit(fContentBuffer); 2840 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2841 if(DEBUG)System.out.println("Return 
SPACE EVENT"); 2842 return XMLEvent.SPACE; 2843 }else 2844 return XMLEvent.CHARACTERS; 2845 2846 } else{ 2847 fUsebuffer = true ; 2848 if(DEBUG){ 2849 System.out.println("fContentBuffer = " + fContentBuffer); 2850 System.out.println("fTempString = " + fTempString); 2851 } 2852 fContentBuffer.append(fTempString); 2853 fTempString.length = 0; 2854 } 2855 if (c == '\r') { 2856 if(DEBUG){ 2857 System.out.println("'\r' character found"); 2858 } 2859 // happens when there is the character reference 2860 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2861 fEntityScanner.scanChar(); 2862 fUsebuffer = true; 2863 fContentBuffer.append((char)c); 2864 c = -1 ; 2865 } else if (c == ']') { 2866 //fStringBuffer.clear(); 2867 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2868 fUsebuffer = true; 2869 fContentBuffer.append((char)fEntityScanner.scanChar()); 2870 // remember where we are in case we get an endEntity before we 2871 // could flush the buffer out - this happens when we're parsing an 2872 // entity which ends with a ] 2873 fInScanContent = true; 2874 2875 // We work on a single character basis to handle cases such as: 2876 // ']]]>' which we might otherwise miss. 2877 // 2878 if (fEntityScanner.skipChar(']')) { 2879 fContentBuffer.append(']'); 2880 while (fEntityScanner.skipChar(']')) { 2881 fContentBuffer.append(']'); 2882 } 2883 if (fEntityScanner.skipChar('>')) { 2884 reportFatalError("CDEndInContent", null); 2885 } 2886 } 2887 c = -1 ; 2888 fInScanContent = false; 2889 } 2890 2891 do{ 2892 //xxx: we should be using only one buffer.. 2893 // we need not to grow the buffer only when isCoalesce() is not true; 2894 2895 if (c == '<') { 2896 fEntityScanner.scanChar(); 2897 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2898 break; 2899 }//xxx what should be the behavior if entity reference is present in the content ? 
2900 else if (c == '&') { 2901 fEntityScanner.scanChar(); 2902 setScannerState(SCANNER_STATE_REFERENCE); 2903 break; 2904 }///xxx since this part is also characters, it should be merged... 2905 else if (c != -1 && isInvalidLiteral(c)) { 2906 if (XMLChar.isHighSurrogate(c)) { 2907 // special case: surrogates 2908 scanSurrogates(fContentBuffer) ; 2909 setScannerState(SCANNER_STATE_CONTENT); 2910 } else { 2911 reportFatalError("InvalidCharInContent", 2912 new Object[] { 2913 Integer.toString(c, 16)}); 2914 fEntityScanner.scanChar(); 2915 } 2916 break; 2917 } 2918 //xxx: scanContent also gives character callback. 2919 c = scanContent(fContentBuffer) ; 2920 //we should not be iterating again if fIsCoalesce is not set to true 2921 2922 if(!fIsCoalesce){ 2923 setScannerState(SCANNER_STATE_CONTENT); 2924 break; 2925 } 2926 2927 }while(true); 2928 2929 //if (fDocumentHandler != null) { 2930 // fDocumentHandler.characters(fContentBuffer, null); 2931 //} 2932 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2933 //if fIsCoalesce is true there might be more data so call fDriver.next() 2934 if(fIsCoalesce){ 2935 fLastSectionWasCharacterData = true ; 2936 continue; 2937 }else{ 2938 //check limit before returning event 2939 checkLimit(fContentBuffer); 2940 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2941 if(DEBUG)System.out.println("Return SPACE EVENT"); 2942 return XMLEvent.SPACE; 2943 } else 2944 return XMLEvent.CHARACTERS ; 2945 } 2946 } 2947 2948 case SCANNER_STATE_END_ELEMENT_TAG :{ 2949 if(fEmptyElement){ 2950 //set it back to false. 2951 fEmptyElement = false; 2952 setScannerState(SCANNER_STATE_CONTENT); 2953 //check the case when there is comment after single element document 2954 //<foo/> and some comment after this 2955 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2956 2957 } else if(scanEndElement() == 0) { 2958 //It is last element of the document 2959 if (elementDepthIsZeroHook()) { 2960 //if element depth is zero , it indicates the end of the document 2961 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2962 //xxx understand this point once again.. 2963 return XMLEvent.END_ELEMENT ; 2964 } 2965 2966 } 2967 setScannerState(SCANNER_STATE_CONTENT); 2968 return XMLEvent.END_ELEMENT ; 2969 } 2970 2971 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2972 scanComment(); 2973 setScannerState(SCANNER_STATE_CONTENT); 2974 return XMLEvent.COMMENT; 2975 //break; 2976 } 2977 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2978 //clear the buffer first 2979 fContentBuffer.clear() ; 2980 //xxx: which buffer should be passed. Ideally we shouldn't have 2981 //more than two buffers -- 2982 //xxx: where should we add the switch for buffering. 2983 scanPI(fContentBuffer); 2984 setScannerState(SCANNER_STATE_CONTENT); 2985 return XMLEvent.PROCESSING_INSTRUCTION; 2986 //break; 2987 } 2988 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2989 //xxx: What if CDATA is the first event 2990 //<foo><![CDATA[hello<><>]]>append</foo> 2991 2992 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 2993 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2994 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2995 fLastSectionWasCData = true ; 2996 fLastSectionWasEntityReference = false; 2997 fLastSectionWasCharacterData = false; 2998 }//if we dont need to coalesce clear the buffer 2999 else{ 3000 fContentBuffer.clear(); 3001 } 3002 fUsebuffer = true; 3003 //CDATA section is completely read in all the case. 3004 scanCDATASection(fContentBuffer , true); 3005 setScannerState(SCANNER_STATE_CONTENT); 3006 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3007 //and just call fDispatche.next(). Since we have set the scanner state to 3008 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3009 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3010 //2. Check if application has set for reporting CDATA event 3011 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3012 //return the cdata event as characters. 3013 if(fIsCoalesce){ 3014 fLastSectionWasCData = true ; 3015 //there might be more data to coalesce. 3016 continue; 3017 }else if(fReportCdataEvent){ 3018 return XMLEvent.CDATA; 3019 } else{ 3020 return XMLEvent.CHARACTERS; 3021 } 3022 } 3023 3024 case SCANNER_STATE_REFERENCE :{ 3025 fMarkupDepth++; 3026 foundBuiltInRefs = false; 3027 3028 //we should not clear the buffer only when the last state was either CDATA or 3029 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3030 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3031 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3032 //used when fIsCoalesce is set to true. 
3033 fLastSectionWasEntityReference = true ; 3034 fLastSectionWasCData = false; 3035 fLastSectionWasCharacterData = false; 3036 }//if we dont need to coalesce clear the buffer 3037 else{ 3038 fContentBuffer.clear(); 3039 } 3040 fUsebuffer = true ; 3041 //take care of character reference 3042 if (fEntityScanner.skipChar('#')) { 3043 scanCharReferenceValue(fContentBuffer, null); 3044 fMarkupDepth--; 3045 if(!fIsCoalesce){ 3046 setScannerState(SCANNER_STATE_CONTENT); 3047 return XMLEvent.CHARACTERS; 3048 } 3049 } else { 3050 // this function also starts new entity 3051 scanEntityReference(fContentBuffer); 3052 //if there was built-in entity reference & coalesce is not true 3053 //return CHARACTERS 3054 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3055 setScannerState(SCANNER_STATE_CONTENT); 3056 return XMLEvent.CHARACTERS; 3057 } 3058 3059 //if there was a text declaration, call next() it will be taken care. 3060 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3061 fLastSectionWasEntityReference = true ; 3062 continue; 3063 } 3064 3065 if(fScannerState == SCANNER_STATE_REFERENCE){ 3066 setScannerState(SCANNER_STATE_CONTENT); 3067 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3068 // Skip the entity reference, we don't care 3069 continue; 3070 } 3071 return XMLEvent.ENTITY_REFERENCE; 3072 } 3073 } 3074 //Wether it was character reference, entity reference or built-in entity 3075 //set the next possible state to SCANNER_STATE_CONTENT 3076 setScannerState(SCANNER_STATE_CONTENT); 3077 fLastSectionWasEntityReference = true ; 3078 continue; 3079 } 3080 3081 case SCANNER_STATE_TEXT_DECL: { 3082 // scan text decl 3083 if (fEntityScanner.skipString("<?xml")) { 3084 fMarkupDepth++; 3085 // NOTE: special case where entity starts with a PI 3086 // whose name starts with "xml" (e.g. 
"xmlfoo") 3087 if (isValidNameChar(fEntityScanner.peekChar())) { 3088 fStringBuffer.clear(); 3089 fStringBuffer.append("xml"); 3090 3091 if (fNamespaces) { 3092 while (isValidNCName(fEntityScanner.peekChar())) { 3093 fStringBuffer.append((char)fEntityScanner.scanChar()); 3094 } 3095 } else { 3096 while (isValidNameChar(fEntityScanner.peekChar())) { 3097 fStringBuffer.append((char)fEntityScanner.scanChar()); 3098 } 3099 } 3100 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3101 fContentBuffer.clear(); 3102 scanPIData(target, fContentBuffer); 3103 } 3104 3105 // standard text declaration 3106 else { 3107 //xxx: this function gives callback 3108 scanXMLDeclOrTextDecl(true); 3109 } 3110 } 3111 // now that we've straightened out the readers, we can read in chunks: 3112 fEntityManager.fCurrentEntity.mayReadChunks = true; 3113 setScannerState(SCANNER_STATE_CONTENT); 3114 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3115 //it seems we have to careful when to allow function issue a callback 3116 //and when to allow adapter issue a callback. 
3117 continue; 3118 } 3119 3120 3121 case SCANNER_STATE_ROOT_ELEMENT: { 3122 if (scanRootElementHook()) { 3123 fEmptyElement = true; 3124 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3125 return XMLEvent.START_ELEMENT; 3126 } 3127 setScannerState(SCANNER_STATE_CONTENT); 3128 return XMLEvent.START_ELEMENT ; 3129 } 3130 case SCANNER_STATE_CHAR_REFERENCE : { 3131 fContentBuffer.clear(); 3132 scanCharReferenceValue(fContentBuffer, null); 3133 fMarkupDepth--; 3134 setScannerState(SCANNER_STATE_CONTENT); 3135 return XMLEvent.CHARACTERS; 3136 } 3137 default: 3138 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3139 3140 }//switch 3141 } 3142 // premature end of file 3143 catch (EOFException e) { 3144 endOfFileHook(e); 3145 return -1; 3146 } 3147 } //while loop 3148 }//next 3149 3150 /** 3151 * Add the count of the content buffer and check if the accumulated 3152 * value exceeds the limit 3153 * @param buffer content buffer 3154 */ 3155 protected void checkLimit(XMLStringBuffer buffer) { 3156 if (fLimitAnalyzer.isTracking(fCurrentEntityName)) { 3157 fLimitAnalyzer.addValue(Limit.GENEAL_ENTITY_SIZE_LIMIT, fCurrentEntityName, buffer.length); 3158 if (fSecurityManager.isOverLimit(Limit.GENEAL_ENTITY_SIZE_LIMIT)) { 3159 fSecurityManager.debugPrint(); 3160 reportFatalError("MaxEntitySizeLimit", new Object[]{fCurrentEntityName, 3161 fLimitAnalyzer.getValue(Limit.GENEAL_ENTITY_SIZE_LIMIT), 3162 fSecurityManager.getLimit(Limit.GENEAL_ENTITY_SIZE_LIMIT), 3163 fSecurityManager.getStateLiteral(Limit.GENEAL_ENTITY_SIZE_LIMIT)}); 3164 } 3165 if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT)) { 3166 fSecurityManager.debugPrint(); 3167 reportFatalError("TotalEntitySizeLimit", 3168 new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT), 3169 fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT), 3170 fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 3171 } 3172 } 3173 } 3174 
//
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods exist so that the full document
        //       scanner can share the majority of its code with this class.

        /**
         * DOCTYPE hook. Subclasses can override this method to handle
         * scanning for the "DOCTYPE" string after the string "<!" has
         * been scanned. This implementation scans nothing.
         *
         * @return True if "DOCTYPE" was scanned; false if it was not.
         *         This implementation always returns false.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code that runs when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. When scanning a full XML document, however, the
         * scanner must handle the trailing miscellaneous section that
         * follows the end of the document's root element.
         *
         * @return True if the caller should stop and return true, which
         *         allows the scanner to switch to a new scanning
         *         driver. A return value of false indicates that
         *         the content driver should continue as normal.
         *         This implementation always returns false.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Root element hook. This method is a hook for subclasses to
         * add code that handles scanning of the root element. When
         * scanning a document fragment there is no "root" element.
         * When scanning a full XML document, however, the scanner must
         * handle the root element specially.
         *
         * @return True if the caller should stop and return true, which
         *         allows the scanner to switch to a new scanning
         *         driver. A return value of false indicates that
         *         the content driver should continue as normal.
         *         This implementation always returns false.
         */
        protected boolean scanRootElementHook()
        throws IOException, XNIException {
            return false;
        } // scanRootElementHook():boolean

        /**
         * End of file hook. This method is a hook for subclasses to
         * add code that handles the end of file. Reaching the end of
         * file in a document fragment is acceptable when the markup
         * depth is zero; when scanning a full XML document, an end of
         * file is always premature.
         *
         * This implementation reports the fatal error "PrematureEOF"
         * when the markup depth is non-zero and otherwise does nothing.
         *
         * @param e the end-of-file exception that was caught; it is not
         *          inspected by this implementation
         */
        protected void endOfFileHook(EOFException e)
        throws IOException, XNIException {

            // NOTE: An end of file is only an error if we were
            //       in the middle of scanning some markup. -Ac
            if (fMarkupDepth == 0) {
                return;
            }
            reportFatalError("PrematureEOF", null);

        } // endOfFileHook()

    } // class FragmentContentDriver

    /**
     * Writes the given string to {@code System.out} using {@code println}.
     *
     * @param str the text to print
     */
    static void pr(String str) {
        System.out.println(str);
    }

    /**
     * Buffer-usage flag. {@link #refresh(int)} sets it to true after
     * copying fTempString into fContentBuffer.
     */
    protected boolean fUsebuffer;

    /**
     * Hands out an XMLString (used to store an attribute value) from the
     * pool maintained for attributes. fAttributeCacheUsedCount tracks how
     * many pool entries have been consumed so far and is advanced by one
     * on every call. When every pooled entry has been consumed, a new
     * XMLString is created, added to the pool and returned.
     *
     * @return XMLString used to store an attribute value.
     */
    protected XMLString getString(){
        final boolean poolExhausted = fAttributeCacheUsedCount >= initialCacheCount
                && fAttributeCacheUsedCount >= attributeValueCache.size();
        if (poolExhausted) {
            // grow the pool by one entry and hand that entry out
            final XMLString fresh = new XMLString();
            fAttributeCacheUsedCount++;
            attributeValueCache.add(fresh);
            return fresh;
        }
        return (XMLString) attributeValueCache.get(fAttributeCacheUsedCount++);
    }

    /**
     * Implements XMLBufferListener interface.
     * Delegates to {@link #refresh(int)} with a position of 0.
     */
    public void refresh(){
        refresh(0);
    }

    /**
     * Callback received when the underlying buffer is being changed
     * (issued by the entity reader, per the original note on this method).
     * Any state that still points into that buffer is saved first:
     * attributes are refreshed while attributes are being read, and
     * pending character data is copied into the content buffer.
     *
     * @param refreshPosition not used by this implementation
     */
    public void refresh(int refreshPosition){
        // If attributes are being read when the callback arrives,
        // cache the attributes that are available so far.
        if (fReadingAttributes) {
            fAttributes.refresh();
        }

        if (fScannerState != SCANNER_STATE_CHARACTER_DATA) {
            return;
        }

        // fTempString maps directly onto the underlying main buffer,
        // so store its data in the content buffer ...
        fContentBuffer.append(fTempString);
        // ... and empty the XMLString so the same data can't be added again.
        fTempString.length = 0;
        fUsebuffer = true;
    }

} // class XMLDocumentFragmentScannerImpl