1 /* 2 * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException; 25 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 26 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 27 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 28 import com.sun.org.apache.xerces.internal.util.XMLChar; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.QName; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 38 import 
com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 46 import com.sun.xml.internal.stream.XMLBufferListener; 47 import com.sun.xml.internal.stream.XMLEntityStorage; 48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49 import java.io.CharConversionException; 50 import java.io.EOFException; 51 import java.io.IOException; 52 import javax.xml.XMLConstants; 53 import javax.xml.stream.XMLInputFactory; 54 import javax.xml.stream.XMLStreamConstants; 55 import javax.xml.stream.events.XMLEvent; 56 import jdk.xml.internal.JdkXmlUtils; 57 import jdk.xml.internal.SecuritySupport; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 
67 * 68 * @author Neeraj Bajaj SUN Microsystems 69 * @author K.Venugopal SUN Microsystems 70 * @author Glenn Marcy, IBM 71 * @author Andy Clark, IBM 72 * @author Arnaud Le Hors, IBM 73 * @author Eric Ye, IBM 74 * @author Sunitha Reddy, SUN Microsystems 75 * 76 * @LastModified: Jan 2019 77 */ 78 public class XMLDocumentFragmentScannerImpl 79 extends XMLScanner 80 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 81 82 // 83 // Constants 84 // 85 86 protected int fElementAttributeLimit, fXMLNameLimit; 87 88 /** External subset resolver. **/ 89 protected ExternalSubsetResolver fExternalSubsetResolver; 90 91 // scanner states 92 93 //XXX this should be divided into more states. 94 /** Scanner state: start of markup. */ 95 protected static final int SCANNER_STATE_START_OF_MARKUP = 21; 96 97 /** Scanner state: content. */ 98 protected static final int SCANNER_STATE_CONTENT = 22; 99 100 /** Scanner state: processing instruction. */ 101 protected static final int SCANNER_STATE_PI = 23; 102 103 /** Scanner state: DOCTYPE. */ 104 protected static final int SCANNER_STATE_DOCTYPE = 24; 105 106 /** Scanner state: XML Declaration */ 107 protected static final int SCANNER_STATE_XML_DECL = 25; 108 109 /** Scanner state: root element. */ 110 protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; 111 112 /** Scanner state: comment. */ 113 protected static final int SCANNER_STATE_COMMENT = 27; 114 115 /** Scanner state: reference. */ 116 protected static final int SCANNER_STATE_REFERENCE = 28; 117 118 // <book type="hard"> reading attribute name 'type' 119 protected static final int SCANNER_STATE_ATTRIBUTE = 29; 120 121 // <book type="hard"> //reading attribute value. 122 protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; 123 124 /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ 125 //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 126 127 /** Scanner state: end of input. 
*/ 128 protected static final int SCANNER_STATE_END_OF_INPUT = 33; 129 130 /** Scanner state: terminated. */ 131 protected static final int SCANNER_STATE_TERMINATED = 34; 132 133 /** Scanner state: CDATA section. */ 134 protected static final int SCANNER_STATE_CDATA = 35; 135 136 /** Scanner state: Text declaration. */ 137 protected static final int SCANNER_STATE_TEXT_DECL = 36; 138 139 /** Scanner state: Text declaration. */ 140 protected static final int SCANNER_STATE_CHARACTER_DATA = 37; 141 142 //<book type="hard">foo</book> 143 protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; 144 145 //<book type="hard">foo</book> reading </book> 146 protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; 147 148 protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; 149 protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; 150 151 // feature identifiers 152 153 154 /** Feature identifier: notify built-in refereces. */ 155 protected static final String NOTIFY_BUILTIN_REFS = 156 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; 157 158 /** Property identifier: entity resolver. */ 159 protected static final String ENTITY_RESOLVER = 160 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 161 162 /** Feature identifier: standard uri conformant */ 163 protected static final String STANDARD_URI_CONFORMANT = 164 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 165 166 /** Feature id: create entity ref nodes. */ 167 protected static final String CREATE_ENTITY_REF_NODES = 168 Constants.XERCES_FEATURE_PREFIX + Constants.CREATE_ENTITY_REF_NODES_FEATURE; 169 170 /** Property identifier: Security property manager. 
*/ 171 private static final String XML_SECURITY_PROPERTY_MANAGER = 172 Constants.XML_SECURITY_PROPERTY_MANAGER; 173 174 /** access external dtd: file protocol 175 * For DOM/SAX, the secure feature is set to true by default 176 */ 177 final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 178 179 // recognized features and properties 180 181 /** Recognized features. */ 182 private static final String[] RECOGNIZED_FEATURES = { 183 NAMESPACES, 184 VALIDATION, 185 NOTIFY_BUILTIN_REFS, 186 NOTIFY_CHAR_REFS, 187 Constants.STAX_REPORT_CDATA_EVENT, 188 XMLConstants.USE_CATALOG 189 }; 190 191 /** Feature defaults. */ 192 private static final Boolean[] FEATURE_DEFAULTS = { 193 Boolean.TRUE, 194 null, 195 Boolean.FALSE, 196 Boolean.FALSE, 197 Boolean.TRUE, 198 JdkXmlUtils.USE_CATALOG_DEFAULT 199 }; 200 201 /** Recognized properties. */ 202 private static final String[] RECOGNIZED_PROPERTIES = { 203 SYMBOL_TABLE, 204 ERROR_REPORTER, 205 ENTITY_MANAGER, 206 XML_SECURITY_PROPERTY_MANAGER, 207 JdkXmlUtils.CATALOG_DEFER, 208 JdkXmlUtils.CATALOG_FILES, 209 JdkXmlUtils.CATALOG_PREFER, 210 JdkXmlUtils.CATALOG_RESOLVE, 211 JdkXmlUtils.CDATA_CHUNK_SIZE 212 }; 213 214 /** Property defaults. */ 215 private static final Object[] PROPERTY_DEFAULTS = { 216 null, 217 null, 218 null, 219 null, 220 null, 221 null, 222 null, 223 null, 224 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT 225 }; 226 227 228 private static final char [] CDATA = {'[','C','D','A','T','A','['}; 229 static final char [] XMLDECL = {'<','?','x','m','l'}; 230 // private static final char [] endTag = {'<','/'}; 231 // debugging 232 233 /** Debug scanner state. */ 234 private static final boolean DEBUG_SCANNER_STATE = false; 235 236 /** Debug driver. */ 237 private static final boolean DEBUG_DISPATCHER = false; 238 239 /** Debug content driver scanning. 
*/ 240 protected static final boolean DEBUG_START_END_ELEMENT = false; 241 242 /** Debug driver next */ 243 protected static final boolean DEBUG = false; 244 245 // 246 // Data 247 // 248 249 // protected data 250 251 /** Document handler. */ 252 protected XMLDocumentHandler fDocumentHandler; 253 protected int fScannerLastState ; 254 255 /** Entity Storage */ 256 protected XMLEntityStorage fEntityStore; 257 258 /** Entity stack. */ 259 protected int[] fEntityStack = new int[4]; 260 261 /** Markup depth. */ 262 protected int fMarkupDepth; 263 264 //is the element empty 265 protected boolean fEmptyElement ; 266 267 //track if we are reading attributes, this is usefule while 268 //there is a callback 269 protected boolean fReadingAttributes = false; 270 271 /** Scanner state. */ 272 protected int fScannerState; 273 274 /** SubScanner state: inside scanContent method. */ 275 protected boolean fInScanContent = false; 276 protected boolean fLastSectionWasCData = false; 277 protected boolean fCDataStart = false; 278 protected boolean fInCData = false; 279 protected boolean fCDataEnd = false; 280 protected boolean fLastSectionWasEntityReference = false; 281 protected boolean fLastSectionWasCharacterData = false; 282 283 /** has external dtd */ 284 protected boolean fHasExternalDTD; 285 286 /** Standalone. */ 287 protected boolean fStandaloneSet; 288 protected boolean fStandalone; 289 protected String fVersion; 290 291 // element information 292 293 /** Current element. */ 294 protected QName fCurrentElement; 295 296 /** Element stack. */ 297 protected ElementStack fElementStack = new ElementStack(); 298 protected ElementStack2 fElementStack2 = new ElementStack2(); 299 300 // other info 301 302 /** Document system identifier. 303 * REVISIT: So what's this used for? - NG 304 * protected String fDocumentSystemId; 305 ******/ 306 307 protected String fPITarget ; 308 309 //xxx do we need to create an extra XMLString object... 
look for using fTempString for collecting all the data values 310 protected XMLString fPIData = new XMLString(); 311 312 // features 313 314 315 /** Notify built-in references. */ 316 protected boolean fNotifyBuiltInRefs = false; 317 318 //STAX related properties 319 //defaultValues. 320 protected boolean fSupportDTD = true; 321 protected boolean fReplaceEntityReferences = true; 322 protected boolean fSupportExternalEntities = false; 323 protected boolean fReportCdataEvent = false ; 324 protected boolean fIsCoalesce = false ; 325 protected String fDeclaredEncoding = null; 326 /** Xerces Feature: Disallow doctype declaration. */ 327 protected boolean fDisallowDoctype = false; 328 329 /** Create entity reference nodes. */ 330 protected boolean fCreateEntityRefNodes = false; 331 332 /** 333 * CDATA chunk size limit 334 */ 335 private int fChunkSize; 336 337 /** 338 * comma-delimited list of protocols that are allowed for the purpose 339 * of accessing external dtd or entity references 340 */ 341 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 342 343 /** 344 * standard uri conformant (strict uri). 345 * http://apache.org/xml/features/standard-uri-conformant 346 */ 347 protected boolean fStrictURI; 348 349 // drivers 350 351 /** Active driver. */ 352 protected Driver fDriver; 353 354 /** Content driver. */ 355 protected Driver fContentDriver = createContentDriver(); 356 357 // temporary variables 358 359 /** Element QName. */ 360 protected QName fElementQName = new QName(); 361 362 /** Attribute QName. */ 363 protected QName fAttributeQName = new QName(); 364 365 /** 366 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 367 * implements Iterator interface so we can directly give Attributes in the form of 368 * iterator. 369 */ 370 protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); 371 372 373 /** String. */ 374 protected XMLString fTempString = new XMLString(); 375 376 /** String. 
*/ 377 protected XMLString fTempString2 = new XMLString(); 378 379 /** Array of 3 strings. */ 380 private final String[] fStrings = new String[3]; 381 382 /** Making the buffer accessible to derived class -- String buffer. */ 383 protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 384 385 /** Making the buffer accessible to derived class -- String buffer. */ 386 protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 387 388 /** stores character data. */ 389 /** Making the buffer accessible to derived class -- stores PI data */ 390 protected XMLStringBuffer fContentBuffer = new XMLStringBuffer(); 391 392 /** Single character array. */ 393 private final char[] fSingleChar = new char[1]; 394 private String fCurrentEntityName = null; 395 396 // New members 397 protected boolean fScanToEnd = false; 398 399 protected DTDGrammarUtil dtdGrammarUtil= null; 400 401 protected boolean fAddDefaultAttr = false; 402 403 protected boolean foundBuiltInRefs = false; 404 405 /** Built-in reference character event */ 406 protected boolean builtInRefCharacterHandled = false; 407 408 //skip element algorithm 409 static final short MAX_DEPTH_LIMIT = 5 ; 410 static final short ELEMENT_ARRAY_LENGTH = 200 ; 411 static final short MAX_POINTER_AT_A_DEPTH = 4 ; 412 static final boolean DEBUG_SKIP_ALGORITHM = false; 413 //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH 414 String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ; 415 //pointer location where last element was skipped 416 short fLastPointerLocation = 0 ; 417 short fElementPointer = 0 ; 418 //2D array to store pointer info 419 short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ; 420 protected String fElementRawname ; 421 protected boolean fShouldSkip = false; 422 protected boolean fAdd = false ; 423 protected boolean fSkip = false; 424 425 /** Reusable Augmentations. 
*/ 426 private Augmentations fTempAugmentations = null; 427 // 428 // Constructors 429 // 430 431 /** Default constructor. */ 432 public XMLDocumentFragmentScannerImpl() { 433 } // <init>() 434 435 // 436 // XMLDocumentScanner methods 437 // 438 439 /** 440 * Sets the input source. 441 * 442 * @param inputSource The input source. 443 * 444 * @throws IOException Thrown on i/o error. 445 */ 446 public void setInputSource(XMLInputSource inputSource) throws IOException { 447 fEntityManager.setEntityHandler(this); 448 fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); 449 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 450 } // setInputSource(XMLInputSource) 451 452 /** 453 * Scans a document. 454 * 455 * @param complete True if the scanner should scan the document 456 * completely, pushing all events to the registered 457 * document handler. A value of false indicates that 458 * that the scanner should only scan the next portion 459 * of the document and return. A scanner instance is 460 * permitted to completely scan a document if it does 461 * not support this "pull" scanning model. 462 * 463 * @return True if there is more to scan, false otherwise. 
464 */ 465 public boolean scanDocument(boolean complete) 466 throws IOException, XNIException { 467 468 // keep dispatching "events" 469 fEntityManager.setEntityHandler(this); 470 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 471 472 int event = next(); 473 do { 474 switch (event) { 475 case XMLStreamConstants.START_DOCUMENT : 476 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 477 break; 478 case XMLStreamConstants.START_ELEMENT : 479 //System.out.println(" in scann element"); 480 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 481 break; 482 case XMLStreamConstants.CHARACTERS : 483 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 484 fDocumentHandler.characters(getCharacterData(),null); 485 break; 486 case XMLStreamConstants.SPACE: 487 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 488 //System.out.println("in the space"); 489 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 490 break; 491 case XMLStreamConstants.ENTITY_REFERENCE : 492 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 493 //entity reference callback are given in startEntity 494 break; 495 case XMLStreamConstants.PROCESSING_INSTRUCTION : 496 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 497 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 498 break; 499 case XMLStreamConstants.COMMENT : 500 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 501 fDocumentHandler.comment(getCharacterData(),null); 502 break; 503 case XMLStreamConstants.DTD : 504 //all DTD related callbacks are handled in DTDScanner. 505 //1. Stax doesn't define DTD states as it does for XML Document. 506 //therefore we don't need to take care of anything here. 
So Just break; 507 break; 508 case XMLStreamConstants.CDATA: 509 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 510 if (fCDataStart) { 511 fDocumentHandler.startCDATA(null); 512 fCDataStart = false; 513 fInCData = true; 514 } 515 516 fDocumentHandler.characters(getCharacterData(),null); 517 if (fCDataEnd) { 518 fDocumentHandler.endCDATA(null); 519 fCDataEnd = false; 520 } 521 break; 522 case XMLStreamConstants.NOTATION_DECLARATION : 523 break; 524 case XMLStreamConstants.ENTITY_DECLARATION : 525 break; 526 case XMLStreamConstants.NAMESPACE : 527 break; 528 case XMLStreamConstants.ATTRIBUTE : 529 break; 530 case XMLStreamConstants.END_ELEMENT : 531 //do not give callback here. 532 //this callback is given in scanEndElement function. 533 //fDocumentHandler.endElement(getElementQName(),null); 534 break; 535 default : 536 // Errors should have already been handled by the Scanner 537 return false; 538 539 } 540 //System.out.println("here in before calling next"); 541 event = next(); 542 //System.out.println("here in after calling next"); 543 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 544 545 if(event == XMLStreamConstants.END_DOCUMENT) { 546 fDocumentHandler.endDocument(null); 547 return false; 548 } 549 550 return true; 551 552 } // scanDocument(boolean):boolean 553 554 555 556 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 557 if(fScannerLastState == XMLEvent.END_ELEMENT){ 558 fElementQName.setValues(fElementStack.getLastPoppedElement()); 559 } 560 return fElementQName ; 561 } 562 563 /** return the next state on the input 564 * @return int 565 */ 566 567 public int next() throws IOException, XNIException { 568 return fDriver.next(); 569 } 570 571 // 572 // XMLComponent methods 573 // 574 575 /** 576 * Resets the component. The component can query the component manager 577 * about any features and properties that affect the operation of the 578 * component. 
579 * 580 * @param componentManager The component manager. 581 * 582 * @throws SAXException Thrown by component on initialization error. 583 * For example, if a feature or property is 584 * required for the operation of the component, the 585 * component manager may throw a 586 * SAXNotRecognizedException or a 587 * SAXNotSupportedException. 588 */ 589 590 public void reset(XMLComponentManager componentManager) 591 throws XMLConfigurationException { 592 593 super.reset(componentManager); 594 595 // other settings 596 // fDocumentSystemId = null; 597 598 // sax features 599 //fAttributes.setNamespaces(fNamespaces); 600 601 // xerces features 602 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 603 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 604 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 605 606 fCreateEntityRefNodes = componentManager.getFeature(CREATE_ENTITY_REF_NODES, fCreateEntityRefNodes); 607 608 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 609 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 
610 (ExternalSubsetResolver) resolver : null; 611 612 //attribute 613 fReadingAttributes = false; 614 //xxx: external entities are supported in Xerces 615 // it would be good to define feature for this case 616 fSupportExternalEntities = true; 617 fReplaceEntityReferences = true; 618 fIsCoalesce = false; 619 620 // setup Driver 621 setScannerState(SCANNER_STATE_CONTENT); 622 setDriver(fContentDriver); 623 624 // JAXP 1.5 features and properties 625 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 626 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 627 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 628 629 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 630 fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 631 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 632 633 resetCommon(); 634 //fEntityManager.test(); 635 } // reset(XMLComponentManager) 636 637 638 public void reset(PropertyManager propertyManager){ 639 640 super.reset(propertyManager); 641 642 // other settings 643 // fDocumentSystemId = null; 644 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)); 645 fNotifyBuiltInRefs = false ; 646 647 //fElementStack2.clear(); 648 //fReplaceEntityReferences = true; 649 //fSupportExternalEntities = true; 650 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 651 fReplaceEntityReferences = bo; 652 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 653 fSupportExternalEntities = bo; 654 Boolean cdata = (Boolean)propertyManager.getProperty( 655 Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 656 if(cdata != null) 657 fReportCdataEvent = cdata ; 658 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 659 if(coalesce != null) 660 fIsCoalesce = coalesce; 661 
fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 662 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 663 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 664 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 665 // setup Driver 666 //we dont need to do this -- nb. 667 //setScannerState(SCANNER_STATE_CONTENT); 668 //setDriver(fContentDriver); 669 //fEntityManager.test(); 670 671 // JAXP 1.5 features and properties 672 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 673 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 674 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 675 676 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 677 fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 678 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 679 resetCommon(); 680 } // reset(XMLComponentManager) 681 682 void resetCommon() { 683 // initialize vars 684 fMarkupDepth = 0; 685 fCurrentElement = null; 686 fElementStack.clear(); 687 fHasExternalDTD = false; 688 fStandaloneSet = false; 689 fStandalone = false; 690 fInScanContent = false; 691 //skipping algorithm 692 fShouldSkip = false; 693 fAdd = false; 694 fSkip = false; 695 696 fEntityStore = fEntityManager.getEntityStore(); 697 dtdGrammarUtil = null; 698 699 if (fSecurityManager != null) { 700 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 701 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 702 } else { 703 fElementAttributeLimit = 0; 704 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 705 } 706 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 707 } 708 709 /** 710 * Returns a list of feature identifiers that are recognized by 711 * this 
component. This method may return null if no features 712 * are recognized by this component. 713 */ 714 public String[] getRecognizedFeatures() { 715 return RECOGNIZED_FEATURES.clone(); 716 } // getRecognizedFeatures():String[] 717 718 /** 719 * Sets the state of a feature. This method is called by the component 720 * manager any time after reset when a feature changes state. 721 * <p> 722 * <strong>Note:</strong> Components should silently ignore features 723 * that do not affect the operation of the component. 724 * 725 * @param featureId The feature identifier. 726 * @param state The state of the feature. 727 * 728 * @throws SAXNotRecognizedException The component should not throw 729 * this exception. 730 * @throws SAXNotSupportedException The component should not throw 731 * this exception. 732 */ 733 public void setFeature(String featureId, boolean state) 734 throws XMLConfigurationException { 735 736 super.setFeature(featureId, state); 737 738 // Xerces properties 739 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 740 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 741 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 742 fNotifyBuiltInRefs = state; 743 } 744 } 745 746 } // setFeature(String,boolean) 747 748 /** 749 * Returns a list of property identifiers that are recognized by 750 * this component. This method may return null if no properties 751 * are recognized by this component. 752 */ 753 public String[] getRecognizedProperties() { 754 return RECOGNIZED_PROPERTIES.clone(); 755 } // getRecognizedProperties():String[] 756 757 /** 758 * Sets the value of a property. This method is called by the component 759 * manager any time after reset when a property changes value. 760 * <p> 761 * <strong>Note:</strong> Components should silently ignore properties 762 * that do not affect the operation of the component. 763 * 764 * @param propertyId The property identifier. 
765 * @param value The value of the property. 766 * 767 * @throws SAXNotRecognizedException The component should not throw 768 * this exception. 769 * @throws SAXNotSupportedException The component should not throw 770 * this exception. 771 */ 772 public void setProperty(String propertyId, Object value) 773 throws XMLConfigurationException { 774 775 super.setProperty(propertyId, value); 776 777 // Xerces properties 778 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 779 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 780 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 781 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 782 fEntityManager = (XMLEntityManager)value; 783 return; 784 } 785 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 786 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 787 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 788 (ExternalSubsetResolver) value : null; 789 return; 790 } 791 } 792 793 794 // Xerces properties 795 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 796 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 797 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 798 fEntityManager = (XMLEntityManager)value; 799 } 800 return; 801 } 802 803 //JAXP 1.5 properties 804 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 805 { 806 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 807 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 808 } 809 810 } // setProperty(String,Object) 811 812 /** 813 * Returns the default state for a feature, or null if this 814 * component does not want to report a default value for this 815 * feature. 816 * 817 * @param featureId The feature identifier. 
818 * 819 * @since Xerces 2.2.0 820 */ 821 public Boolean getFeatureDefault(String featureId) { 822 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 823 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 824 return FEATURE_DEFAULTS[i]; 825 } 826 } 827 return null; 828 } // getFeatureDefault(String):Boolean 829 830 /** 831 * Returns the default state for a property, or null if this 832 * component does not want to report a default value for this 833 * property. 834 * 835 * @param propertyId The property identifier. 836 * 837 * @since Xerces 2.2.0 838 */ 839 public Object getPropertyDefault(String propertyId) { 840 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 841 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 842 return PROPERTY_DEFAULTS[i]; 843 } 844 } 845 return null; 846 } // getPropertyDefault(String):Object 847 848 // 849 // XMLDocumentSource methods 850 // 851 852 /** 853 * setDocumentHandler 854 * 855 * @param documentHandler 856 */ 857 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 858 fDocumentHandler = documentHandler; 859 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 860 } // setDocumentHandler(XMLDocumentHandler) 861 862 863 /** Returns the document handler */ 864 public XMLDocumentHandler getDocumentHandler(){ 865 return fDocumentHandler; 866 } 867 868 // 869 // XMLEntityHandler methods 870 // 871 872 /** 873 * This method notifies of the start of an entity. The DTD has the 874 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 875 * general entities are just specified by their name. 876 * 877 * @param name The name of the entity. 878 * @param identifier The resource identifier. 879 * @param encoding The auto-detected IANA encoding name of the entity 880 * stream. This value will be null in those situations 881 * where the entity encoding is not auto-detected (e.g. 882 * internal entities or a document entity that is 883 * parsed from a java.io.Reader). 
884 * @param augs Additional information that may include infoset augmentations 885 * 886 * @throws XNIException Thrown by handler to signal an error. 887 */ 888 public void startEntity(String name, 889 XMLResourceIdentifier identifier, 890 String encoding, Augmentations augs) throws XNIException { 891 892 // keep track of this entity before fEntityDepth is increased 893 if (fEntityDepth == fEntityStack.length) { 894 int[] entityarray = new int[fEntityStack.length * 2]; 895 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 896 fEntityStack = entityarray; 897 } 898 fEntityStack[fEntityDepth] = fMarkupDepth; 899 900 super.startEntity(name, identifier, encoding, augs); 901 902 // WFC: entity declared in external subset in standalone doc 903 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 904 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 905 new Object[]{name}); 906 } 907 908 /** we are not calling the handlers yet.. */ 909 // call handler 910 if (fDocumentHandler != null && !fScanningAttribute) { 911 if (!name.equals("[xml]")) { 912 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 913 } 914 } 915 916 } // startEntity(String,XMLResourceIdentifier,String) 917 918 /** 919 * This method notifies the end of an entity. The DTD has the pseudo-name 920 * of "[dtd]" parameter entity names start with '%'; and general entities 921 * are just specified by their name. 922 * 923 * @param name The name of the entity. 924 * @param augs Additional information that may include infoset augmentations 925 * 926 * @throws XNIException Thrown by handler to signal an error. 
927 */ 928 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 929 930 /** 931 * // flush possible pending output buffer - see scanContent 932 * if (fInScanContent && fStringBuffer.length != 0 933 * && fDocumentHandler != null) { 934 * fDocumentHandler.characters(fStringBuffer, null); 935 * fStringBuffer.length = 0; // make sure we know it's been flushed 936 * } 937 */ 938 super.endEntity(name, augs); 939 940 // make sure markup is properly balanced 941 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 942 reportFatalError("MarkupEntityMismatch", null); 943 } 944 945 /**/ 946 // call handler 947 if (fDocumentHandler != null && !fScanningAttribute) { 948 if (!name.equals("[xml]")) { 949 fDocumentHandler.endGeneralEntity(name, augs); 950 } 951 } 952 953 954 } // endEntity(String) 955 956 // 957 // Protected methods 958 // 959 960 // Driver factory methods 961 962 /** Creates a content Driver. */ 963 protected Driver createContentDriver() { 964 return new FragmentContentDriver(); 965 } // createContentDriver():Driver 966 967 // scanning methods 968 969 /** 970 * Scans an XML or text declaration. 971 * <p> 972 * <pre> 973 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 974 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 975 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 976 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 977 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 978 * | ('"' ('yes' | 'no') '"')) 979 * 980 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 981 * </pre> 982 * 983 * @param scanningTextDecl True if a text declaration is to 984 * be scanned instead of an XML 985 * declaration. 
986 */ 987 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 988 throws IOException, XNIException { 989 990 // scan decl 991 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 992 fMarkupDepth--; 993 994 // pseudo-attribute values 995 String version = fStrings[0]; 996 String encoding = fStrings[1]; 997 String standalone = fStrings[2]; 998 fDeclaredEncoding = encoding; 999 // set standalone 1000 fStandaloneSet = standalone != null; 1001 fStandalone = fStandaloneSet && standalone.equals("yes"); 1002 ///xxx see where its used.. this is not used anywhere. 1003 //it may be useful for entity to store this information 1004 //but this information is only related with Document Entity. 1005 fEntityManager.setStandalone(fStandalone); 1006 1007 1008 // call handler 1009 if (fDocumentHandler != null) { 1010 if (scanningTextDecl) { 1011 fDocumentHandler.textDecl(version, encoding, null); 1012 } else { 1013 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1014 } 1015 } 1016 1017 if(version != null){ 1018 fEntityScanner.setVersion(version); 1019 fEntityScanner.setXMLVersion(version); 1020 } 1021 // set encoding on reader, only if encoding was not specified by the application explicitly 1022 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1023 fEntityScanner.setEncoding(encoding); 1024 } 1025 1026 } // scanXMLDeclOrTextDecl(boolean) 1027 1028 public String getPITarget(){ 1029 return fPITarget ; 1030 } 1031 1032 public XMLStringBuffer getPIData(){ 1033 return fContentBuffer ; 1034 } 1035 1036 //XXX: why not this function behave as per the state of the parser? 1037 public XMLString getCharacterData(){ 1038 if(fUsebuffer){ 1039 return fContentBuffer ; 1040 }else{ 1041 return fTempString; 1042 } 1043 1044 } 1045 1046 1047 /** 1048 * Scans a processing data. This is needed to handle the situation 1049 * where a document starts with a processing instruction whose 1050 * target name <em>starts with</em> "xml". 
(e.g. xmlfoo) 1051 * 1052 * @param target The PI target 1053 * @param data The XMLStringBuffer to fill in with the data 1054 */ 1055 protected void scanPIData(String target, XMLStringBuffer data) 1056 throws IOException, XNIException { 1057 1058 super.scanPIData(target, data); 1059 1060 //set the PI target and values 1061 fPITarget = target ; 1062 1063 fMarkupDepth--; 1064 1065 } // scanPIData(String) 1066 1067 /** 1068 * Scans a comment. 1069 * <p> 1070 * <pre> 1071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1072 * </pre> 1073 * <p> 1074 * <strong>Note:</strong> Called after scanning past '<!--' 1075 */ 1076 protected void scanComment() throws IOException, XNIException { 1077 fContentBuffer.clear(); 1078 scanComment(fContentBuffer); 1079 //getTextCharacters can also be called for reading comments 1080 fUsebuffer = true; 1081 fMarkupDepth--; 1082 1083 } // scanComment() 1084 1085 //xxx value returned by this function may not remain valid if another event is scanned. 
1086 public String getComment(){ 1087 return fContentBuffer.toString(); 1088 } 1089 1090 void addElement(String rawname){ 1091 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1092 //storing element raw name in a linear list of array 1093 fElementArray[fElementPointer] = rawname ; 1094 //storing elemnetPointer for particular element depth 1095 1096 if(DEBUG_SKIP_ALGORITHM){ 1097 StringBuffer sb = new StringBuffer() ; 1098 sb.append(" Storing element information ") ; 1099 sb.append(" fElementPointer = " + fElementPointer) ; 1100 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1101 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1102 System.out.println(sb.toString()) ; 1103 } 1104 1105 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1106 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1107 short column = storePointerForADepth(fElementPointer); 1108 if(column > 0){ 1109 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1110 //identity comparison shouldn't take much time and we can rely on this 1111 //since its guaranteed to have same object id for same string. 1112 if(rawname == fElementArray[pointer]){ 1113 fShouldSkip = true ; 1114 fLastPointerLocation = pointer ; 1115 //reset the things and return. 1116 resetPointer((short)fElementStack.fDepth , column) ; 1117 fElementArray[fElementPointer] = null ; 1118 return ; 1119 }else{ 1120 fShouldSkip = false ; 1121 } 1122 } 1123 } 1124 fElementPointer++ ; 1125 } 1126 } 1127 1128 1129 void resetPointer(short depth, short column){ 1130 fPointerInfo[depth] [column] = (short)0; 1131 } 1132 1133 //returns column information at which pointer was stored. 1134 short storePointerForADepth(short elementPointer){ 1135 short depth = (short) fElementStack.fDepth ; 1136 1137 //Stores element pointer locations at particular depth , only 4 pointer locations 1138 //are stored at particular depth for now. 
1139 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1140 1141 if(canStore(depth, i)){ 1142 fPointerInfo[depth][i] = elementPointer ; 1143 if(DEBUG_SKIP_ALGORITHM){ 1144 StringBuffer sb = new StringBuffer() ; 1145 sb.append(" Pointer information ") ; 1146 sb.append(" fElementPointer = " + fElementPointer) ; 1147 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1148 sb.append(" column = " + i ) ; 1149 System.out.println(sb.toString()) ; 1150 } 1151 return i; 1152 } 1153 //else 1154 //pointer was not stored because we reached the limit 1155 } 1156 return -1 ; 1157 } 1158 1159 boolean canStore(short depth, short column){ 1160 //colum = 0 , means first element at particular depth 1161 //column = 1, means second element at particular depth 1162 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1163 return fPointerInfo[depth][column] == 0 ? true : false ; 1164 } 1165 1166 1167 short getElementPointer(short depth, short column){ 1168 //colum = 0 , means first element at particular depth 1169 //column = 1, means second element at particular depth 1170 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1171 return fPointerInfo[depth][column] ; 1172 } 1173 1174 //this function assumes that string passed is not null and skips 1175 //the following string from the buffer this makes sure 1176 boolean skipFromTheBuffer(String rawname) throws IOException{ 1177 if(fEntityScanner.skipString(rawname)){ 1178 char c = (char)fEntityScanner.peekChar() ; 1179 //If the start element was completely skipped we should encounter either ' '(space), 1180 //or '/' (in case of empty element) or '>' 1181 if( c == ' ' || c == '/' || c == '>'){ 1182 fElementRawname = rawname ; 1183 return true ; 1184 } else{ 1185 return false; 1186 } 1187 } else 1188 return false ; 1189 } 1190 1191 boolean skipQElement(String rawname) throws IOException{ 1192 1193 final int c = fEntityScanner.getChar(rawname.length()); 1194 //if 
this character is still valid element name -- this means string can't match 1195 if(XMLChar.isName(c)){ 1196 return false; 1197 }else{ 1198 return fEntityScanner.skipString(rawname); 1199 } 1200 } 1201 1202 protected boolean skipElement() throws IOException { 1203 1204 if(!fShouldSkip) return false ; 1205 1206 if(fLastPointerLocation != 0){ 1207 //Look at the next element stored in the array list.. we might just get a match. 1208 String rawname = fElementArray[fLastPointerLocation + 1] ; 1209 if(rawname != null && skipFromTheBuffer(rawname)){ 1210 fLastPointerLocation++ ; 1211 if(DEBUG_SKIP_ALGORITHM){ 1212 System.out.println("Element " + fElementRawname + 1213 " was SKIPPED at pointer location = " + fLastPointerLocation); 1214 } 1215 return true ; 1216 } else{ 1217 //reset it back to zero... we haven't got the correct subset yet. 1218 fLastPointerLocation = 0 ; 1219 1220 } 1221 } 1222 //xxx: we can put some logic here as from what column it should start looking 1223 //for now we always start at 0 1224 //fallback to tolerant algorithm, it would look for differnt element stored at different 1225 //depth and get us the pointer location. 
1226 return fShouldSkip && skipElement((short)0); 1227 1228 } 1229 1230 //start of the column at which it should try searching 1231 boolean skipElement(short column) throws IOException { 1232 short depth = (short)fElementStack.fDepth ; 1233 1234 if(depth > MAX_DEPTH_LIMIT){ 1235 return fShouldSkip = false ; 1236 } 1237 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1238 short pointer = getElementPointer(depth , i ) ; 1239 1240 if(pointer == 0){ 1241 return fShouldSkip = false ; 1242 } 1243 1244 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1245 if(DEBUG_SKIP_ALGORITHM){ 1246 System.out.println(); 1247 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + 1248 fElementStack.fDepth + " column = " + column ); 1249 System.out.println(); 1250 } 1251 fLastPointerLocation = pointer ; 1252 return fShouldSkip = true ; 1253 } 1254 } 1255 return fShouldSkip = false ; 1256 } 1257 1258 /** 1259 * Scans a start element. This method will handle the binding of 1260 * namespace information and notifying the handler of the start 1261 * of the element. 1262 * <p> 1263 * <pre> 1264 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1265 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1266 * </pre> 1267 * <p> 1268 * <strong>Note:</strong> This method assumes that the leading 1269 * '<' character has been consumed. 1270 * <p> 1271 * <strong>Note:</strong> This method uses the fElementQName and 1272 * fAttributes variables. The contents of these variables will be 1273 * destroyed. The caller should copy important information out of 1274 * these variables before calling this method. 1275 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1276 * 1277 * @return True if element is empty. (i.e. It matches 1278 * production [44]. 1279 */ 1280 // fElementQName will have the details of element just read.. 1281 // fAttributes will have the details of all the attributes. 
    protected boolean scanStartElement()
    throws IOException, XNIException {

        if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
        // Step 1 -- skip path: when skipping is on and no more elements are being
        // added, take the element the stack expects next and try to skip its raw
        // name on the input instead of scanning a name.
        if(fSkip && !fAdd){
            //get the stored element -- if everything goes right this should match the
            //token in the buffer

            QName name = fElementStack.getNext();

            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Trying to skip String = " + name.rawname);
            }

            //Be conservative -- if skipping fails -- stop.
            //(fSkip becomes false here, which makes step 2 below run.)
            fSkip = fEntityScanner.skipString(name.rawname);

            if(fSkip){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
                }
                fElementStack.push();
                fElementQName = name;
            }else{
                //if skipping fails reposition the stack or fallback to normal way of processing
                fElementStack.reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Element was NOT skipped, REPOSITIONING stack" );
                }
            }
        }

        // Step 2 -- normal path: taken when
        //  - we are still at the stage of adding elements, or
        //  - the elements were not matched above, or
        //  - fSkip is not set to true
        if(!fSkip || fAdd){
            //get the next element from the stack
            fElementQName = fElementStack.nextElement();
            // name: a QName when namespaces are on, otherwise a plain Name that is
            // stored as both local part and raw name
            if (fNamespaces) {
                fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART);
            } else {
                String name = fEntityScanner.scanName(NameType.ELEMENTSTART);
                fElementQName.setValues(null, name, name, null);
            }

            if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
            if(DEBUG_SKIP_ALGORITHM){
                if(fAdd){
                    System.out.println("Elements are being ADDED -- elemet added is = " +
                    fElementQName.rawname + " at count = " + fElementStack.fCount);
                }
            }

        }

        //when the elements are being added , we need to check if we are set for skipping the elements
        if(fAdd){
            //this sets the value of fAdd variable
            fElementStack.matchElement(fElementQName);
        }


        //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
        fCurrentElement = fElementQName;

        String rawname = fElementQName.rawname;

        // seekCloseOfStartTag() sets this to true when it sees "/>"
        fEmptyElement = false;

        fAttributes.removeAllAttributes();

        // Step 3 -- depth limit, then attributes until '>' or '/>' is consumed
        checkDepth(rawname);
        if(!seekCloseOfStartTag()){
            fReadingAttributes = true;
            fAttributeCacheUsedCount =0;
            fStringBufferIndex =0;
            // lets getAttributeIterator() add the DTD default attributes once
            fAddDefaultAttr = true;
            do {
                scanAttribute(fAttributes);
                // the attribute-count limit is checked after every attribute
                if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
                        fAttributes.getLength() > fElementAttributeLimit){
                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                            "ElementAttributeLimit",
                            new Object[]{rawname, fElementAttributeLimit },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR );
                }

            } while (!seekCloseOfStartTag());
            fReadingAttributes=false;
        }

        // Step 4 -- notify the handler
        if (fEmptyElement) {
            //decrease the markup depth..
            fMarkupDepth--;

            // check that this element was opened in the same entity
            if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
                reportFatalError("ElementEntityMismatch",
                        new Object[]{fCurrentElement.rawname});
            }
            // call handler
            if (fDocumentHandler != null) {
                fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
            }

            //We should not be popping out the context here in endELement becaause the namespace context is still
            //valid when parser is at the endElement state.
            //if (fNamespaces) {
            //  fNamespaceContext.popContext();
            //}

            //pop the element off the stack..
            fElementStack.popElement();

        } else {

            if(dtdGrammarUtil != null)
                dtdGrammarUtil.startElement(fElementQName, fAttributes);
            if(fDocumentHandler != null){
                //complete element and attributes are traversed in this function so we can send a callback
                //here.
                //<strong>we shouldn't be sending callback in scanDocument()</strong>
                fDocumentHandler.startElement(fElementQName, fAttributes, null);
            }
        }


        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +
                "<<< scanStartElement(): "+fEmptyElement);
        return fEmptyElement;

    } // scanStartElement():boolean

    /**
     * Looks for the close of start tag, i.e. if it finds '>' or '/>'
     * Characters are consumed.
     * <p>
     * Leading white space is skipped first. On '/>' the fEmptyElement flag is
     * set. If neither terminator is found, the next character must be a valid
     * name start character (or the high surrogate of one) and must have been
     * preceded by white space; otherwise the fatal error "ElementUnterminated"
     * is reported. A '/' that is not followed by '>' reports the same error.
     *
     * @return true if '>' or '/>' was consumed, false if the start tag is
     *         still open.
     */
    protected boolean seekCloseOfStartTag() throws IOException, XNIException {
        // spaces
        boolean sawSpace = fEntityScanner.skipSpaces();

        // end tag?
        final int c = fEntityScanner.peekChar();
        if (c == '>') {
            fEntityScanner.scanChar(null);
            return true;
        } else if (c == '/') {
            fEntityScanner.scanChar(null);
            if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
                reportFatalError("ElementUnterminated",
                        new Object[]{fElementQName.rawname});
            }
            fEmptyElement = true;
            return true;
        } else if (!isValidNameStartChar(c) || !sawSpace) {
            // Second chance. Check if this character is a high
            // surrogate of a valid name start character.
            if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
                reportFatalError("ElementUnterminated",
                        new Object[]{fElementQName.rawname});
            }
        }

        return false;
    }

    /** Returns true if the current attribute list holds at least one attribute. */
    public boolean hasAttributes(){
        return fAttributes.getLength() > 0;
    }

    /**
     * return the attribute iterator implementation
     * <p>
     * When a dtdGrammarUtil is present and fAddDefaultAttr is still set, the
     * DTD default attributes for fElementQName are added to the list first and
     * the flag is cleared, so that this happens only once until the flag is
     * set again (scanStartElement sets it before scanning attributes).
     */
    public XMLAttributesIteratorImpl getAttributeIterator(){
        if(dtdGrammarUtil != null && fAddDefaultAttr){
            dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes);
            fAddDefaultAttr = false;
        }
        return fAttributes;
    }

    /** return if standalone is set */
    public boolean standaloneSet(){
        return fStandaloneSet;
    }
    /** return if the document is standalone */
    public boolean isStandAlone(){
        return fStandalone ;
    }
    /**
     * Scans an attribute name value pair.
     * <p>
     * <pre>
     * [41] Attribute ::= Name Eq AttValue
     * </pre>
     * <p>
     * <strong>Note:</strong> This method assumes that the next
     * character on the stream is the first character of the attribute
     * name.
     * <p>
     * <strong>Note:</strong> This method uses the fAttributeQName and
     * fQName variables. The contents of these variables will be
     * destroyed.
     *
     * @param attributes The attributes list for the scanned attribute.
1487 */ 1488 1489 protected void scanAttribute(XMLAttributes attributes) 1490 throws IOException, XNIException { 1491 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1492 1493 // name 1494 if (fNamespaces) { 1495 fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME); 1496 } else { 1497 String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME); 1498 fAttributeQName.setValues(null, name, name, null); 1499 } 1500 1501 // equals 1502 fEntityScanner.skipSpaces(); 1503 if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) { 1504 reportFatalError("EqRequiredInAttribute", 1505 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1506 } 1507 fEntityScanner.skipSpaces(); 1508 1509 int attIndex = 0 ; 1510 //REVISIT: one more case needs to be included: external PE and standalone is no 1511 boolean isVC = fHasExternalDTD && !fStandalone; 1512 //fTempString would store attribute value 1513 ///fTempString2 would store attribute non-normalized value 1514 1515 //this function doesn't use 'attIndex'. We are adding the attribute later 1516 //after we have figured out that current attribute is not namespace declaration 1517 //since scanAttributeValue doesn't use attIndex parameter therefore we 1518 //can safely add the attribute later.. 1519 XMLString tmpStr = getString(); 1520 1521 scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, 1522 attIndex, isVC, fCurrentElement.rawname, false); 1523 1524 // content 1525 int oldLen = attributes.getLength(); 1526 //if the attribute name already exists.. new value is replaced with old value 1527 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1528 1529 // WFC: Unique Att Spec 1530 //attributes count will be same if the current attribute name already exists for this element name. 1531 //this means there are two duplicate attributes. 
1532 if (oldLen == attributes.getLength()) { 1533 reportFatalError("AttributeNotUnique", 1534 new Object[]{fCurrentElement.rawname, 1535 fAttributeQName.rawname}); 1536 } 1537 1538 //tmpString contains attribute value 1539 //we are passing null as the attribute value 1540 attributes.setValue(attIndex, null, tmpStr); 1541 1542 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1543 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1544 attributes.setSpecified(attIndex, true); 1545 1546 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1547 1548 } // scanAttribute(XMLAttributes) 1549 1550 /** 1551 * Scans element content. 1552 * 1553 * @return Returns the next character on the stream. 1554 */ 1555 //CHANGED: 1556 //EARLIER: scanContent() 1557 //NOW: scanContent(XMLStringBuffer) 1558 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1559 //this function appends the data to the buffer. 1560 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1561 //set the fTempString length to 0 before passing it on to scanContent 1562 //scanContent sets the correct co-ordinates as per the content read 1563 fTempString.length = 0; 1564 int c = fEntityScanner.scanContent(fTempString); 1565 content.append(fTempString); 1566 fTempString.length = 0; 1567 if (c == '\r') { 1568 // happens when there is the character reference 1569 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1570 fEntityScanner.scanChar(null); 1571 content.append((char)c); 1572 c = -1; 1573 } else if (c == ']') { 1574 //fStringBuffer.clear(); 1575 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1576 content.append((char)fEntityScanner.scanChar(null)); 1577 // remember where we are in case we get an endEntity before we 1578 // could flush the buffer out - this happens when we're parsing an 1579 // entity which ends with a ] 1580 fInScanContent = true; 1581 // 1582 // We work on a single character basis to handle cases such as: 1583 // ']]]>' which we might otherwise miss. 1584 // 1585 if (fEntityScanner.skipChar(']', null)) { 1586 content.append(']'); 1587 while (fEntityScanner.skipChar(']', null)) { 1588 content.append(']'); 1589 } 1590 if (fEntityScanner.skipChar('>', null)) { 1591 reportFatalError("CDEndInContent", null); 1592 } 1593 } 1594 fInScanContent = false; 1595 c = -1; 1596 } 1597 if (fDocumentHandler != null && content.length > 0) { 1598 //fDocumentHandler.characters(content, null); 1599 } 1600 return c; 1601 1602 } // scanContent():int 1603 1604 1605 /** 1606 * Scans a CDATA section. 1607 * <p> 1608 * <strong>Note:</strong> This method uses the fTempString and 1609 * fStringBuffer variables. 1610 * 1611 * @param complete True if the CDATA section is to be scanned 1612 * completely. 1613 * 1614 * @return True if CDATA is completely scanned. 1615 */ 1616 //CHANGED: 1617 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1618 throws IOException, XNIException { 1619 1620 // call handler 1621 if (fDocumentHandler != null) { 1622 //fDocumentHandler.startCDATA(null); 1623 } 1624 1625 while (true) { 1626 //scanData will fill the contentBuffer 1627 if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) { 1628 fInCData = false; 1629 fCDataEnd = true; 1630 fMarkupDepth--; 1631 break ; 1632 } else { 1633 int c = fEntityScanner.peekChar(); 1634 if (c != -1 && isInvalidLiteral(c)) { 1635 if (XMLChar.isHighSurrogate(c)) { 1636 //contentBuffer.clear(); 1637 //scan surrogates if any.... 
1638 scanSurrogates(contentBuffer); 1639 } else { 1640 reportFatalError("InvalidCharInCDSect", 1641 new Object[]{Integer.toString(c,16)}); 1642 fEntityScanner.scanChar(null); 1643 } 1644 } else { 1645 //CData partially returned due to the size limit 1646 fInCData = true; 1647 fCDataEnd = false; 1648 break; 1649 } 1650 //by this time we have also read surrogate contents if any... 1651 if (fDocumentHandler != null) { 1652 //fDocumentHandler.characters(contentBuffer, null); 1653 } 1654 } 1655 } 1656 1657 return true; 1658 1659 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1660 1661 /** 1662 * Scans an end element. 1663 * <p> 1664 * <pre> 1665 * [42] ETag ::= '</' Name S? '>' 1666 * </pre> 1667 * <p> 1668 * <strong>Note:</strong> This method uses the fElementQName variable. 1669 * The contents of this variable will be destroyed. The caller should 1670 * copy the needed information out of this variable before calling 1671 * this method. 1672 * 1673 * @return The element depth. 1674 */ 1675 protected int scanEndElement() throws IOException, XNIException { 1676 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1677 1678 // pop context 1679 QName endElementName = fElementStack.popElement(); 1680 1681 String rawname = endElementName.rawname; 1682 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1683 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1684 //In scanners most of the time is consumed on checks done for XML characters, we can 1685 // optimize on it and avoid the checks done for endElement, 1686 //we will also avoid symbol table lookup. 1687 1688 // this should work both for namespace processing true or false... 1689 1690 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1691 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 
1692 1693 if (!fEntityScanner.skipString(endElementName.rawname)) { 1694 reportFatalError("ETagRequired", new Object[]{rawname}); 1695 } 1696 1697 // end 1698 fEntityScanner.skipSpaces(); 1699 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1700 reportFatalError("ETagUnterminated", 1701 new Object[]{rawname}); 1702 } 1703 fMarkupDepth--; 1704 1705 //we have increased the depth for two markup "<" characters 1706 fMarkupDepth--; 1707 1708 // check that this element was opened in the same entity 1709 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1710 reportFatalError("ElementEntityMismatch", 1711 new Object[]{rawname}); 1712 } 1713 1714 //We should not be popping out the context here in endELement becaause the namespace context is still 1715 //valid when parser is at the endElement state. 1716 1717 //if (fNamespaces) { 1718 // fNamespaceContext.popContext(); 1719 //} 1720 1721 // call handler 1722 if (fDocumentHandler != null ) { 1723 //end element is scanned in this function so we can send a callback 1724 //here. 1725 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1726 1727 fDocumentHandler.endElement(endElementName, null); 1728 } 1729 if(dtdGrammarUtil != null) 1730 dtdGrammarUtil.endElement(endElementName); 1731 1732 return fMarkupDepth; 1733 1734 } // scanEndElement():int 1735 1736 /** 1737 * Scans a character reference. 
1738 * <p> 1739 * <pre> 1740 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1741 * </pre> 1742 */ 1743 protected void scanCharReference() 1744 throws IOException, XNIException { 1745 1746 fStringBuffer2.clear(); 1747 int ch = scanCharReferenceValue(fStringBuffer2, null); 1748 fMarkupDepth--; 1749 if (ch != -1) { 1750 // call handler 1751 1752 if (fDocumentHandler != null) { 1753 if (fNotifyCharRefs) { 1754 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1755 } 1756 Augmentations augs = null; 1757 if (fValidation && ch <= 0x20) { 1758 if (fTempAugmentations != null) { 1759 fTempAugmentations.removeAllItems(); 1760 } 1761 else { 1762 fTempAugmentations = new AugmentationsImpl(); 1763 } 1764 augs = fTempAugmentations; 1765 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1766 } 1767 //xxx: How do we deal with this - how to return charReferenceValues 1768 //now this is being commented because this is taken care in scanDocument() 1769 //fDocumentHandler.characters(fStringBuffer2, null); 1770 if (fNotifyCharRefs) { 1771 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1772 } 1773 } 1774 } 1775 1776 } // scanCharReference() 1777 1778 1779 /** 1780 * Scans an entity reference. 1781 * 1782 * @return returns true if the new entity is started. If it was built-in entity 1783 * 'false' is returned. 1784 * @throws IOException Thrown if i/o error occurs. 1785 * @throws XNIException Thrown if handler throws exception upon 1786 * notification. 
1787 */ 1788 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1789 String name = fEntityScanner.scanName(NameType.REFERENCE); 1790 if (name == null) { 1791 reportFatalError("NameRequiredInReference", null); 1792 return; 1793 } 1794 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1795 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1796 } 1797 if (fEntityStore.isUnparsedEntity(name)) { 1798 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1799 } 1800 fMarkupDepth--; 1801 fCurrentEntityName = name; 1802 1803 // handle built-in entities 1804 if (name == fAmpSymbol) { 1805 handleCharacter('&', fAmpSymbol, content); 1806 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1807 return ; 1808 } else if (name == fLtSymbol) { 1809 handleCharacter('<', fLtSymbol, content); 1810 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1811 return ; 1812 } else if (name == fGtSymbol) { 1813 handleCharacter('>', fGtSymbol, content); 1814 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1815 return ; 1816 } else if (name == fQuotSymbol) { 1817 handleCharacter('"', fQuotSymbol, content); 1818 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1819 return ; 1820 } else if (name == fAposSymbol) { 1821 handleCharacter('\'', fAposSymbol, content); 1822 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1823 return ; 1824 } 1825 1826 //1. if the entity is external and support to external entities is not required 1827 // 2. or entities should not be replaced 1828 //3. or if it is built in entity reference. 
1829 boolean isEE = fEntityStore.isExternalEntity(name); 1830 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1831 fScannerState = SCANNER_STATE_REFERENCE; 1832 return ; 1833 } 1834 // start general entity 1835 if (!fEntityStore.isDeclaredEntity(name)) { 1836 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1837 if (!fSupportDTD && fReplaceEntityReferences) { 1838 reportFatalError("EntityNotDeclared", new Object[]{name}); 1839 return; 1840 } 1841 //REVISIT: one more case needs to be included: external PE and standalone is no 1842 if ( fHasExternalDTD && !fStandalone) { 1843 if (fValidation) 1844 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1845 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1846 } else 1847 reportFatalError("EntityNotDeclared", new Object[]{name}); 1848 } 1849 1850 // create EntityReference only 1851 if (fCreateEntityRefNodes) { 1852 fDocumentHandler.startGeneralEntity(name, null, null, null); 1853 } else { 1854 //we are starting the entity even if the entity was not declared 1855 //if that was the case it its taken care in XMLEntityManager.startEntity() 1856 //we immediately call the endEntity. Application gets to know if there was 1857 //any entity that was not declared. 1858 fEntityManager.startEntity(true, name, false); 1859 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1860 //setScannerState(SCANNER_STATE_CONTENT); 1861 //return true ; 1862 } 1863 } // scanEntityReference() 1864 1865 // utility methods 1866 1867 /** 1868 * Check if the depth exceeds the maxElementDepth limit 1869 * @param elementName name of the current element 1870 */ 1871 void checkDepth(String elementName) { 1872 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1873 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1874 fSecurityManager.debugPrint(fLimitAnalyzer); 1875 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1876 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1877 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1878 "maxElementDepth"}); 1879 } 1880 } 1881 1882 /** 1883 * Calls document handler with a single character resulting from 1884 * built-in entity resolution. 1885 * 1886 * @param c 1887 * @param entity built-in name 1888 * @param XMLStringBuffer append the character to buffer 1889 * 1890 * we really dont need to call this function -- this function is only required when 1891 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1892 * calling this function to hanlde built-in entity reference. 
1893 * 1894 */ 1895 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1896 foundBuiltInRefs = true; 1897 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1898 content.append(c); 1899 if (fDocumentHandler != null) { 1900 fSingleChar[0] = c; 1901 if (fNotifyBuiltInRefs) { 1902 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1903 } 1904 fTempString.setValues(fSingleChar, 0, 1); 1905 if(!fIsCoalesce){ 1906 fDocumentHandler.characters(fTempString, null); 1907 builtInRefCharacterHandled = true; 1908 } 1909 1910 if (fNotifyBuiltInRefs) { 1911 fDocumentHandler.endGeneralEntity(entity, null); 1912 } 1913 } 1914 } // handleCharacter(char) 1915 1916 // helper methods 1917 1918 /** 1919 * Sets the scanner state. 1920 * 1921 * @param state The new scanner state. 1922 */ 1923 protected final void setScannerState(int state) { 1924 1925 fScannerState = state; 1926 if (DEBUG_SCANNER_STATE) { 1927 System.out.print("### setScannerState: "); 1928 //System.out.print(fScannerState); 1929 System.out.print(getScannerStateName(state)); 1930 System.out.println(); 1931 } 1932 1933 } // setScannerState(int) 1934 1935 1936 /** 1937 * Sets the Driver. 1938 * 1939 * @param Driver The new Driver. 1940 */ 1941 protected final void setDriver(Driver driver) { 1942 fDriver = driver; 1943 if (DEBUG_DISPATCHER) { 1944 System.out.print("%%% setDriver: "); 1945 System.out.print(getDriverName(driver)); 1946 System.out.println(); 1947 } 1948 } 1949 1950 // 1951 // Private methods 1952 // 1953 1954 /** Returns the scanner state name. 
*/ 1955 protected String getScannerStateName(int state) { 1956 1957 switch (state) { 1958 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1959 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1960 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1961 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1962 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1963 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1964 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1965 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1966 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1967 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1968 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1969 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1970 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1971 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1972 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1973 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1974 } 1975 1976 return "??? ("+state+')'; 1977 1978 } // getScannerStateName(int):String 1979 public String getEntityName(){ 1980 //return the cached name 1981 return fCurrentEntityName; 1982 } 1983 1984 /** Returns the driver name. 
*/ 1985 public String getDriverName(Driver driver) { 1986 1987 if (DEBUG_DISPATCHER) { 1988 if (driver != null) { 1989 String name = driver.getClass().getName(); 1990 int index = name.lastIndexOf('.'); 1991 if (index != -1) { 1992 name = name.substring(index + 1); 1993 index = name.lastIndexOf('$'); 1994 if (index != -1) { 1995 name = name.substring(index + 1); 1996 } 1997 } 1998 return name; 1999 } 2000 } 2001 return "null"; 2002 2003 } // getDriverName():String 2004 2005 /** 2006 * Check the protocol used in the systemId against allowed protocols 2007 * 2008 * @param systemId the Id of the URI 2009 * @param allowedProtocols a list of allowed protocols separated by comma 2010 * @return the name of the protocol if rejected, null otherwise 2011 */ 2012 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2013 String baseSystemId = fEntityScanner.getBaseSystemId(); 2014 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 2015 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2016 } 2017 2018 // 2019 // Classes 2020 // 2021 2022 /** 2023 * @author Neeraj Bajaj, Sun Microsystems. 2024 */ 2025 protected static final class Element { 2026 2027 // 2028 // Data 2029 // 2030 2031 /** Symbol. */ 2032 public QName qname; 2033 2034 //raw name stored as characters 2035 public char[] fRawname; 2036 2037 /** The next Element entry. */ 2038 public Element next; 2039 2040 // 2041 // Constructors 2042 // 2043 2044 /** 2045 * Constructs a new Element from the given QName and next Element 2046 * reference. 2047 */ 2048 public Element(QName qname, Element next) { 2049 this.qname.setValues(qname); 2050 this.fRawname = qname.rawname.toCharArray(); 2051 this.next = next; 2052 } 2053 2054 } // class Element 2055 2056 /** 2057 * Element stack. 2058 * 2059 * @author Neeraj Bajaj, Sun Microsystems. 
2060 */ 2061 protected class ElementStack2 { 2062 2063 // 2064 // Data 2065 // 2066 2067 /** The stack data. */ 2068 protected QName [] fQName = new QName[20]; 2069 2070 //Element depth 2071 protected int fDepth; 2072 //total number of elements 2073 protected int fCount; 2074 //current position 2075 protected int fPosition; 2076 //Mark refers to the position 2077 protected int fMark; 2078 2079 protected int fLastDepth ; 2080 2081 // 2082 // Constructors 2083 // 2084 2085 /** Default constructor. */ 2086 public ElementStack2() { 2087 for (int i = 0; i < fQName.length; i++) { 2088 fQName[i] = new QName(); 2089 } 2090 fMark = fPosition = 1; 2091 } // <init>() 2092 2093 public void resize(){ 2094 /** 2095 * int length = fElements.length; 2096 * Element [] temp = new Element[length * 2]; 2097 * System.arraycopy(fElements, 0, temp, 0, length); 2098 * fElements = temp; 2099 */ 2100 //resize QNames 2101 int oldLength = fQName.length; 2102 QName [] tmp = new QName[oldLength * 2]; 2103 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2104 fQName = tmp; 2105 2106 for (int i = oldLength; i < fQName.length; i++) { 2107 fQName[i] = new QName(); 2108 } 2109 2110 } 2111 2112 2113 // 2114 // Public methods 2115 // 2116 2117 /** Check if the element scanned during the start element 2118 *matches the stored element. 2119 * 2120 *@return true if the match suceeds. 
2121 */ 2122 public boolean matchElement(QName element) { 2123 //last depth is the depth when last elemnt was pushed 2124 //if last depth is greater than current depth 2125 if(DEBUG_SKIP_ALGORITHM){ 2126 System.out.println("fLastDepth = " + fLastDepth); 2127 System.out.println("fDepth = " + fDepth); 2128 } 2129 boolean match = false; 2130 if(fLastDepth > fDepth && fDepth <= 2){ 2131 if(DEBUG_SKIP_ALGORITHM){ 2132 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2133 } 2134 if(element.rawname == fQName[fDepth].rawname){ 2135 fAdd = false; 2136 //mark this position 2137 //decrease the depth by 1 as arrays are 0 based 2138 fMark = fDepth - 1; 2139 //we found the match and from next element skipping will start, add 1 2140 fPosition = fMark + 1 ; 2141 match = true; 2142 //Once we get match decrease the count -- this was increased by nextElement() 2143 --fCount; 2144 if(DEBUG_SKIP_ALGORITHM){ 2145 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2146 System.out.println("fMark = " + fMark); 2147 System.out.println("fPosition = " + fPosition); 2148 System.out.println("fDepth = " + fDepth); 2149 System.out.println("fCount = " + fCount); 2150 } 2151 }else{ 2152 fAdd = true; 2153 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2154 } 2155 } 2156 //store the last depth 2157 fLastDepth = fDepth++; 2158 return match; 2159 } // pushElement(QName):QName 2160 2161 /** 2162 * This function doesn't increase depth. The function in this function is 2163 *broken down into two functions for efficiency. <@see>matchElement</see>. 2164 * This function just returns the pointer to the object and its values are set. 
2165 * 2166 *@return QName reference to the next element in the list 2167 */ 2168 public QName nextElement() { 2169 2170 //if number of elements becomes equal to the length of array -- stop the skipping 2171 if (fCount == fQName.length) { 2172 fShouldSkip = false; 2173 fAdd = false; 2174 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2175 //xxx: this is not correct, we are returning the last element 2176 //this wont make any difference since flag has been set to 'false' 2177 return fQName[--fCount]; 2178 } 2179 if(DEBUG_SKIP_ALGORITHM){ 2180 System.out.println("fCount = " + fCount); 2181 } 2182 return fQName[fCount++]; 2183 2184 } 2185 2186 /** Note that this function is considerably different than nextElement() 2187 * This function just returns the previously stored elements 2188 */ 2189 public QName getNext(){ 2190 //when position reaches number of elements in the list.. 2191 //set the position back to mark, making it a circular linked list. 2192 if(fPosition == fCount){ 2193 fPosition = fMark; 2194 } 2195 return fQName[fPosition++]; 2196 } 2197 2198 /** returns the current depth 2199 */ 2200 public int popElement(){ 2201 return fDepth--; 2202 } 2203 2204 2205 /** Clears the stack without throwing away existing QName objects. */ 2206 public void clear() { 2207 fLastDepth = 0; 2208 fDepth = 0; 2209 fCount = 0 ; 2210 fPosition = fMark = 1; 2211 } // clear() 2212 2213 } // class ElementStack 2214 2215 /** 2216 * Element stack. This stack operates without synchronization, error 2217 * checking, and it re-uses objects instead of throwing popped items 2218 * away. 2219 * 2220 * @author Andy Clark, IBM 2221 */ 2222 protected class ElementStack { 2223 2224 // 2225 // Data 2226 // 2227 2228 /** The stack data. 
*/ 2229 protected QName[] fElements; 2230 protected int [] fInt = new int[20]; 2231 2232 2233 //Element depth 2234 protected int fDepth; 2235 //total number of elements 2236 protected int fCount; 2237 //current position 2238 protected int fPosition; 2239 //Mark refers to the position 2240 protected int fMark; 2241 2242 protected int fLastDepth ; 2243 2244 // 2245 // Constructors 2246 // 2247 2248 /** Default constructor. */ 2249 public ElementStack() { 2250 fElements = new QName[20]; 2251 for (int i = 0; i < fElements.length; i++) { 2252 fElements[i] = new QName(); 2253 } 2254 } // <init>() 2255 2256 // 2257 // Public methods 2258 // 2259 2260 /** 2261 * Pushes an element on the stack. 2262 * <p> 2263 * <strong>Note:</strong> The QName values are copied into the 2264 * stack. In other words, the caller does <em>not</em> orphan 2265 * the element to the stack. Also, the QName object returned 2266 * is <em>not</em> orphaned to the caller. It should be 2267 * considered read-only. 2268 * 2269 * @param element The element to push onto the stack. 2270 * 2271 * @return Returns the actual QName object that stores the 2272 */ 2273 //XXX: THIS FUNCTION IS NOT USED 2274 public QName pushElement(QName element) { 2275 if (fDepth == fElements.length) { 2276 QName[] array = new QName[fElements.length * 2]; 2277 System.arraycopy(fElements, 0, array, 0, fDepth); 2278 fElements = array; 2279 for (int i = fDepth; i < fElements.length; i++) { 2280 fElements[i] = new QName(); 2281 } 2282 } 2283 fElements[fDepth].setValues(element); 2284 return fElements[fDepth++]; 2285 } // pushElement(QName):QName 2286 2287 2288 /** Note that this function is considerably different than nextElement() 2289 * This function just returns the previously stored elements 2290 */ 2291 public QName getNext(){ 2292 //when position reaches number of elements in the list.. 2293 //set the position back to mark, making it a circular linked list. 
2294 if(fPosition == fCount){ 2295 fPosition = fMark; 2296 } 2297 //store the position of last opened tag at particular depth 2298 //fInt[++fDepth] = fPosition; 2299 if(DEBUG_SKIP_ALGORITHM){ 2300 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2301 } 2302 //return fElements[fPosition++]; 2303 return fElements[fPosition]; 2304 } 2305 2306 /** This function should be called only when element was skipped sucessfully. 2307 * 1. Increase the depth - because element was sucessfully skipped. 2308 *2. Store the position of the element token in array "last opened tag" at depth. 2309 *3. increase the position counter so as to point to the next element in the array 2310 */ 2311 public void push(){ 2312 2313 fInt[++fDepth] = fPosition++; 2314 } 2315 2316 /** Check if the element scanned during the start element 2317 *matches the stored element. 2318 * 2319 *@return true if the match suceeds. 2320 */ 2321 public boolean matchElement(QName element) { 2322 //last depth is the depth when last elemnt was pushed 2323 //if last depth is greater than current depth 2324 //if(DEBUG_SKIP_ALGORITHM){ 2325 // System.out.println("Check if the element " + element.rawname + " matches"); 2326 // System.out.println("fLastDepth = " + fLastDepth); 2327 // System.out.println("fDepth = " + fDepth); 2328 //} 2329 boolean match = false; 2330 if(fLastDepth > fDepth && fDepth <= 3){ 2331 if(DEBUG_SKIP_ALGORITHM){ 2332 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2333 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2334 } 2335 if(element.rawname == fElements[fDepth - 1].rawname){ 2336 fAdd = false; 2337 //mark this position 2338 //decrease the depth by 1 as arrays are 0 based 2339 fMark = fDepth - 1; 2340 //we found the match 2341 fPosition = fMark; 2342 match = true; 2343 //Once we get match decrease 
the count -- this was increased by nextElement() 2344 --fCount; 2345 if(DEBUG_SKIP_ALGORITHM){ 2346 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2347 System.out.println("fMark = " + fMark); 2348 System.out.println("fPosition = " + fPosition); 2349 System.out.println("fDepth = " + fDepth); 2350 System.out.println("fCount = " + fCount); 2351 System.out.println("---------MATCH SUCEEDED-----------------"); 2352 System.out.println(""); 2353 } 2354 }else{ 2355 fAdd = true; 2356 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2357 } 2358 } 2359 //store the position for the current depth 2360 //when we are adding the elements, when skipping 2361 //starts even then this should be tracked ie. when 2362 //calling getNext() 2363 if(match){ 2364 //from next element skipping will start, add 1 2365 fInt[fDepth] = fPosition++; 2366 } else{ 2367 if(DEBUG_SKIP_ALGORITHM){ 2368 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2369 } 2370 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 
2371 fInt[fDepth] = fCount - 1; 2372 } 2373 2374 //if number of elements becomes equal to the length of array -- stop the skipping 2375 //xxx: should we do "fCount == fInt.length" 2376 if (fCount == fElements.length) { 2377 fSkip = false; 2378 fAdd = false; 2379 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2380 reposition(); 2381 if(DEBUG_SKIP_ALGORITHM){ 2382 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2383 System.out.println("REPOSITIONING THE STACK"); 2384 System.out.println("-----------SKIPPING STOPPED----------"); 2385 System.out.println(""); 2386 } 2387 return false; 2388 } 2389 if(DEBUG_SKIP_ALGORITHM){ 2390 if(match){ 2391 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2392 }else{ 2393 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2394 } 2395 } 2396 //store the last depth 2397 fLastDepth = fDepth; 2398 return match; 2399 } // matchElement(QName):QName 2400 2401 2402 /** 2403 * Returns the next element on the stack. 2404 * 2405 * @return Returns the actual QName object. Callee should 2406 * use this object to store the details of next element encountered. 2407 */ 2408 public QName nextElement() { 2409 if(fSkip){ 2410 fDepth++; 2411 //boundary checks are done in matchElement() 2412 return fElements[fCount++]; 2413 } else if (fDepth == fElements.length) { 2414 QName[] array = new QName[fElements.length * 2]; 2415 System.arraycopy(fElements, 0, array, 0, fDepth); 2416 fElements = array; 2417 for (int i = fDepth; i < fElements.length; i++) { 2418 fElements[i] = new QName(); 2419 } 2420 } 2421 2422 return fElements[fDepth++]; 2423 2424 } // pushElement(QName):QName 2425 2426 2427 /** 2428 * Pops an element off of the stack by setting the values of 2429 * the specified QName. 2430 * <p> 2431 * <strong>Note:</strong> The object returned is <em>not</em> 2432 * orphaned to the caller. 
Therefore, the caller should consider 2433 * the object to be read-only. 2434 */ 2435 public QName popElement() { 2436 //return the same object that was pushed -- this would avoid 2437 //setting the values for every end element. 2438 //STRONG: this object is read only -- this object reference shouldn't be stored. 2439 if(fSkip || fAdd ){ 2440 if(DEBUG_SKIP_ALGORITHM){ 2441 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2442 System.out.println(""); 2443 } 2444 return fElements[fInt[fDepth--]]; 2445 } else{ 2446 if(DEBUG_SKIP_ALGORITHM){ 2447 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2448 } 2449 return fElements[--fDepth] ; 2450 } 2451 //element.setValues(fElements[--fDepth]); 2452 } // popElement(QName) 2453 2454 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2455 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2456 *as per the depth. 2457 */ 2458 public void reposition(){ 2459 for( int i = 2 ; i <= fDepth ; i++){ 2460 fElements[i-1] = fElements[fInt[i]]; 2461 } 2462 if(DEBUG_SKIP_ALGORITHM){ 2463 for( int i = 0 ; i < fDepth ; i++){ 2464 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2465 } 2466 } 2467 } 2468 2469 /** Clears the stack without throwing away existing QName objects. */ 2470 public void clear() { 2471 fDepth = 0; 2472 fLastDepth = 0; 2473 fCount = 0 ; 2474 fPosition = fMark = 1; 2475 2476 } // clear() 2477 2478 /** 2479 * This function is as a result of optimization done for endElement -- 2480 * we dont need to set the value for every end element encouterd. 2481 * For Well formedness checks we can have the same QName object that was pushed. 
2482 * the values will be set only if application need to know about the endElement 2483 */ 2484 2485 public QName getLastPoppedElement(){ 2486 return fElements[fDepth]; 2487 } 2488 } // class ElementStack 2489 2490 /** 2491 * Drives the parser to the next state/event on the input. Parser is guaranteed 2492 * to stop at the next state/event. 2493 * 2494 * Internally XML document is divided into several states. Each state represents 2495 * a sections of XML document. When this functions returns normally, it has read 2496 * the section of XML document and returns the state corresponding to section of 2497 * document which has been read. For optimizations, a particular driver 2498 * can read ahead of the section of document (state returned) just read and 2499 * can maintain a different internal state. 2500 * 2501 * 2502 * @author Neeraj Bajaj, Sun Microsystems 2503 */ 2504 protected interface Driver { 2505 2506 2507 /** 2508 * Drives the parser to the next state/event on the input. Parser is guaranteed 2509 * to stop at the next state/event. 2510 * 2511 * Internally XML document is divided into several states. Each state represents 2512 * a sections of XML document. When this functions returns normally, it has read 2513 * the section of XML document and returns the state corresponding to section of 2514 * document which has been read. For optimizations, a particular driver 2515 * can read ahead of the section of document (state returned) just read and 2516 * can maintain a different internal state. 2517 * 2518 * @return state representing the section of document just read. 2519 * 2520 * @throws IOException Thrown on i/o error. 2521 * @throws XNIException Thrown on parse error. 2522 */ 2523 2524 public int next() throws IOException, XNIException; 2525 2526 } // interface Driver 2527 2528 /** 2529 * Driver to handle content scanning. This driver is capable of reading 2530 * the fragment of XML document. 
When it has finished reading fragment 2531 * of XML documents, it can pass the job of reading to another driver. 2532 * 2533 * This class has been modified as per the new design which is more suited to 2534 * efficiently build pull parser. Lot of performance improvements have been done and 2535 * the code has been added to support stax functionality/features. 2536 * 2537 * @author Neeraj Bajaj, Sun Microsystems 2538 * 2539 * 2540 * @author Andy Clark, IBM 2541 * @author Eric Ye, IBM 2542 */ 2543 protected class FragmentContentDriver 2544 implements Driver { 2545 2546 // 2547 // Driver methods 2548 // 2549 2550 /** 2551 * decides the appropriate state of the parser 2552 */ 2553 private void startOfMarkup() throws IOException { 2554 fMarkupDepth++; 2555 final int ch = fEntityScanner.peekChar(); 2556 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2557 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2558 } else { 2559 switch(ch){ 2560 case '?' :{ 2561 setScannerState(SCANNER_STATE_PI); 2562 fEntityScanner.skipChar(ch, null); 2563 break; 2564 } 2565 case '!' 
:{ 2566 fEntityScanner.skipChar(ch, null); 2567 if (fEntityScanner.skipChar('-', null)) { 2568 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2569 reportFatalError("InvalidCommentStart", 2570 null); 2571 } 2572 setScannerState(SCANNER_STATE_COMMENT); 2573 } else if (fEntityScanner.skipString(CDATA)) { 2574 fCDataStart = true; 2575 setScannerState(SCANNER_STATE_CDATA ); 2576 } else if (!scanForDoctypeHook()) { 2577 reportFatalError("MarkupNotRecognizedInContent", 2578 null); 2579 } 2580 break; 2581 } 2582 case '/' :{ 2583 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2584 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2585 break; 2586 } 2587 default :{ 2588 reportFatalError("MarkupNotRecognizedInContent", null); 2589 } 2590 } 2591 } 2592 2593 }//startOfMarkup 2594 2595 private void startOfContent() throws IOException { 2596 if (fEntityScanner.skipChar('<', null)) { 2597 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2598 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2599 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2600 } else { 2601 //element content is there.. 2602 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2603 } 2604 }//startOfContent 2605 2606 2607 /** 2608 * 2609 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2610 * At any point of time when in doubt over the current state of the parser, the state should be 2611 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2612 * the parser to one of its sub state. 2613 * sub states are defined in the parser on the basis of different XML component like 2614 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2615 * These sub states help the parser to have fine control over the parsing. These are the 2616 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2617 * decided if paresr needs to stop at next milepost ?? 2618 * 2619 */ 2620 public void decideSubState() throws IOException { 2621 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2622 2623 switch (fScannerState) { 2624 2625 case SCANNER_STATE_CONTENT: { 2626 startOfContent() ; 2627 break; 2628 } 2629 2630 case SCANNER_STATE_START_OF_MARKUP: { 2631 startOfMarkup() ; 2632 break; 2633 } 2634 } 2635 } 2636 }//decideSubState 2637 2638 /** 2639 * Drives the parser to the next state/event on the input. Parser is guaranteed 2640 * to stop at the next state/event. Internally XML document 2641 * is divided into several states. Each state represents a sections of XML 2642 * document. When this functions returns normally, it has read the section 2643 * of XML document and returns the state corresponding to section of 2644 * document which has been read. For optimizations, a particular driver 2645 * can read ahead of the section of document (state returned) just read and 2646 * can maintain a different internal state. 2647 * 2648 * State returned corresponds to Stax states. 2649 * 2650 * @return state representing the section of document just read. 2651 * 2652 * @throws IOException Thrown on i/o error. 2653 * @throws XNIException Thrown on parse error. 2654 */ 2655 2656 public int next() throws IOException, XNIException { 2657 while (true) { 2658 try { 2659 2660 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2661 //decideSubState. 2662 2663 if (fScannerState == SCANNER_STATE_CONTENT) { 2664 final int ch = fEntityScanner.peekChar(); 2665 if (ch == '<') { 2666 fEntityScanner.scanChar(null); 2667 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2668 } else if (ch == '&') { 2669 fEntityScanner.scanChar(NameType.REFERENCE); 2670 setScannerState(SCANNER_STATE_REFERENCE) ; 2671 } else { 2672 //element content is there.. 
2673 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2674 } 2675 } 2676 2677 if (fScannerState == SCANNER_STATE_START_OF_MARKUP) { 2678 startOfMarkup(); 2679 } 2680 2681 //decideSubState() ; 2682 2683 //do some special handling if isCoalesce is set to true. 2684 if (fIsCoalesce) { 2685 fUsebuffer = true ; 2686 //if the last section was character data 2687 if (fLastSectionWasCharacterData) { 2688 2689 //if we dont encounter any CDATA or ENTITY REFERENCE and 2690 //current state is also not SCANNER_STATE_CHARACTER_DATA 2691 //return the last scanned charactrer data. 2692 if ((fScannerState != SCANNER_STATE_CDATA) 2693 && (fScannerState != SCANNER_STATE_REFERENCE) 2694 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) { 2695 fLastSectionWasCharacterData = false; 2696 return XMLEvent.CHARACTERS; 2697 } 2698 }//if last section was CDATA or ENTITY REFERENCE 2699 //xxx: there might be another entity reference or CDATA after this 2700 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2701 else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) { 2702 //and current state is not SCANNER_STATE_CHARACTER_DATA 2703 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2704 //this means there is nothing more to be coalesced. 2705 //return the CHARACTERS event. 
2706 if ((fScannerState != SCANNER_STATE_CDATA) 2707 && (fScannerState != SCANNER_STATE_REFERENCE) 2708 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2709 2710 fLastSectionWasCData = false; 2711 fLastSectionWasEntityReference = false; 2712 return XMLEvent.CHARACTERS; 2713 } 2714 } 2715 } 2716 2717 switch(fScannerState){ 2718 2719 case XMLEvent.START_DOCUMENT : 2720 return XMLEvent.START_DOCUMENT; 2721 2722 case SCANNER_STATE_START_ELEMENT_TAG :{ 2723 2724 //returns true if the element is empty 2725 fEmptyElement = scanStartElement() ; 2726 //if the element is empty the next event is "end element" 2727 if(fEmptyElement){ 2728 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2729 }else{ 2730 //set the next possible state 2731 setScannerState(SCANNER_STATE_CONTENT); 2732 } 2733 return XMLEvent.START_ELEMENT ; 2734 } 2735 2736 case SCANNER_STATE_CHARACTER_DATA: { 2737 2738 //if last section was either entity reference or cdata or 2739 //character data we should be using buffer 2740 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData 2741 || fLastSectionWasCharacterData ; 2742 2743 //When coalesce is set to true and last state was REFERENCE or 2744 //CDATA or CHARACTER_DATA, buffer should not be cleared. 
2745 if( fIsCoalesce && (fLastSectionWasEntityReference || 2746 fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2747 fLastSectionWasEntityReference = false; 2748 fLastSectionWasCData = false; 2749 fLastSectionWasCharacterData = true ; 2750 fUsebuffer = true; 2751 }else{ 2752 //clear the buffer 2753 fContentBuffer.clear(); 2754 } 2755 2756 //set the fTempString length to 0 before passing it on to scanContent 2757 //scanContent sets the correct co-ordinates as per the content read 2758 fTempString.length = 0; 2759 int c = fEntityScanner.scanContent(fTempString); 2760 2761 if(fEntityScanner.skipChar('<', null)){ 2762 //check if we have reached end of element 2763 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2764 //increase the mark up depth 2765 fMarkupDepth++; 2766 fLastSectionWasCharacterData = false; 2767 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2768 //check if its start of new element 2769 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2770 fMarkupDepth++; 2771 fLastSectionWasCharacterData = false; 2772 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2773 }else{ 2774 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2775 //there can be cdata ahead if coalesce is true we should call again 2776 if(fIsCoalesce){ 2777 fLastSectionWasCharacterData = true; 2778 bufferContent(); 2779 continue; 2780 } 2781 } 2782 //in case last section was either entity reference or 2783 //cdata or character data -- we should be using buffer 2784 if(fUsebuffer){ 2785 bufferContent(); 2786 } 2787 2788 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2789 if(DEBUG)System.out.println("Return SPACE EVENT"); 2790 return XMLEvent.SPACE; 2791 }else 2792 return XMLEvent.CHARACTERS; 2793 2794 } else{ 2795 bufferContent(); 2796 } 2797 if (c == '\r') { 2798 if(DEBUG){ 2799 System.out.println("'\r' character found"); 2800 } 2801 // happens when there is the character reference 2802 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 2803 fEntityScanner.scanChar(null); 2804 fUsebuffer = true; 2805 fContentBuffer.append((char)c); 2806 c = -1 ; 2807 } else if (c == ']') { 2808 //fStringBuffer.clear(); 2809 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2810 fUsebuffer = true; 2811 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2812 // remember where we are in case we get an endEntity before we 2813 // could flush the buffer out - this happens when we're parsing an 2814 // entity which ends with a ] 2815 fInScanContent = true; 2816 2817 // We work on a single character basis to handle cases such as: 2818 // ']]]>' which we might otherwise miss. 2819 // 2820 if (fEntityScanner.skipChar(']', null)) { 2821 fContentBuffer.append(']'); 2822 while (fEntityScanner.skipChar(']', null)) { 2823 fContentBuffer.append(']'); 2824 } 2825 if (fEntityScanner.skipChar('>', null)) { 2826 reportFatalError("CDEndInContent", null); 2827 } 2828 } 2829 c = -1 ; 2830 fInScanContent = false; 2831 } 2832 2833 do{ 2834 //xxx: we should be using only one buffer.. 2835 // we need not to grow the buffer only when isCoalesce() is not true; 2836 2837 if (c == '<') { 2838 fEntityScanner.scanChar(null); 2839 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2840 break; 2841 }//xxx what should be the behavior if entity reference is present in the content ? 2842 else if (c == '&') { 2843 fEntityScanner.scanChar(NameType.REFERENCE); 2844 setScannerState(SCANNER_STATE_REFERENCE); 2845 break; 2846 }///xxx since this part is also characters, it should be merged... 
2847 else if (c != -1 && isInvalidLiteral(c)) { 2848 if (XMLChar.isHighSurrogate(c)) { 2849 // special case: surrogates 2850 scanSurrogates(fContentBuffer) ; 2851 setScannerState(SCANNER_STATE_CONTENT); 2852 } else { 2853 reportFatalError("InvalidCharInContent", 2854 new Object[] { 2855 Integer.toString(c, 16)}); 2856 fEntityScanner.scanChar(null); 2857 } 2858 break; 2859 } 2860 //xxx: scanContent also gives character callback. 2861 c = scanContent(fContentBuffer) ; 2862 //we should not be iterating again if fIsCoalesce is not set to true 2863 2864 if(!fIsCoalesce){ 2865 setScannerState(SCANNER_STATE_CONTENT); 2866 break; 2867 } 2868 2869 }while(true); 2870 2871 //if (fDocumentHandler != null) { 2872 // fDocumentHandler.characters(fContentBuffer, null); 2873 //} 2874 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2875 //if fIsCoalesce is true there might be more data so call fDriver.next() 2876 if(fIsCoalesce){ 2877 fLastSectionWasCharacterData = true ; 2878 continue; 2879 }else{ 2880 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2881 if(DEBUG)System.out.println("Return SPACE EVENT"); 2882 return XMLEvent.SPACE; 2883 } else 2884 return XMLEvent.CHARACTERS ; 2885 } 2886 } 2887 2888 case SCANNER_STATE_END_ELEMENT_TAG :{ 2889 if(fEmptyElement){ 2890 //set it back to false. 2891 fEmptyElement = false; 2892 setScannerState(SCANNER_STATE_CONTENT); 2893 //check the case when there is comment after single element document 2894 //<foo/> and some comment after this 2895 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
2896 XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2897 2898 } else if(scanEndElement() == 0) { 2899 //It is last element of the document 2900 if (elementDepthIsZeroHook()) { 2901 //if element depth is zero , it indicates the end of the document 2902 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2903 //xxx understand this point once again.. 2904 return XMLEvent.END_ELEMENT ; 2905 } 2906 2907 } 2908 setScannerState(SCANNER_STATE_CONTENT); 2909 return XMLEvent.END_ELEMENT ; 2910 } 2911 2912 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2913 scanComment(); 2914 setScannerState(SCANNER_STATE_CONTENT); 2915 return XMLEvent.COMMENT; 2916 //break; 2917 } 2918 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2919 //clear the buffer first 2920 fContentBuffer.clear() ; 2921 //xxx: which buffer should be passed. Ideally we shouldn't have 2922 //more than two buffers -- 2923 //xxx: where should we add the switch for buffering. 2924 scanPI(fContentBuffer); 2925 setScannerState(SCANNER_STATE_CONTENT); 2926 return XMLEvent.PROCESSING_INSTRUCTION; 2927 //break; 2928 } 2929 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2930 //xxx: What if CDATA is the first event 2931 //<foo><![CDATA[hello<><>]]>append</foo> 2932 2933 //we should not clear the buffer only when the last state was 2934 //either SCANNER_STATE_REFERENCE or 2935 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2936 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2937 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2938 fLastSectionWasCData = true ; 2939 fLastSectionWasEntityReference = false; 2940 fLastSectionWasCharacterData = false; 2941 }//if we dont need to coalesce clear the buffer 2942 else{ 2943 fContentBuffer.clear(); 2944 } 2945 fUsebuffer = true; 2946 //CDATA section is read up to the chunk size limit 2947 scanCDATASection(fContentBuffer , true); 2948 if (!fCDataEnd) { 2949 setScannerState(SCANNER_STATE_CDATA); 2950 } else { 2951 
setScannerState(SCANNER_STATE_CONTENT); 2952 } 2953 //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2954 //and just call fDispatche.next(). Since we have set the scanner state to 2955 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2956 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2957 //2. Check if application has set for reporting CDATA event 2958 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2959 //return the cdata event as characters. 2960 if (fIsCoalesce) { 2961 fLastSectionWasCData = true ; 2962 //there might be more data to coalesce. 2963 continue; 2964 } else if(fReportCdataEvent) { 2965 return XMLEvent.CDATA; 2966 } else { 2967 return XMLEvent.CHARACTERS; 2968 } 2969 } 2970 2971 case SCANNER_STATE_REFERENCE :{ 2972 fMarkupDepth++; 2973 foundBuiltInRefs = false; 2974 2975 //we should not clear the buffer only when the last state was 2976 //either CDATA or 2977 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2978 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2979 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2980 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2981 //used when fIsCoalesce is set to true. 
2982 fLastSectionWasEntityReference = true ; 2983 fLastSectionWasCData = false; 2984 fLastSectionWasCharacterData = false; 2985 }//if we dont need to coalesce clear the buffer 2986 else{ 2987 fContentBuffer.clear(); 2988 } 2989 fUsebuffer = true ; 2990 //take care of character reference 2991 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 2992 scanCharReferenceValue(fContentBuffer, null); 2993 fMarkupDepth--; 2994 if(!fIsCoalesce){ 2995 setScannerState(SCANNER_STATE_CONTENT); 2996 return XMLEvent.CHARACTERS; 2997 } 2998 } else { 2999 // this function also starts new entity 3000 scanEntityReference(fContentBuffer); 3001 //if there was built-in entity reference & coalesce is not true 3002 //return CHARACTERS 3003 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3004 setScannerState(SCANNER_STATE_CONTENT); 3005 if (builtInRefCharacterHandled) { 3006 builtInRefCharacterHandled = false; 3007 return XMLEvent.ENTITY_REFERENCE; 3008 } else { 3009 return XMLEvent.CHARACTERS; 3010 } 3011 } 3012 3013 //if there was a text declaration, call next() it will be taken care. 
3014 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3015 fLastSectionWasEntityReference = true ; 3016 continue; 3017 } 3018 3019 if(fScannerState == SCANNER_STATE_REFERENCE){ 3020 setScannerState(SCANNER_STATE_CONTENT); 3021 if (fReplaceEntityReferences && 3022 fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3023 // Skip the entity reference, we don't care 3024 continue; 3025 } 3026 return XMLEvent.ENTITY_REFERENCE; 3027 } 3028 } 3029 //Wether it was character reference, entity reference or built-in entity 3030 //set the next possible state to SCANNER_STATE_CONTENT 3031 setScannerState(SCANNER_STATE_CONTENT); 3032 fLastSectionWasEntityReference = true ; 3033 continue; 3034 } 3035 3036 case SCANNER_STATE_TEXT_DECL: { 3037 // scan text decl 3038 if (fEntityScanner.skipString("<?xml")) { 3039 fMarkupDepth++; 3040 // NOTE: special case where entity starts with a PI 3041 // whose name starts with "xml" (e.g. "xmlfoo") 3042 if (isValidNameChar(fEntityScanner.peekChar())) { 3043 fStringBuffer.clear(); 3044 fStringBuffer.append("xml"); 3045 3046 if (fNamespaces) { 3047 while (isValidNCName(fEntityScanner.peekChar())) { 3048 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3049 } 3050 } else { 3051 while (isValidNameChar(fEntityScanner.peekChar())) { 3052 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3053 } 3054 } 3055 String target = fSymbolTable.addSymbol(fStringBuffer.ch, 3056 fStringBuffer.offset, fStringBuffer.length); 3057 fContentBuffer.clear(); 3058 scanPIData(target, fContentBuffer); 3059 } 3060 3061 // standard text declaration 3062 else { 3063 //xxx: this function gives callback 3064 scanXMLDeclOrTextDecl(true); 3065 } 3066 } 3067 // now that we've straightened out the readers, we can read in chunks: 3068 fEntityManager.fCurrentEntity.mayReadChunks = true; 3069 setScannerState(SCANNER_STATE_CONTENT); 3070 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 
3071 //it seems we have to careful when to allow function issue a callback 3072 //and when to allow adapter issue a callback. 3073 continue; 3074 } 3075 3076 3077 case SCANNER_STATE_ROOT_ELEMENT: { 3078 if (scanRootElementHook()) { 3079 fEmptyElement = true; 3080 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3081 return XMLEvent.START_ELEMENT; 3082 } 3083 setScannerState(SCANNER_STATE_CONTENT); 3084 return XMLEvent.START_ELEMENT ; 3085 } 3086 case SCANNER_STATE_CHAR_REFERENCE : { 3087 fContentBuffer.clear(); 3088 scanCharReferenceValue(fContentBuffer, null); 3089 fMarkupDepth--; 3090 setScannerState(SCANNER_STATE_CONTENT); 3091 return XMLEvent.CHARACTERS; 3092 } 3093 default: 3094 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3095 3096 }//switch 3097 } 3098 // encoding errors 3099 catch (MalformedByteSequenceException e) { 3100 fErrorReporter.reportError(e.getDomain(), e.getKey(), 3101 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3102 return -1; 3103 } 3104 catch (CharConversionException e) { 3105 fErrorReporter.reportError( 3106 XMLMessageFormatter.XML_DOMAIN, 3107 "CharConversionFailure", 3108 null, 3109 XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3110 return -1; 3111 } 3112 // premature end of file 3113 catch (EOFException e) { 3114 endOfFileHook(e); 3115 return -1; 3116 } 3117 } //while loop 3118 }//next 3119 3120 // 3121 // Protected methods 3122 // 3123 3124 // hooks 3125 3126 // NOTE: These hook methods are added so that the full document 3127 // scanner can share the majority of code with this class. 3128 3129 /** 3130 * Scan for DOCTYPE hook. This method is a hook for subclasses 3131 * to add code to handle scanning for a the "DOCTYPE" string 3132 * after the string "<!" has been scanned. 3133 * 3134 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3135 * was not scanned. 
3136 */ 3137 protected boolean scanForDoctypeHook() 3138 throws IOException, XNIException { 3139 return false; 3140 } // scanForDoctypeHook():boolean 3141 3142 /** 3143 * Element depth iz zero. This methos is a hook for subclasses 3144 * to add code to handle when the element depth hits zero. When 3145 * scanning a document fragment, an element depth of zero is 3146 * normal. However, when scanning a full XML document, the 3147 * scanner must handle the trailing miscellanous section of 3148 * the document after the end of the document's root element. 3149 * 3150 * @return True if the caller should stop and return true which 3151 * allows the scanner to switch to a new scanning 3152 * driver. A return value of false indicates that 3153 * the content driver should continue as normal. 3154 */ 3155 protected boolean elementDepthIsZeroHook() 3156 throws IOException, XNIException { 3157 return false; 3158 } // elementDepthIsZeroHook():boolean 3159 3160 /** 3161 * Scan for root element hook. This method is a hook for 3162 * subclasses to add code that handles scanning for the root 3163 * element. When scanning a document fragment, there is no 3164 * "root" element. However, when scanning a full XML document, 3165 * the scanner must handle the root element specially. 3166 * 3167 * @return True if the caller should stop and return true which 3168 * allows the scanner to switch to a new scanning 3169 * driver. A return value of false indicates that 3170 * the content driver should continue as normal. 3171 */ 3172 protected boolean scanRootElementHook() 3173 throws IOException, XNIException { 3174 return false; 3175 } // scanRootElementHook():boolean 3176 3177 /** 3178 * End of file hook. This method is a hook for subclasses to 3179 * add code that handles the end of file. The end of file in 3180 * a document fragment is OK if the markup depth is zero. 3181 * However, when scanning a full XML document, an end of file 3182 * is always premature. 
3183 */ 3184 protected void endOfFileHook(EOFException e) 3185 throws IOException, XNIException { 3186 3187 // NOTE: An end of file is only only an error if we were 3188 // in the middle of scanning some markup. -Ac 3189 if (fMarkupDepth != 0) { 3190 reportFatalError("PrematureEOF", null); 3191 } 3192 3193 } // endOfFileHook() 3194 3195 } // class FragmentContentDriver 3196 3197 static void pr(String str) { 3198 System.out.println(str) ; 3199 } 3200 3201 protected boolean fUsebuffer ; 3202 3203 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3204 * maintained for attributes. 3205 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3206 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3207 * XMLString. 3208 * 3209 * @return XMLString XMLString used to store an attribute value. 3210 */ 3211 3212 protected XMLString getString(){ 3213 if(fAttributeCacheUsedCount < initialCacheCount || 3214 fAttributeCacheUsedCount < attributeValueCache.size()){ 3215 return attributeValueCache.get(fAttributeCacheUsedCount++); 3216 } else{ 3217 XMLString str = new XMLString(); 3218 fAttributeCacheUsedCount++; 3219 attributeValueCache.add(str); 3220 return str; 3221 } 3222 } 3223 3224 /** 3225 * Implements XMLBufferListener interface. 3226 */ 3227 3228 public void refresh(){ 3229 refresh(0); 3230 } 3231 3232 /** 3233 * receives callbacks from {@link XMLEntityReader } when buffer 3234 * is being changed. 3235 * @param refreshPosition 3236 */ 3237 public void refresh(int refreshPosition){ 3238 //If you are reading attributes and you got a callback 3239 //cache available attributes. 
3240 if(fReadingAttributes){ 3241 fAttributes.refresh(); 3242 } 3243 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3244 bufferContent(); 3245 } 3246 } 3247 3248 /** 3249 * Since 'TempString' shares the buffer (a char array) with the CurrentEntity, 3250 * when the cursor position reaches the end, that is, before the buffer is 3251 * being loaded with new data, the content in the TempString needs to be 3252 * copied into the ContentBuffer. 3253 */ 3254 private void bufferContent() { 3255 fContentBuffer.append(fTempString); 3256 //clear the XMLString so that data can't be added again. 3257 fTempString.length = 0; 3258 fUsebuffer = true; 3259 } 3260 } // class XMLDocumentFragmentScannerImpl