1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 27 28 import java.io.EOFException; 29 import java.io.IOException; 30 import javax.xml.stream.XMLInputFactory; 31 import javax.xml.stream.events.XMLEvent; 32 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 33 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 34 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLChar; 36 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 37 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 38 import com.sun.org.apache.xerces.internal.xni.QName; 39 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 40 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 41 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 42 import 
com.sun.org.apache.xerces.internal.xni.XMLString; 43 import com.sun.org.apache.xerces.internal.xni.XNIException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 49 import com.sun.org.apache.xerces.internal.xni.Augmentations; 50 import com.sun.org.apache.xerces.internal.impl.Constants; 51 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 52 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 53 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 54 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 55 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 56 import javax.xml.stream.XMLStreamConstants; 57 58 /** 59 * 60 * This class is responsible for scanning the structure and content 61 * of document fragments. 62 * 63 * This class has been modified as per the new design which is more suited to 64 * efficiently build pull parser. Lot of improvements have been done and 65 * the code has been added to support stax functionality/features. 66 * 67 * @author Neeraj Bajaj SUN Microsystems 68 * @author K.Venugopal SUN Microsystems 69 * @author Glenn Marcy, IBM 70 * @author Andy Clark, IBM 71 * @author Arnaud Le Hors, IBM 72 * @author Eric Ye, IBM 73 * @author Sunitha Reddy, SUN Microsystems 74 * 75 */ 76 public class XMLDocumentFragmentScannerImpl 77 extends XMLScanner 78 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 79 80 // 81 // Constants 82 // 83 84 protected int fElementAttributeLimit, fXMLNameLimit; 85 86 /** External subset resolver. 
     **/
    protected ExternalSubsetResolver fExternalSubsetResolver;

    // scanner states

    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    /** Scanner state: reading an attribute name, e.g. 'type' in {@code <book type="hard">}. */
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    /** Scanner state: reading an attribute value, e.g. "hard" in {@code <book type="hard">}. */
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    // Scanner state "trailing misc" (value 32) is not declared here; the
    // original note says it is used by the document scanner implementation.
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: Text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    /** Scanner state: start tag, e.g. {@code <book type="hard">} in {@code <book type="hard">foo</book>}. */
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    /** Scanner state: end tag, e.g. {@code </book>} in {@code <book type="hard">foo</book>}. */
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    /** Scanner state: character reference. */
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;

    /** Scanner state: built-in entity references. */
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard URI conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /**
     * Default for the "access external DTD" setting; it is the initial value
     * of {@code fAccessExternalDTD}.
     * Original note: "access external dtd: file protocol. For DOM/SAX, the
     * secure feature is set to true by default".
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features. Index-aligned with {@code FEATURE_DEFAULTS}. */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT
    };

    /**
     * Feature defaults, one entry per element of {@code RECOGNIZED_FEATURES}
     * in the same order; a null entry means no default is reported.
     */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,   // NAMESPACES
        null,           // VALIDATION
        Boolean.FALSE,  // NOTIFY_BUILTIN_REFS
        Boolean.FALSE,  // NOTIFY_CHAR_REFS
        Boolean.TRUE    // Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Recognized properties.
     * Index-aligned with {@code PROPERTY_DEFAULTS}.
     */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER
    };

    /**
     * Property defaults, one entry per element of {@code RECOGNIZED_PROPERTIES}
     * in the same order; none of the properties reports a default.
     */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        null
    };

    /** The characters "[CDATA[". */
    private static final char [] cdata = {'[','C','D','A','T','A','['};
    /** The characters "<?xml". */
    static final char [] xmlDecl = {'<','?','x','m','l'};
    // private static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    /** General debug flag. */
    protected static final boolean DEBUG = false;
    /** Debug coalescing. */
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    /** Last scanner state; compared against XMLEvent constants in getElementQName(). */
    protected int fScannerLastState ;

    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;

    /** Entity stack: records fMarkupDepth for each entity depth (see startEntity/endEntity). */
    protected int[] fEntityStack = new int[4];

    /** Markup depth. */
    protected int fMarkupDepth;

    /** Whether the element is empty. */
    protected boolean fEmptyElement ;

    // Tracks whether we are reading attributes; this is useful while
    // there is a callback.
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method.
     */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /** Standalone: whether the declaration carried a standalone pseudo-attribute. */
    protected boolean fStandaloneSet;
    /** Standalone: true when the standalone pseudo-attribute was "yes". */
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /* Document system identifier (disabled).
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     */

    /** Target of the most recently scanned processing instruction (set in scanPIData). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    // StAX related properties
    // default values.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding named in the most recently scanned XML/text declaration, or null. */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    /**
     * comma-delimited list of protocols that are allowed for the purpose
     * of accessing external dtd or entity references
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * standard uri conformant (strict uri).
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver. */
    protected Driver fDriver;

    /** Content driver. */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** Temporary string. */
    protected XMLString fTempString = new XMLString();

    /** Second temporary string. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings; filled by scanXMLDeclOrTextDecl with the version,
     * encoding and standalone pseudo-attribute values, in that order.
     */
    private String[] fStrings = new String[3];

    /** String buffer, accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Second string buffer, accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Content buffer, accessible to derived classes. It receives comment text
     * (scanComment) and is what getPIData() returns; getCharacterData()
     * returns it when fUsebuffer is set.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array.
     */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    /** Built-in reference character event */
    protected boolean builtInRefCharacterHandled = false;

    // skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    // element array of length ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    // pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    // 2D array (MAX_DEPTH_LIMIT x MAX_POINTER_AT_A_DEPTH) to store pointer info
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     * <p>
     * Registers this scanner as the entity manager's entity handler and
     * starts the pseudo-entity named "$fragment$" on the given source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
     */
    public void setInputSource(XMLInputSource inputSource) throws IOException {
        fEntityManager.setEntityHandler(this);
        fEntityManager.startEntity(false, "$fragment$", inputSource, false, true);
        // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
    } // setInputSource(XMLInputSource)

    /**
     * Scans a document.
428 * 429 * @param complete True if the scanner should scan the document 430 * completely, pushing all events to the registered 431 * document handler. A value of false indicates that 432 * that the scanner should only scan the next portion 433 * of the document and return. A scanner instance is 434 * permitted to completely scan a document if it does 435 * not support this "pull" scanning model. 436 * 437 * @return True if there is more to scan, false otherwise. 438 */ 439 public boolean scanDocument(boolean complete) 440 throws IOException, XNIException { 441 442 // keep dispatching "events" 443 fEntityManager.setEntityHandler(this); 444 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 445 446 int event = next(); 447 do { 448 switch (event) { 449 case XMLStreamConstants.START_DOCUMENT : 450 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 451 break; 452 case XMLStreamConstants.START_ELEMENT : 453 //System.out.println(" in scann element"); 454 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 455 break; 456 case XMLStreamConstants.CHARACTERS : 457 fDocumentHandler.characters(getCharacterData(),null); 458 break; 459 case XMLStreamConstants.SPACE: 460 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
461 //System.out.println("in the space"); 462 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 463 break; 464 case XMLStreamConstants.ENTITY_REFERENCE : 465 //entity reference callback are given in startEntity 466 break; 467 case XMLStreamConstants.PROCESSING_INSTRUCTION : 468 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 469 break; 470 case XMLStreamConstants.COMMENT : 471 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 472 fDocumentHandler.comment(getCharacterData(),null); 473 break; 474 case XMLStreamConstants.DTD : 475 //all DTD related callbacks are handled in DTDScanner. 476 //1. Stax doesn't define DTD states as it does for XML Document. 477 //therefore we don't need to take care of anything here. So Just break; 478 break; 479 case XMLStreamConstants.CDATA: 480 fDocumentHandler.startCDATA(null); 481 //xxx: check if CDATA values comes from getCharacterData() function 482 fDocumentHandler.characters(getCharacterData(),null); 483 fDocumentHandler.endCDATA(null); 484 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 485 break; 486 case XMLStreamConstants.NOTATION_DECLARATION : 487 break; 488 case XMLStreamConstants.ENTITY_DECLARATION : 489 break; 490 case XMLStreamConstants.NAMESPACE : 491 break; 492 case XMLStreamConstants.ATTRIBUTE : 493 break; 494 case XMLStreamConstants.END_ELEMENT : 495 //do not give callback here. 496 //this callback is given in scanEndElement function. 
497 //fDocumentHandler.endElement(getElementQName(),null); 498 break; 499 default : 500 throw new InternalError("processing event: " + event); 501 502 } 503 //System.out.println("here in before calling next"); 504 event = next(); 505 //System.out.println("here in after calling next"); 506 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 507 508 if(event == XMLStreamConstants.END_DOCUMENT) { 509 fDocumentHandler.endDocument(null); 510 return false; 511 } 512 513 return true; 514 515 } // scanDocument(boolean):boolean 516 517 518 519 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 520 if(fScannerLastState == XMLEvent.END_ELEMENT){ 521 fElementQName.setValues(fElementStack.getLastPoppedElement()); 522 } 523 return fElementQName ; 524 } 525 526 /** return the next state on the input 527 * @return int 528 */ 529 530 public int next() throws IOException, XNIException { 531 return fDriver.next(); 532 } 533 534 // 535 // XMLComponent methods 536 // 537 538 /** 539 * Resets the component. The component can query the component manager 540 * about any features and properties that affect the operation of the 541 * component. 542 * 543 * @param componentManager The component manager. 544 * 545 * @throws SAXException Thrown by component on initialization error. 546 * For example, if a feature or property is 547 * required for the operation of the component, the 548 * component manager may throw a 549 * SAXNotRecognizedException or a 550 * SAXNotSupportedException. 
551 */ 552 553 public void reset(XMLComponentManager componentManager) 554 throws XMLConfigurationException { 555 556 super.reset(componentManager); 557 558 // other settings 559 // fDocumentSystemId = null; 560 561 // sax features 562 //fAttributes.setNamespaces(fNamespaces); 563 564 // xerces features 565 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 566 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 567 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 568 569 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 570 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 571 (ExternalSubsetResolver) resolver : null; 572 573 //attribute 574 fReadingAttributes = false; 575 //xxx: external entities are supported in Xerces 576 // it would be good to define feature for this case 577 fSupportExternalEntities = true; 578 fReplaceEntityReferences = true; 579 fIsCoalesce = false; 580 581 // setup Driver 582 setScannerState(SCANNER_STATE_CONTENT); 583 setDriver(fContentDriver); 584 585 // JAXP 1.5 features and properties 586 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 587 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 588 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 589 590 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 591 592 resetCommon(); 593 //fEntityManager.test(); 594 } // reset(XMLComponentManager) 595 596 597 public void reset(PropertyManager propertyManager){ 598 599 super.reset(propertyManager); 600 601 // other settings 602 // fDocumentSystemId = null; 603 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 604 fNotifyBuiltInRefs = false ; 605 606 //fElementStack2.clear(); 607 //fReplaceEntityReferences = true; 608 
//fSupportExternalEntities = true; 609 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 610 fReplaceEntityReferences = bo.booleanValue(); 611 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 612 fSupportExternalEntities = bo.booleanValue(); 613 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 614 if(cdata != null) 615 fReportCdataEvent = cdata.booleanValue() ; 616 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 617 if(coalesce != null) 618 fIsCoalesce = coalesce.booleanValue(); 619 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 620 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 621 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 622 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 623 // setup Driver 624 //we dont need to do this -- nb. 
625 //setScannerState(SCANNER_STATE_CONTENT); 626 //setDriver(fContentDriver); 627 //fEntityManager.test(); 628 629 // JAXP 1.5 features and properties 630 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 631 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 632 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 633 634 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 635 resetCommon(); 636 } // reset(XMLComponentManager) 637 638 void resetCommon() { 639 // initialize vars 640 fMarkupDepth = 0; 641 fCurrentElement = null; 642 fElementStack.clear(); 643 fHasExternalDTD = false; 644 fStandaloneSet = false; 645 fStandalone = false; 646 fInScanContent = false; 647 //skipping algorithm 648 fShouldSkip = false; 649 fAdd = false; 650 fSkip = false; 651 652 fEntityStore = fEntityManager.getEntityStore(); 653 dtdGrammarUtil = null; 654 655 if (fSecurityManager != null) { 656 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 657 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 658 } else { 659 fElementAttributeLimit = 0; 660 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 661 } 662 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 663 } 664 665 /** 666 * Returns a list of feature identifiers that are recognized by 667 * this component. This method may return null if no features 668 * are recognized by this component. 669 */ 670 public String[] getRecognizedFeatures() { 671 return (String[])(RECOGNIZED_FEATURES.clone()); 672 } // getRecognizedFeatures():String[] 673 674 /** 675 * Sets the state of a feature. This method is called by the component 676 * manager any time after reset when a feature changes state. 677 * <p> 678 * <strong>Note:</strong> Components should silently ignore features 679 * that do not affect the operation of the component. 
680 * 681 * @param featureId The feature identifier. 682 * @param state The state of the feature. 683 * 684 * @throws SAXNotRecognizedException The component should not throw 685 * this exception. 686 * @throws SAXNotSupportedException The component should not throw 687 * this exception. 688 */ 689 public void setFeature(String featureId, boolean state) 690 throws XMLConfigurationException { 691 692 super.setFeature(featureId, state); 693 694 // Xerces properties 695 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 696 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 697 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 698 fNotifyBuiltInRefs = state; 699 } 700 } 701 702 } // setFeature(String,boolean) 703 704 /** 705 * Returns a list of property identifiers that are recognized by 706 * this component. This method may return null if no properties 707 * are recognized by this component. 708 */ 709 public String[] getRecognizedProperties() { 710 return (String[])(RECOGNIZED_PROPERTIES.clone()); 711 } // getRecognizedProperties():String[] 712 713 /** 714 * Sets the value of a property. This method is called by the component 715 * manager any time after reset when a property changes value. 716 * <p> 717 * <strong>Note:</strong> Components should silently ignore properties 718 * that do not affect the operation of the component. 719 * 720 * @param propertyId The property identifier. 721 * @param value The value of the property. 722 * 723 * @throws SAXNotRecognizedException The component should not throw 724 * this exception. 725 * @throws SAXNotSupportedException The component should not throw 726 * this exception. 
727 */ 728 public void setProperty(String propertyId, Object value) 729 throws XMLConfigurationException { 730 731 super.setProperty(propertyId, value); 732 733 // Xerces properties 734 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 735 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 736 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 737 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 738 fEntityManager = (XMLEntityManager)value; 739 return; 740 } 741 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 742 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 743 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 744 (ExternalSubsetResolver) value : null; 745 return; 746 } 747 } 748 749 750 // Xerces properties 751 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 752 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 753 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 754 fEntityManager = (XMLEntityManager)value; 755 } 756 return; 757 } 758 759 //JAXP 1.5 properties 760 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 761 { 762 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 763 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 764 } 765 766 } // setProperty(String,Object) 767 768 /** 769 * Returns the default state for a feature, or null if this 770 * component does not want to report a default value for this 771 * feature. 772 * 773 * @param featureId The feature identifier. 
774 * 775 * @since Xerces 2.2.0 776 */ 777 public Boolean getFeatureDefault(String featureId) { 778 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 779 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 780 return FEATURE_DEFAULTS[i]; 781 } 782 } 783 return null; 784 } // getFeatureDefault(String):Boolean 785 786 /** 787 * Returns the default state for a property, or null if this 788 * component does not want to report a default value for this 789 * property. 790 * 791 * @param propertyId The property identifier. 792 * 793 * @since Xerces 2.2.0 794 */ 795 public Object getPropertyDefault(String propertyId) { 796 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 797 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 798 return PROPERTY_DEFAULTS[i]; 799 } 800 } 801 return null; 802 } // getPropertyDefault(String):Object 803 804 // 805 // XMLDocumentSource methods 806 // 807 808 /** 809 * setDocumentHandler 810 * 811 * @param documentHandler 812 */ 813 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 814 fDocumentHandler = documentHandler; 815 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 816 } // setDocumentHandler(XMLDocumentHandler) 817 818 819 /** Returns the document handler */ 820 public XMLDocumentHandler getDocumentHandler(){ 821 return fDocumentHandler; 822 } 823 824 // 825 // XMLEntityHandler methods 826 // 827 828 /** 829 * This method notifies of the start of an entity. The DTD has the 830 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 831 * general entities are just specified by their name. 832 * 833 * @param name The name of the entity. 834 * @param identifier The resource identifier. 835 * @param encoding The auto-detected IANA encoding name of the entity 836 * stream. This value will be null in those situations 837 * where the entity encoding is not auto-detected (e.g. 838 * internal entities or a document entity that is 839 * parsed from a java.io.Reader). 
840 * @param augs Additional information that may include infoset augmentations 841 * 842 * @throws XNIException Thrown by handler to signal an error. 843 */ 844 public void startEntity(String name, 845 XMLResourceIdentifier identifier, 846 String encoding, Augmentations augs) throws XNIException { 847 848 // keep track of this entity before fEntityDepth is increased 849 if (fEntityDepth == fEntityStack.length) { 850 int[] entityarray = new int[fEntityStack.length * 2]; 851 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 852 fEntityStack = entityarray; 853 } 854 fEntityStack[fEntityDepth] = fMarkupDepth; 855 856 super.startEntity(name, identifier, encoding, augs); 857 858 // WFC: entity declared in external subset in standalone doc 859 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 860 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 861 new Object[]{name}); 862 } 863 864 /** we are not calling the handlers yet.. */ 865 // call handler 866 if (fDocumentHandler != null && !fScanningAttribute) { 867 if (!name.equals("[xml]")) { 868 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 869 } 870 } 871 872 } // startEntity(String,XMLResourceIdentifier,String) 873 874 /** 875 * This method notifies the end of an entity. The DTD has the pseudo-name 876 * of "[dtd]" parameter entity names start with '%'; and general entities 877 * are just specified by their name. 878 * 879 * @param name The name of the entity. 880 * @param augs Additional information that may include infoset augmentations 881 * 882 * @throws XNIException Thrown by handler to signal an error. 
883 */ 884 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 885 886 /** 887 * // flush possible pending output buffer - see scanContent 888 * if (fInScanContent && fStringBuffer.length != 0 889 * && fDocumentHandler != null) { 890 * fDocumentHandler.characters(fStringBuffer, null); 891 * fStringBuffer.length = 0; // make sure we know it's been flushed 892 * } 893 */ 894 super.endEntity(name, augs); 895 896 // make sure markup is properly balanced 897 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 898 reportFatalError("MarkupEntityMismatch", null); 899 } 900 901 /**/ 902 // call handler 903 if (fDocumentHandler != null && !fScanningAttribute) { 904 if (!name.equals("[xml]")) { 905 fDocumentHandler.endGeneralEntity(name, augs); 906 } 907 } 908 909 910 } // endEntity(String) 911 912 // 913 // Protected methods 914 // 915 916 // Driver factory methods 917 918 /** Creates a content Driver. */ 919 protected Driver createContentDriver() { 920 return new FragmentContentDriver(); 921 } // createContentDriver():Driver 922 923 // scanning methods 924 925 /** 926 * Scans an XML or text declaration. 927 * <p> 928 * <pre> 929 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 930 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 931 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 932 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 933 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 934 * | ('"' ('yes' | 'no') '"')) 935 * 936 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 937 * </pre> 938 * 939 * @param scanningTextDecl True if a text declaration is to 940 * be scanned instead of an XML 941 * declaration. 
942 */ 943 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 944 throws IOException, XNIException { 945 946 // scan decl 947 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 948 fMarkupDepth--; 949 950 // pseudo-attribute values 951 String version = fStrings[0]; 952 String encoding = fStrings[1]; 953 String standalone = fStrings[2]; 954 fDeclaredEncoding = encoding; 955 // set standalone 956 fStandaloneSet = standalone != null; 957 fStandalone = fStandaloneSet && standalone.equals("yes"); 958 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 959 //but this information is only related with Document Entity. 960 fEntityManager.setStandalone(fStandalone); 961 962 963 // call handler 964 if (fDocumentHandler != null) { 965 if (scanningTextDecl) { 966 fDocumentHandler.textDecl(version, encoding, null); 967 } else { 968 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 969 } 970 } 971 972 if(version != null){ 973 fEntityScanner.setVersion(version); 974 fEntityScanner.setXMLVersion(version); 975 } 976 // set encoding on reader, only if encoding was not specified by the application explicitly 977 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 978 fEntityScanner.setEncoding(encoding); 979 } 980 981 } // scanXMLDeclOrTextDecl(boolean) 982 983 public String getPITarget(){ 984 return fPITarget ; 985 } 986 987 public XMLStringBuffer getPIData(){ 988 return fContentBuffer ; 989 } 990 991 //XXX: why not this function behave as per the state of the parser? 992 public XMLString getCharacterData(){ 993 if(fUsebuffer){ 994 return fContentBuffer ; 995 }else{ 996 return fTempString; 997 } 998 999 } 1000 1001 1002 /** 1003 * Scans a processing data. This is needed to handle the situation 1004 * where a document starts with a processing instruction whose 1005 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1006 * 1007 * @param target The PI target 1008 * @param data The XMLStringBuffer to fill in with the data 1009 */ 1010 protected void scanPIData(String target, XMLStringBuffer data) 1011 throws IOException, XNIException { 1012 1013 super.scanPIData(target, data); 1014 1015 //set the PI target and values 1016 fPITarget = target ; 1017 1018 fMarkupDepth--; 1019 1020 } // scanPIData(String) 1021 1022 /** 1023 * Scans a comment. 1024 * <p> 1025 * <pre> 1026 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1027 * </pre> 1028 * <p> 1029 * <strong>Note:</strong> Called after scanning past '<!--' 1030 */ 1031 protected void scanComment() throws IOException, XNIException { 1032 fContentBuffer.clear(); 1033 scanComment(fContentBuffer); 1034 //getTextCharacters can also be called for reading comments 1035 fUsebuffer = true; 1036 fMarkupDepth--; 1037 1038 } // scanComment() 1039 1040 //xxx value returned by this function may not remain valid if another event is scanned. 1041 public String getComment(){ 1042 return fContentBuffer.toString(); 1043 } 1044 1045 void addElement(String rawname){ 1046 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1047 //storing element raw name in a linear list of array 1048 fElementArray[fElementPointer] = rawname ; 1049 //storing elemnetPointer for particular element depth 1050 1051 if(DEBUG_SKIP_ALGORITHM){ 1052 StringBuffer sb = new StringBuffer() ; 1053 sb.append(" Storing element information ") ; 1054 sb.append(" fElementPointer = " + fElementPointer) ; 1055 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1056 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1057 System.out.println(sb.toString()) ; 1058 } 1059 1060 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1061 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1062 short column = storePointerForADepth(fElementPointer); 1063 if(column > 0){ 1064 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1065 //identity comparison shouldn't take much time and we can rely on this 1066 //since its guaranteed to have same object id for same string. 1067 if(rawname == fElementArray[pointer]){ 1068 fShouldSkip = true ; 1069 fLastPointerLocation = pointer ; 1070 //reset the things and return. 1071 resetPointer((short)fElementStack.fDepth , column) ; 1072 fElementArray[fElementPointer] = null ; 1073 return ; 1074 }else{ 1075 fShouldSkip = false ; 1076 } 1077 } 1078 } 1079 fElementPointer++ ; 1080 } 1081 } 1082 1083 1084 void resetPointer(short depth, short column){ 1085 fPointerInfo[depth] [column] = (short)0; 1086 } 1087 1088 //returns column information at which pointer was stored. 1089 short storePointerForADepth(short elementPointer){ 1090 short depth = (short) fElementStack.fDepth ; 1091 1092 //Stores element pointer locations at particular depth , only 4 pointer locations 1093 //are stored at particular depth for now. 1094 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1095 1096 if(canStore(depth, i)){ 1097 fPointerInfo[depth][i] = elementPointer ; 1098 if(DEBUG_SKIP_ALGORITHM){ 1099 StringBuffer sb = new StringBuffer() ; 1100 sb.append(" Pointer information ") ; 1101 sb.append(" fElementPointer = " + fElementPointer) ; 1102 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1103 sb.append(" column = " + i ) ; 1104 System.out.println(sb.toString()) ; 1105 } 1106 return i; 1107 } 1108 //else 1109 //pointer was not stored because we reached the limit 1110 } 1111 return -1 ; 1112 } 1113 1114 boolean canStore(short depth, short column){ 1115 //colum = 0 , means first element at particular depth 1116 //column = 1, means second element at particular depth 1117 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1118 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1119 } 1120 1121 1122 short getElementPointer(short depth, short column){ 1123 //colum = 0 , means first element at particular depth 1124 //column = 1, means second element at particular depth 1125 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1126 return fPointerInfo[depth][column] ; 1127 } 1128 1129 //this function assumes that string passed is not null and skips 1130 //the following string from the buffer this makes sure 1131 boolean skipFromTheBuffer(String rawname) throws IOException{ 1132 if(fEntityScanner.skipString(rawname)){ 1133 char c = (char)fEntityScanner.peekChar() ; 1134 //If the start element was completely skipped we should encounter either ' '(space), 1135 //or '/' (in case of empty element) or '>' 1136 if( c == ' ' || c == '/' || c == '>'){ 1137 fElementRawname = rawname ; 1138 return true ; 1139 } else{ 1140 return false; 1141 } 1142 } else 1143 return false ; 1144 } 1145 1146 boolean skipQElement(String rawname) throws IOException{ 1147 1148 final int c = fEntityScanner.getChar(rawname.length()); 1149 //if this character is still valid element name -- this means string can't match 1150 if(XMLChar.isName(c)){ 1151 return false; 1152 }else{ 1153 return fEntityScanner.skipString(rawname); 1154 } 1155 } 1156 1157 protected boolean skipElement() throws IOException { 1158 1159 if(!fShouldSkip) return false ; 1160 1161 if(fLastPointerLocation != 0){ 1162 //Look at the next element stored in the array list.. we might just get a match. 1163 String rawname = fElementArray[fLastPointerLocation + 1] ; 1164 if(rawname != null && skipFromTheBuffer(rawname)){ 1165 fLastPointerLocation++ ; 1166 if(DEBUG_SKIP_ALGORITHM){ 1167 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1168 } 1169 return true ; 1170 } else{ 1171 //reset it back to zero... we haven't got the correct subset yet. 
1172 fLastPointerLocation = 0 ; 1173 1174 } 1175 } 1176 //xxx: we can put some logic here as from what column it should start looking 1177 //for now we always start at 0 1178 //fallback to tolerant algorithm, it would look for differnt element stored at different 1179 //depth and get us the pointer location. 1180 return fShouldSkip && skipElement((short)0); 1181 1182 } 1183 1184 //start of the column at which it should try searching 1185 boolean skipElement(short column) throws IOException { 1186 short depth = (short)fElementStack.fDepth ; 1187 1188 if(depth > MAX_DEPTH_LIMIT){ 1189 return fShouldSkip = false ; 1190 } 1191 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1192 short pointer = getElementPointer(depth , i ) ; 1193 1194 if(pointer == 0){ 1195 return fShouldSkip = false ; 1196 } 1197 1198 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1199 if(DEBUG_SKIP_ALGORITHM){ 1200 System.out.println(); 1201 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1202 System.out.println(); 1203 } 1204 fLastPointerLocation = pointer ; 1205 return fShouldSkip = true ; 1206 } 1207 } 1208 return fShouldSkip = false ; 1209 } 1210 1211 /** 1212 * Scans a start element. This method will handle the binding of 1213 * namespace information and notifying the handler of the start 1214 * of the element. 1215 * <p> 1216 * <pre> 1217 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1218 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1219 * </pre> 1220 * <p> 1221 * <strong>Note:</strong> This method assumes that the leading 1222 * '<' character has been consumed. 1223 * <p> 1224 * <strong>Note:</strong> This method uses the fElementQName and 1225 * fAttributes variables. The contents of these variables will be 1226 * destroyed. The caller should copy important information out of 1227 * these variables before calling this method. 
1228 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1229 * 1230 * @return True if element is empty. (i.e. It matches 1231 * production [44]. 1232 */ 1233 // fElementQName will have the details of element just read.. 1234 // fAttributes will have the details of all the attributes. 1235 protected boolean scanStartElement() 1236 throws IOException, XNIException { 1237 1238 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1239 //when skipping is true and no more elements should be added 1240 if(fSkip && !fAdd){ 1241 //get the stored element -- if everything goes right this should match the 1242 //token in the buffer 1243 1244 QName name = fElementStack.getNext(); 1245 1246 if(DEBUG_SKIP_ALGORITHM){ 1247 System.out.println("Trying to skip String = " + name.rawname); 1248 } 1249 1250 //Be conservative -- if skipping fails -- stop. 1251 fSkip = fEntityScanner.skipString(name.rawname); 1252 1253 if(fSkip){ 1254 if(DEBUG_SKIP_ALGORITHM){ 1255 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1256 } 1257 fElementStack.push(); 1258 fElementQName = name; 1259 }else{ 1260 //if skipping fails reposition the stack or fallback to normal way of processing 1261 fElementStack.reposition(); 1262 if(DEBUG_SKIP_ALGORITHM){ 1263 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1264 } 1265 } 1266 } 1267 1268 //we are still at the stage of adding elements 1269 //the elements were not matched or 1270 //fSkip is not set to true 1271 if(!fSkip || fAdd){ 1272 //get the next element from the stack 1273 fElementQName = fElementStack.nextElement(); 1274 // name 1275 if (fNamespaces) { 1276 fEntityScanner.scanQName(fElementQName); 1277 } else { 1278 String name = fEntityScanner.scanName(); 1279 fElementQName.setValues(null, name, name, null); 1280 } 1281 1282 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1283 if(DEBUG_SKIP_ALGORITHM){ 1284 if(fAdd){ 1285 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1286 } 1287 } 1288 1289 } 1290 1291 //when the elements are being added , we need to check if we are set for skipping the elements 1292 if(fAdd){ 1293 //this sets the value of fAdd variable 1294 fElementStack.matchElement(fElementQName); 1295 } 1296 1297 1298 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1299 fCurrentElement = fElementQName; 1300 1301 String rawname = fElementQName.rawname; 1302 1303 fEmptyElement = false; 1304 1305 fAttributes.removeAllAttributes(); 1306 1307 checkDepth(rawname); 1308 if(!seekCloseOfStartTag()){ 1309 fReadingAttributes = true; 1310 fAttributeCacheUsedCount =0; 1311 fStringBufferIndex =0; 1312 fAddDefaultAttr = true; 1313 do { 1314 scanAttribute(fAttributes); 1315 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1316 fAttributes.getLength() > fElementAttributeLimit){ 1317 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1318 "ElementAttributeLimit", 1319 new Object[]{rawname, fElementAttributeLimit }, 1320 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1321 } 1322 1323 } while (!seekCloseOfStartTag()); 1324 fReadingAttributes=false; 1325 } 1326 1327 if (fEmptyElement) { 1328 //decrease the markup depth.. 1329 fMarkupDepth--; 1330 1331 // check that this element was opened in the same entity 1332 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1333 reportFatalError("ElementEntityMismatch", 1334 new Object[]{fCurrentElement.rawname}); 1335 } 1336 // call handler 1337 if (fDocumentHandler != null) { 1338 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1339 } 1340 1341 //We should not be popping out the context here in endELement becaause the namespace context is still 1342 //valid when parser is at the endElement state. 
1343 //if (fNamespaces) { 1344 // fNamespaceContext.popContext(); 1345 //} 1346 1347 //pop the element off the stack.. 1348 fElementStack.popElement(); 1349 1350 } else { 1351 1352 if(dtdGrammarUtil != null) 1353 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1354 if(fDocumentHandler != null){ 1355 //complete element and attributes are traversed in this function so we can send a callback 1356 //here. 1357 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1358 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1359 } 1360 } 1361 1362 1363 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1364 return fEmptyElement; 1365 1366 } // scanStartElement():boolean 1367 1368 /** 1369 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1370 * Characters are consumed. 1371 */ 1372 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1373 // spaces 1374 boolean sawSpace = fEntityScanner.skipSpaces(); 1375 1376 // end tag? 1377 final int c = fEntityScanner.peekChar(); 1378 if (c == '>') { 1379 fEntityScanner.scanChar(); 1380 return true; 1381 } else if (c == '/') { 1382 fEntityScanner.scanChar(); 1383 if (!fEntityScanner.skipChar('>')) { 1384 reportFatalError("ElementUnterminated", 1385 new Object[]{fElementQName.rawname}); 1386 } 1387 fEmptyElement = true; 1388 return true; 1389 } else if (!isValidNameStartChar(c) || !sawSpace) { 1390 // Second chance. Check if this character is a high 1391 // surrogate of a valid name start character. 1392 if (!isValidNameStartHighSurrogate(c) || !sawSpace) { 1393 reportFatalError("ElementUnterminated", 1394 new Object[]{fElementQName.rawname}); 1395 } 1396 } 1397 1398 return false; 1399 } 1400 1401 public boolean hasAttributes(){ 1402 return fAttributes.getLength() > 0 ? true : false ; 1403 } 1404 1405 1406 /** 1407 * Scans an attribute. 
1408 * <p> 1409 * <pre> 1410 * [41] Attribute ::= Name Eq AttValue 1411 * </pre> 1412 * <p> 1413 * <strong>Note:</strong> This method assumes that the next 1414 * character on the stream is the first character of the attribute 1415 * name. 1416 * <p> 1417 * <strong>Note:</strong> This method uses the fAttributeQName and 1418 * fQName variables. The contents of these variables will be 1419 * destroyed. 1420 * 1421 * @param attributes The attributes list for the scanned attribute. 1422 */ 1423 1424 /** 1425 * protected void scanAttribute(AttributeIteratorImpl attributes) 1426 * throws IOException, XNIException { 1427 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1428 * 1429 * 1430 * // name 1431 * if (fNamespaces) { 1432 * fEntityScanner.scanQName(fAttributeQName); 1433 * } 1434 * else { 1435 * String name = fEntityScanner.scanName(); 1436 * fAttributeQName.setValues(null, name, name, null); 1437 * } 1438 * 1439 * // equals 1440 * fEntityScanner.skipSpaces(); 1441 * if (!fEntityScanner.skipChar('=')) { 1442 * reportFatalError("EqRequiredInAttribute", 1443 * new Object[]{fAttributeQName.rawname}); 1444 * } 1445 * fEntityScanner.skipSpaces(); 1446 * 1447 * 1448 * // content 1449 * int oldLen = attributes.getLength(); 1450 */ 1451 /**xxx there is one check of duplicate attribute that has been removed. 
1452 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1453 * 1454 * // WFC: Unique Att Spec 1455 * if (oldLen == attributes.getLength()) { 1456 * reportFatalError("AttributeNotUnique", 1457 * new Object[]{fCurrentElement.rawname, 1458 * fAttributeQName.rawname}); 1459 * } 1460 */ 1461 1462 /* 1463 //REVISIT: one more case needs to be included: external PE and standalone is no 1464 boolean isVC = fHasExternalDTD && !fStandalone; 1465 scanAttributeValue(fTempString, fTempString2, 1466 fAttributeQName.rawname, attributes, 1467 oldLen, isVC); 1468 1469 //attributes.setValue(oldLen, fTempString.toString()); 1470 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1471 //attributes.setSpecified(oldLen, true); 1472 1473 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1474 fAttributes.addAttribute(attribute); 1475 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1476 } // scanAttribute(XMLAttributes) 1477 1478 */ 1479 1480 /** return the attribute iterator implementation */ 1481 public XMLAttributesIteratorImpl getAttributeIterator(){ 1482 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1483 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1484 fAddDefaultAttr = false; 1485 } 1486 return fAttributes; 1487 } 1488 1489 /** return if standalone is set */ 1490 public boolean standaloneSet(){ 1491 return fStandaloneSet; 1492 } 1493 /** return if the doucment is standalone */ 1494 public boolean isStandAlone(){ 1495 return fStandalone ; 1496 } 1497 /** 1498 * Scans an attribute name value pair. 1499 * <p> 1500 * <pre> 1501 * [41] Attribute ::= Name Eq AttValue 1502 * </pre> 1503 * <p> 1504 * <strong>Note:</strong> This method assumes that the next 1505 * character on the stream is the first character of the attribute 1506 * name. 
1507 * <p> 1508 * <strong>Note:</strong> This method uses the fAttributeQName and 1509 * fQName variables. The contents of these variables will be 1510 * destroyed. 1511 * 1512 * @param attributes The attributes list for the scanned attribute. 1513 */ 1514 1515 protected void scanAttribute(XMLAttributes attributes) 1516 throws IOException, XNIException { 1517 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1518 1519 // name 1520 if (fNamespaces) { 1521 fEntityScanner.scanQName(fAttributeQName); 1522 } else { 1523 String name = fEntityScanner.scanName(); 1524 fAttributeQName.setValues(null, name, name, null); 1525 } 1526 1527 // equals 1528 fEntityScanner.skipSpaces(); 1529 if (!fEntityScanner.skipChar('=')) { 1530 reportFatalError("EqRequiredInAttribute", 1531 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1532 } 1533 fEntityScanner.skipSpaces(); 1534 1535 int attIndex = 0 ; 1536 //REVISIT: one more case needs to be included: external PE and standalone is no 1537 boolean isVC = fHasExternalDTD && !fStandalone; 1538 //fTempString would store attribute value 1539 ///fTempString2 would store attribute non-normalized value 1540 1541 //this function doesn't use 'attIndex'. We are adding the attribute later 1542 //after we have figured out that current attribute is not namespace declaration 1543 //since scanAttributeValue doesn't use attIndex parameter therefore we 1544 //can safely add the attribute later.. 1545 XMLString tmpStr = getString(); 1546 1547 scanAttributeValue(tmpStr, fTempString2, 1548 fAttributeQName.rawname, attributes, 1549 attIndex, isVC, fCurrentElement.rawname); 1550 1551 // content 1552 int oldLen = attributes.getLength(); 1553 //if the attribute name already exists.. 
new value is replaced with old value 1554 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1555 1556 // WFC: Unique Att Spec 1557 //attributes count will be same if the current attribute name already exists for this element name. 1558 //this means there are two duplicate attributes. 1559 if (oldLen == attributes.getLength()) { 1560 reportFatalError("AttributeNotUnique", 1561 new Object[]{fCurrentElement.rawname, 1562 fAttributeQName.rawname}); 1563 } 1564 1565 //tmpString contains attribute value 1566 //we are passing null as the attribute value 1567 attributes.setValue(attIndex, null, tmpStr); 1568 1569 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1570 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1571 attributes.setSpecified(attIndex, true); 1572 1573 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1574 1575 } // scanAttribute(XMLAttributes) 1576 1577 /** 1578 * Scans element content. 1579 * 1580 * @return Returns the next character on the stream. 1581 */ 1582 //CHANGED: 1583 //EARLIER: scanContent() 1584 //NOW: scanContent(XMLStringBuffer) 1585 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1586 //this function appends the data to the buffer. 1587 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1588 //set the fTempString length to 0 before passing it on to scanContent 1589 //scanContent sets the correct co-ordinates as per the content read 1590 fTempString.length = 0; 1591 int c = fEntityScanner.scanContent(fTempString); 1592 content.append(fTempString); 1593 fTempString.length = 0; 1594 if (c == '\r') { 1595 // happens when there is the character reference 1596 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1597 fEntityScanner.scanChar(); 1598 content.append((char)c); 1599 c = -1; 1600 } else if (c == ']') { 1601 //fStringBuffer.clear(); 1602 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1603 content.append((char)fEntityScanner.scanChar()); 1604 // remember where we are in case we get an endEntity before we 1605 // could flush the buffer out - this happens when we're parsing an 1606 // entity which ends with a ] 1607 fInScanContent = true; 1608 // 1609 // We work on a single character basis to handle cases such as: 1610 // ']]]>' which we might otherwise miss. 1611 // 1612 if (fEntityScanner.skipChar(']')) { 1613 content.append(']'); 1614 while (fEntityScanner.skipChar(']')) { 1615 content.append(']'); 1616 } 1617 if (fEntityScanner.skipChar('>')) { 1618 reportFatalError("CDEndInContent", null); 1619 } 1620 } 1621 fInScanContent = false; 1622 c = -1; 1623 } 1624 if (fDocumentHandler != null && content.length > 0) { 1625 //fDocumentHandler.characters(content, null); 1626 } 1627 return c; 1628 1629 } // scanContent():int 1630 1631 1632 /** 1633 * Scans a CDATA section. 1634 * <p> 1635 * <strong>Note:</strong> This method uses the fTempString and 1636 * fStringBuffer variables. 1637 * 1638 * @param complete True if the CDATA section is to be scanned 1639 * completely. 1640 * 1641 * @return True if CDATA is completely scanned. 1642 */ 1643 //CHANGED: 1644 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1645 throws IOException, XNIException { 1646 1647 // call handler 1648 if (fDocumentHandler != null) { 1649 //fDocumentHandler.startCDATA(null); 1650 } 1651 1652 while (true) { 1653 //scanData will fill the contentBuffer 1654 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1655 break ; 1656 /** We dont need all this code if we pass ']]>' as delimeter.. 
1657 * int brackets = 2; 1658 * while (fEntityScanner.skipChar(']')) { 1659 * brackets++; 1660 * } 1661 * 1662 * //When we find more than 2 square brackets 1663 * if (fDocumentHandler != null && brackets > 2) { 1664 * //we dont need to clear the buffer.. 1665 * //contentBuffer.clear(); 1666 * for (int i = 2; i < brackets; i++) { 1667 * contentBuffer.append(']'); 1668 * } 1669 * fDocumentHandler.characters(contentBuffer, null); 1670 * } 1671 * 1672 * if (fEntityScanner.skipChar('>')) { 1673 * break; 1674 * } 1675 * if (fDocumentHandler != null) { 1676 * //we dont need to clear the buffer now.. 1677 * //contentBuffer.clear(); 1678 * contentBuffer.append("]]"); 1679 * fDocumentHandler.characters(contentBuffer, null); 1680 * } 1681 **/ 1682 } else { 1683 int c = fEntityScanner.peekChar(); 1684 if (c != -1 && isInvalidLiteral(c)) { 1685 if (XMLChar.isHighSurrogate(c)) { 1686 //contentBuffer.clear(); 1687 //scan surrogates if any.... 1688 scanSurrogates(contentBuffer); 1689 } else { 1690 reportFatalError("InvalidCharInCDSect", 1691 new Object[]{Integer.toString(c,16)}); 1692 fEntityScanner.scanChar(); 1693 } 1694 } 1695 //by this time we have also read surrogate contents if any... 1696 if (fDocumentHandler != null) { 1697 //fDocumentHandler.characters(contentBuffer, null); 1698 } 1699 } 1700 } 1701 fMarkupDepth--; 1702 1703 if (fDocumentHandler != null && contentBuffer.length > 0) { 1704 //fDocumentHandler.characters(contentBuffer, null); 1705 } 1706 1707 // call handler 1708 if (fDocumentHandler != null) { 1709 //fDocumentHandler.endCDATA(null); 1710 } 1711 1712 return true; 1713 1714 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1715 1716 /** 1717 * Scans an end element. 1718 * <p> 1719 * <pre> 1720 * [42] ETag ::= '</' Name S? '>' 1721 * </pre> 1722 * <p> 1723 * <strong>Note:</strong> This method uses the fElementQName variable. 1724 * The contents of this variable will be destroyed. 
The caller should 1725 * copy the needed information out of this variable before calling 1726 * this method. 1727 * 1728 * @return The element depth. 1729 */ 1730 protected int scanEndElement() throws IOException, XNIException { 1731 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1732 1733 // pop context 1734 QName endElementName = fElementStack.popElement(); 1735 1736 String rawname = endElementName.rawname; 1737 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1738 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1739 //In scanners most of the time is consumed on checks done for XML characters, we can 1740 // optimize on it and avoid the checks done for endElement, 1741 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1742 1743 // this should work both for namespace processing true or false... 1744 1745 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1746 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1747 1748 if (!fEntityScanner.skipString(endElementName.rawname)) { 1749 reportFatalError("ETagRequired", new Object[]{rawname}); 1750 } 1751 1752 // end 1753 fEntityScanner.skipSpaces(); 1754 if (!fEntityScanner.skipChar('>')) { 1755 reportFatalError("ETagUnterminated", 1756 new Object[]{rawname}); 1757 } 1758 fMarkupDepth--; 1759 1760 //we have increased the depth for two markup "<" characters 1761 fMarkupDepth--; 1762 1763 // check that this element was opened in the same entity 1764 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1765 reportFatalError("ElementEntityMismatch", 1766 new Object[]{rawname}); 1767 } 1768 1769 //We should not be popping out the context here in endELement becaause the namespace context is still 1770 //valid when parser is at the endElement state. 
1771 1772 //if (fNamespaces) { 1773 // fNamespaceContext.popContext(); 1774 //} 1775 1776 // call handler 1777 if (fDocumentHandler != null ) { 1778 //end element is scanned in this function so we can send a callback 1779 //here. 1780 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1781 1782 fDocumentHandler.endElement(endElementName, null); 1783 } 1784 if(dtdGrammarUtil != null) 1785 dtdGrammarUtil.endElement(endElementName); 1786 1787 return fMarkupDepth; 1788 1789 } // scanEndElement():int 1790 1791 /** 1792 * Scans a character reference. 1793 * <p> 1794 * <pre> 1795 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1796 * </pre> 1797 */ 1798 protected void scanCharReference() 1799 throws IOException, XNIException { 1800 1801 fStringBuffer2.clear(); 1802 int ch = scanCharReferenceValue(fStringBuffer2, null); 1803 fMarkupDepth--; 1804 if (ch != -1) { 1805 // call handler 1806 1807 if (fDocumentHandler != null) { 1808 if (fNotifyCharRefs) { 1809 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1810 } 1811 Augmentations augs = null; 1812 if (fValidation && ch <= 0x20) { 1813 if (fTempAugmentations != null) { 1814 fTempAugmentations.removeAllItems(); 1815 } 1816 else { 1817 fTempAugmentations = new AugmentationsImpl(); 1818 } 1819 augs = fTempAugmentations; 1820 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1821 } 1822 //xxx: How do we deal with this - how to return charReferenceValues 1823 //now this is being commented because this is taken care in scanDocument() 1824 //fDocumentHandler.characters(fStringBuffer2, null); 1825 if (fNotifyCharRefs) { 1826 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1827 } 1828 } 1829 } 1830 1831 } // scanCharReference() 1832 1833 1834 /** 1835 * Scans an entity reference. 1836 * 1837 * @return returns true if the new entity is started. If it was built-in entity 1838 * 'false' is returned. 1839 * @throws IOException Thrown if i/o error occurs. 
* @throws XNIException Thrown if handler throws exception upon
     *                      notification.
     */
    protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException {
        String name = fEntityScanner.scanName();
        // Nothing usable as an entity name follows: report it and abandon this reference.
        if (name == null) {
            reportFatalError("NameRequiredInReference", null);
            return;
        }
        // A missing ';' is reported, but there is no return here: the method
        // keeps going and processes the name it did read.
        if (!fEntityScanner.skipChar(';')) {
            reportFatalError("SemicolonRequiredInReference", new Object []{name});
        }
        // Likewise reported without returning.
        if (fEntityStore.isUnparsedEntity(name)) {
            reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
        }
        // NOTE(review): presumably balances an fMarkupDepth increment made when
        // the '&' was consumed -- that increment is not visible here, confirm.
        fMarkupDepth--;
        // cache the name; getEntityName() returns this field
        fCurrentEntityName = name;

        // handle built-in entities
        // The five predefined names are compared by identity (==), not equals().
        // NOTE(review): this presumably relies on scanName() handing back the very
        // same String instances as fAmpSymbol, fLtSymbol, ... -- confirm.
        // Each branch appends the replacement character through handleCharacter()
        // and leaves the scanner in SCANNER_STATE_BUILT_IN_REFS.
        if (name == fAmpSymbol) {
            handleCharacter('&', fAmpSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fLtSymbol) {
            handleCharacter('<', fLtSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fGtSymbol) {
            handleCharacter('>', fGtSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fQuotSymbol) {
            handleCharacter('"', fQuotSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fAposSymbol) {
            handleCharacter('\'', fAposSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        }

        // Do not expand the entity; only flag the reference, when
        //  1. the entity is external and external entities are not supported, or
        //  2. the entity is internal and entity references must not be replaced, or
        //  3. a built-in entity reference has been seen (foundBuiltInRefs is set
        //     by handleCharacter()).
        boolean isEE = fEntityStore.isExternalEntity(name);
        if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){
            fScannerState = SCANNER_STATE_REFERENCE;
            return ;
        }
        // start general entity
        if (!fEntityStore.isDeclaredEntity(name)) {
            //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception
            if (!fSupportDTD && fReplaceEntityReferences) {
                reportFatalError("EntityNotDeclared", new Object[]{name});
                return;
            }
            //REVISIT: one more case needs to be included: external PE and standalone is no
            // With an external DTD and standalone not set, an undeclared entity is
            // only reported (at SEVERITY_ERROR) when validating; in every other
            // case it is a fatal error.
            if ( fHasExternalDTD && !fStandalone) {
                if (fValidation)
                    fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
                            new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
            } else
                reportFatalError("EntityNotDeclared", new Object[]{name});
        }
        //we are starting the entity even if the entity was not declared
        //if that was the case it is taken care of in XMLEntityManager.startEntity()
        //we immediately call the endEntity. Application gets to know if there was
        //any entity that was not declared.
        fEntityManager.startEntity(true, name, false);
        //set the scanner state to content.. parser will automatically revive itself at any point of time.
        //setScannerState(SCANNER_STATE_CONTENT);
        //return true ;
    } // scanEntityReference()

    // utility methods

    /**
     * Checks whether the current element depth exceeds the maxElementDepth limit.
     * <p>
     * The depth currently held by {@code fElementStack} is recorded in
     * {@code fLimitAnalyzer}; if the security manager then reports the limit as
     * exceeded, the analyzer state is passed to {@code debugPrint} and a
     * "MaxElementDepthLimit" fatal error is reported.
     *
     * @param elementName name of the current element
     */
    void checkDepth(String elementName) {
        fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth);
        if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) {
            fSecurityManager.debugPrint(fLimitAnalyzer);
            // message arguments: element name, recorded total, configured limit, property name
            reportFatalError("MaxElementDepthLimit", new Object[]{elementName,
                    fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                    fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                    "maxElementDepth"});
        }
    }

    /**
     * Calls document handler with a single character resulting from
     * built-in entity resolution.
     * <p>
     * The character is always appended to {@code content} and
     * {@code foundBuiltInRefs} is always set. The document handler, if any, is
     * told about the entity boundaries only when {@code fNotifyBuiltInRefs} is
     * set, and receives the character itself only when coalescing is off.
     * <p>
     * We really don't need to call this function -- it is only required when
     * we integrate with the rest of Xerces2. So we maintain the current behavior
     * and still call this function to handle built-in entity references.
     *
     * @param c       the replacement character
     * @param entity  built-in entity name
     * @param content buffer the character is appended to
     *
     * @throws XNIException declared for the document handler callbacks made here.
     */
    private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException {
        // read back by scanEntityReference() when deciding whether to expand an entity
        foundBuiltInRefs = true;
        content.append(c);
        if (fDocumentHandler != null) {
            fSingleChar[0] = c;
            if (fNotifyBuiltInRefs) {
                fDocumentHandler.startGeneralEntity(entity, null, null, null);
            }
            // fTempString becomes a one-character view over fSingleChar
            fTempString.setValues(fSingleChar, 0, 1);
            // when coalescing, the character is only buffered in 'content' (above)
            if(!fIsCoalesce){
                fDocumentHandler.characters(fTempString, null);
                builtInRefCharacterHandled = true;
            }

            if (fNotifyBuiltInRefs) {
                fDocumentHandler.endGeneralEntity(entity, null);
            }
        }
    } // handleCharacter(char, String, XMLStringBuffer)

    // helper methods

    /**
     * Sets the scanner state.
1968 * 1969 * @param state The new scanner state. 1970 */ 1971 protected final void setScannerState(int state) { 1972 1973 fScannerState = state; 1974 if (DEBUG_SCANNER_STATE) { 1975 System.out.print("### setScannerState: "); 1976 //System.out.print(fScannerState); 1977 System.out.print(getScannerStateName(state)); 1978 System.out.println(); 1979 } 1980 1981 } // setScannerState(int) 1982 1983 1984 /** 1985 * Sets the Driver. 1986 * 1987 * @param Driver The new Driver. 1988 */ 1989 protected final void setDriver(Driver driver) { 1990 fDriver = driver; 1991 if (DEBUG_DISPATCHER) { 1992 System.out.print("%%% setDriver: "); 1993 System.out.print(getDriverName(driver)); 1994 System.out.println(); 1995 } 1996 } 1997 1998 // 1999 // Private methods 2000 // 2001 2002 /** Returns the scanner state name. */ 2003 protected String getScannerStateName(int state) { 2004 2005 switch (state) { 2006 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 2007 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 2008 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 2009 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 2010 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 2011 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 2012 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 2013 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 2014 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 2015 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 2016 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 2017 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 2018 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2019 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2020 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2021 case SCANNER_STATE_CHARACTER_DATA: 
return "SCANNER_STATE_CHARACTER_DATA" ; 2022 } 2023 2024 return "??? ("+state+')'; 2025 2026 } // getScannerStateName(int):String 2027 public String getEntityName(){ 2028 //return the cached name 2029 return fCurrentEntityName; 2030 } 2031 2032 /** Returns the driver name. */ 2033 public String getDriverName(Driver driver) { 2034 2035 if (DEBUG_DISPATCHER) { 2036 if (driver != null) { 2037 String name = driver.getClass().getName(); 2038 int index = name.lastIndexOf('.'); 2039 if (index != -1) { 2040 name = name.substring(index + 1); 2041 index = name.lastIndexOf('$'); 2042 if (index != -1) { 2043 name = name.substring(index + 1); 2044 } 2045 } 2046 return name; 2047 } 2048 } 2049 return "null"; 2050 2051 } // getDriverName():String 2052 2053 /** 2054 * Check the protocol used in the systemId against allowed protocols 2055 * 2056 * @param systemId the Id of the URI 2057 * @param allowedProtocols a list of allowed protocols separated by comma 2058 * @return the name of the protocol if rejected, null otherwise 2059 */ 2060 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2061 String baseSystemId = fEntityScanner.getBaseSystemId(); 2062 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 2063 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2064 } 2065 2066 // 2067 // Classes 2068 // 2069 2070 /** 2071 * @author Neeraj Bajaj, Sun Microsystems. 2072 */ 2073 protected static final class Element { 2074 2075 // 2076 // Data 2077 // 2078 2079 /** Symbol. */ 2080 public QName qname; 2081 2082 //raw name stored as characters 2083 public char[] fRawname; 2084 2085 /** The next Element entry. */ 2086 public Element next; 2087 2088 // 2089 // Constructors 2090 // 2091 2092 /** 2093 * Constructs a new Element from the given QName and next Element 2094 * reference. 
2095 */ 2096 public Element(QName qname, Element next) { 2097 this.qname.setValues(qname); 2098 this.fRawname = qname.rawname.toCharArray(); 2099 this.next = next; 2100 } 2101 2102 } // class Element 2103 2104 /** 2105 * Element stack. 2106 * 2107 * @author Neeraj Bajaj, Sun Microsystems. 2108 */ 2109 protected class ElementStack2 { 2110 2111 // 2112 // Data 2113 // 2114 2115 /** The stack data. */ 2116 protected QName [] fQName = new QName[20]; 2117 2118 //Element depth 2119 protected int fDepth; 2120 //total number of elements 2121 protected int fCount; 2122 //current position 2123 protected int fPosition; 2124 //Mark refers to the position 2125 protected int fMark; 2126 2127 protected int fLastDepth ; 2128 2129 // 2130 // Constructors 2131 // 2132 2133 /** Default constructor. */ 2134 public ElementStack2() { 2135 for (int i = 0; i < fQName.length; i++) { 2136 fQName[i] = new QName(); 2137 } 2138 fMark = fPosition = 1; 2139 } // <init>() 2140 2141 public void resize(){ 2142 /** 2143 * int length = fElements.length; 2144 * Element [] temp = new Element[length * 2]; 2145 * System.arraycopy(fElements, 0, temp, 0, length); 2146 * fElements = temp; 2147 */ 2148 //resize QNames 2149 int oldLength = fQName.length; 2150 QName [] tmp = new QName[oldLength * 2]; 2151 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2152 fQName = tmp; 2153 2154 for (int i = oldLength; i < fQName.length; i++) { 2155 fQName[i] = new QName(); 2156 } 2157 2158 } 2159 2160 2161 // 2162 // Public methods 2163 // 2164 2165 /** Check if the element scanned during the start element 2166 *matches the stored element. 2167 * 2168 *@return true if the match suceeds. 
2169 */ 2170 public boolean matchElement(QName element) { 2171 //last depth is the depth when last elemnt was pushed 2172 //if last depth is greater than current depth 2173 if(DEBUG_SKIP_ALGORITHM){ 2174 System.out.println("fLastDepth = " + fLastDepth); 2175 System.out.println("fDepth = " + fDepth); 2176 } 2177 boolean match = false; 2178 if(fLastDepth > fDepth && fDepth <= 2){ 2179 if(DEBUG_SKIP_ALGORITHM){ 2180 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2181 } 2182 if(element.rawname == fQName[fDepth].rawname){ 2183 fAdd = false; 2184 //mark this position 2185 //decrease the depth by 1 as arrays are 0 based 2186 fMark = fDepth - 1; 2187 //we found the match and from next element skipping will start, add 1 2188 fPosition = fMark + 1 ; 2189 match = true; 2190 //Once we get match decrease the count -- this was increased by nextElement() 2191 --fCount; 2192 if(DEBUG_SKIP_ALGORITHM){ 2193 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2194 System.out.println("fMark = " + fMark); 2195 System.out.println("fPosition = " + fPosition); 2196 System.out.println("fDepth = " + fDepth); 2197 System.out.println("fCount = " + fCount); 2198 } 2199 }else{ 2200 fAdd = true; 2201 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2202 } 2203 } 2204 //store the last depth 2205 fLastDepth = fDepth++; 2206 return match; 2207 } // pushElement(QName):QName 2208 2209 /** 2210 * This function doesn't increase depth. The function in this function is 2211 *broken down into two functions for efficiency. <@see>matchElement</see>. 2212 * This function just returns the pointer to the object and its values are set. 
2213 * 2214 *@return QName reference to the next element in the list 2215 */ 2216 public QName nextElement() { 2217 2218 //if number of elements becomes equal to the length of array -- stop the skipping 2219 if (fCount == fQName.length) { 2220 fShouldSkip = false; 2221 fAdd = false; 2222 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2223 //xxx: this is not correct, we are returning the last element 2224 //this wont make any difference since flag has been set to 'false' 2225 return fQName[--fCount]; 2226 } 2227 if(DEBUG_SKIP_ALGORITHM){ 2228 System.out.println("fCount = " + fCount); 2229 } 2230 return fQName[fCount++]; 2231 2232 } 2233 2234 /** Note that this function is considerably different than nextElement() 2235 * This function just returns the previously stored elements 2236 */ 2237 public QName getNext(){ 2238 //when position reaches number of elements in the list.. 2239 //set the position back to mark, making it a circular linked list. 2240 if(fPosition == fCount){ 2241 fPosition = fMark; 2242 } 2243 return fQName[fPosition++]; 2244 } 2245 2246 /** returns the current depth 2247 */ 2248 public int popElement(){ 2249 return fDepth--; 2250 } 2251 2252 2253 /** Clears the stack without throwing away existing QName objects. */ 2254 public void clear() { 2255 fLastDepth = 0; 2256 fDepth = 0; 2257 fCount = 0 ; 2258 fPosition = fMark = 1; 2259 } // clear() 2260 2261 } // class ElementStack 2262 2263 /** 2264 * Element stack. This stack operates without synchronization, error 2265 * checking, and it re-uses objects instead of throwing popped items 2266 * away. 2267 * 2268 * @author Andy Clark, IBM 2269 */ 2270 protected class ElementStack { 2271 2272 // 2273 // Data 2274 // 2275 2276 /** The stack data. 
*/
        protected QName[] fElements;
        // Per-depth index into fElements (written by push() and matchElement()).
        // NOTE(review): fixed at 20 entries and never grown in this class, unlike
        // fElements -- confirm the depth can never exceed it while it is in use.
        protected int [] fInt = new int[20];


        //Element depth
        protected int fDepth;
        //total number of elements
        protected int fCount;
        //current position
        protected int fPosition;
        //Mark refers to the position
        protected int fMark;

        //depth recorded by the previous matchElement() call
        protected int fLastDepth ;

        //
        // Constructors
        //

        /** Default constructor: pre-allocates 20 QName slots. */
        public ElementStack() {
            fElements = new QName[20];
            for (int i = 0; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        } // <init>()

        //
        // Public methods
        //

        /**
         * Pushes an element on the stack.
         * <p>
         * <strong>Note:</strong> The QName values are copied into the
         * stack. In other words, the caller does <em>not</em> orphan
         * the element to the stack. Also, the QName object returned
         * is <em>not</em> orphaned to the caller. It should be
         * considered read-only.
         *
         * @param element The element to push onto the stack.
         *
         * @return Returns the actual QName object that stores the
         *         copied values.
         */
        //XXX: THIS FUNCTION IS NOT USED
        public QName pushElement(QName element) {
            // full: double the array and allocate QNames for the new slots
            if (fDepth == fElements.length) {
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }
            fElements[fDepth].setValues(element);
            return fElements[fDepth++];
        } // pushElement(QName):QName


        /**
         * Note that this function is considerably different than nextElement().
         * This function just returns the previously stored element at the
         * current position; the position itself is advanced by {@link #push()}.
         *
         * @return the stored QName at the current position
         */
        public QName getNext(){
            //when position reaches number of elements in the list..
            //set the position back to mark, making it a circular linked list.
            if(fPosition == fCount){
                fPosition = fMark;
            }
            //store the position of last opened tag at particular depth
            //fInt[++fDepth] = fPosition;
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
            }
            //return fElements[fPosition++];
            return fElements[fPosition];
        }

        /**
         * This function should be called only when element was skipped successfully.
         * <ol>
         * <li>Increase the depth - because element was successfully skipped.</li>
         * <li>Store the position of the element token in array "last opened tag" at depth.</li>
         * <li>Increase the position counter so as to point to the next element in the array.</li>
         * </ol>
         */
        public void push(){

            fInt[++fDepth] = fPosition++;
        }

        /**
         * Check if the element scanned during the start element
         * matches the stored element.
         * <p>
         * Besides the comparison, every call records in {@code fInt} which
         * array slot belongs to the current depth, and switches skipping off
         * (returning false) once all slots of the element array are in use.
         *
         * @param element the element that was just scanned
         * @return true if the match succeeds.
         */
        public boolean matchElement(QName element) {
            //last depth is the depth when last element was pushed
            //if last depth is greater than current depth
            //if(DEBUG_SKIP_ALGORITHM){
            //   System.out.println("Check if the element " + element.rawname + " matches");
            //  System.out.println("fLastDepth = " + fLastDepth);
            // System.out.println("fDepth = " + fDepth);
            //}
            boolean match = false;
            if(fLastDepth > fDepth && fDepth <= 3){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                    System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
                }
                // identity comparison of the raw names
                // NOTE(review): presumably both are interned symbols -- confirm.
                if(element.rawname == fElements[fDepth - 1].rawname){
                    fAdd = false;
                    //mark this position
                    //decrease the depth by 1 as arrays are 0 based
                    fMark = fDepth - 1;
                    //we found the match
                    fPosition = fMark;
                    match = true;
                    //Once we get match decrease the count -- this was increased by nextElement()
                    --fCount;
                    if(DEBUG_SKIP_ALGORITHM){
                        System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                        System.out.println("fMark = " + fMark);
                        System.out.println("fPosition = " + fPosition);
                        System.out.println("fDepth = " + fDepth);
                        System.out.println("fCount = " + fCount);
                        System.out.println("---------MATCH SUCEEDED-----------------");
                        System.out.println("");
                    }
                }else{
                    fAdd = true;
                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
                }
            }
            //store the position for the current depth
            //when we are adding the elements, when skipping
            //starts even then this should be tracked ie. when
            //calling getNext()
            if(match){
                //from next element skipping will start, add 1
                fInt[fDepth] = fPosition++;
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
                }
                //since fInt[fDepth] contains pointer to the element array which are 0 based.
                fInt[fDepth] = fCount - 1;
            }

            //if number of elements becomes equal to the length of array -- stop the skipping
            //xxx: should we do "fCount == fInt.length"
            if (fCount == fElements.length) {
                fSkip = false;
                fAdd = false;
                //reposition the stack -- it seems to be too complex document and there is no symmetry in structure
                reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                    System.out.println("REPOSITIONING THE STACK");
                    System.out.println("-----------SKIPPING STOPPED----------");
                    System.out.println("");
                }
                // note: false is returned here even if 'match' was set above
                return false;
            }
            if(DEBUG_SKIP_ALGORITHM){
                if(match){
                    System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }else{
                    System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }
            }
            //store the last depth
            fLastDepth = fDepth;
            return match;
        } // matchElement(QName):boolean


        /**
         * Returns the next element on the stack.
         * <p>
         * While skipping is active the slot is taken at {@code fCount}; otherwise
         * it is taken at {@code fDepth}, growing the array first when it is full.
         * The depth is incremented in both cases.
         *
         * @return Returns the actual QName object. Caller should
         * use this object to store the details of next element encountered.
         */
        public QName nextElement() {
            if(fSkip){
                fDepth++;
                //boundary checks are done in matchElement()
                return fElements[fCount++];
            } else if (fDepth == fElements.length) {
                // full: double the array and allocate QNames for the new slots
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }

            return fElements[fDepth++];

        } // nextElement():QName


        /**
         * Pops an element off of the stack and returns the stored QName object
         * itself (no values are copied).
         * <p>
         * <strong>Note:</strong> The object returned is <em>not</em>
         * orphaned to the caller. Therefore, the caller should consider
         * the object to be read-only.
         *
         * @return the QName stored for the element being closed
         */
        public QName popElement() {
            //return the same object that was pushed -- this would avoid
            //setting the values for every end element.
            //STRONG: this object is read only -- this object reference shouldn't be stored.
            if(fSkip || fAdd ){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
                    System.out.println("");
                }
                // skip bookkeeping active: the slot is looked up through fInt
                return fElements[fInt[fDepth--]];
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    // NOTE(review): this trace reads fElements[fDepth], one slot above
                    // the element actually returned below (fElements[fDepth - 1]).
                    System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
                }
                return fElements[--fDepth] ;
            }
            //element.setValues(fElements[--fDepth]);
        } // popElement():QName

        /**
         * Reposition the stack. fInt [] contains all the opened tags at particular depth.
         * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
         * as per the depth.
         * <p>
         * Only references are moved: after the call two array slots may refer to
         * the same QName object.
         */
        public void reposition(){
            for( int i = 2 ; i <= fDepth ; i++){
                fElements[i-1] = fElements[fInt[i]];
            }
            if(DEBUG_SKIP_ALGORITHM){
                for( int i = 0 ; i < fDepth ; i++){
                    System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
                }
            }
        }

        /** Clears the stack without throwing away existing QName objects. */
        public void clear() {
            fDepth = 0;
            fLastDepth = 0;
            fCount = 0 ;
            fPosition = fMark = 1;

        } // clear()

        /**
         * This function is as a result of optimization done for endElement --
         * we don't need to set the value for every end element encountered.
         * For well-formedness checks we can have the same QName object that was pushed.
         * The values will be set only if application need to know about the endElement
         * -- neeraj.bajaj@sun.com
         *
         * @return the QName stored at the current depth index
         */

        public QName getLastPoppedElement(){
            return fElements[fDepth];
        }
    } // class ElementStack

    /**
     * Drives the parser to the next state/event on the input. Parser is guaranteed
     * to stop at the next state/event.
     *
     * Internally XML document is divided into several states. Each state represents
     * a section of XML document. When this function returns normally, it has read
     * the section of XML document and returns the state corresponding to section of
     * document which has been read. For optimizations, a particular driver
     * can read ahead of the section of document (state returned) just read and
     * can maintain a different internal state.
     *
     *
     * @author Neeraj Bajaj, Sun Microsystems
     */
    protected interface Driver {


        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event.
         *
         * Internally XML document is divided into several states. Each state represents
         * a section of XML document. When this function returns normally, it has read
         * the section of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * @return state representing the section of document just read.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */

        public int next() throws IOException, XNIException;

    } // interface Driver

    /**
     * Driver to handle content scanning. This driver is capable of reading
     * the fragment of XML document.
When it has finished reading fragment
     * of XML documents, it can pass the job of reading to another driver.
     *
     * This class has been modified as per the new design which is more suited to
     * efficiently build pull parser. Lot of performance improvements have been done and
     * the code has been added to support stax functionality/features.
     *
     * @author Neeraj Bajaj, Sun Microsystems
     *
     *
     * @author Andy Clark, IBM
     * @author Eric Ye, IBM
     */
    protected class FragmentContentDriver
            implements Driver {

        //
        // Driver methods
        //

        /**
         * Decides the appropriate state of the parser from the character that
         * follows a {@code '<'}.
         * <p>
         * The markup depth is incremented on every call. A name start character
         * is only peeked, not consumed; {@code '?'}, {@code '!'} and {@code '/'}
         * are consumed. Anything else is reported as
         * "MarkupNotRecognizedInContent".
         *
         * @throws IOException declared for the entity scanner calls made here.
         */
        private void startOfMarkup() throws IOException {
            fMarkupDepth++;
            final int ch = fEntityScanner.peekChar();
            if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
                setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
            } else {
                switch(ch){
                    // "<?" : processing instruction
                    case '?' :{
                        setScannerState(SCANNER_STATE_PI);
                        fEntityScanner.skipChar(ch);
                        break;
                    }
                    // "<!" : comment, CDATA section, or whatever scanForDoctypeHook() accepts
                    case '!' :{
                        fEntityScanner.skipChar(ch);
                        if (fEntityScanner.skipChar('-')) {
                            // a single '-' is reported, but the comment state is
                            // still entered afterwards
                            if (!fEntityScanner.skipChar('-')) {
                                reportFatalError("InvalidCommentStart",
                                        null);
                            }
                            setScannerState(SCANNER_STATE_COMMENT);
                        } else if (fEntityScanner.skipString(cdata)) {
                            setScannerState(SCANNER_STATE_CDATA );
                        } else if (!scanForDoctypeHook()) {
                            reportFatalError("MarkupNotRecognizedInContent",
                                    null);
                        }
                        break;
                    }
                    // "</" : end tag
                    case '/' :{
                        setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
                        fEntityScanner.skipChar(ch);
                        break;
                    }
                    default :{
                        reportFatalError("MarkupNotRecognizedInContent", null);
                    }
                }
            }

        }//startOfMarkup

        /**
         * Decides the scanner state at the start of content: {@code '<'} (consumed)
         * leads to start-of-markup, {@code '&'} (consumed) to a reference, and
         * anything else to character data.
         *
         * @throws IOException declared for the entity scanner calls made here.
         */
        private void startOfContent() throws IOException {
            if (fEntityScanner.skipChar('<')) {
                setScannerState(SCANNER_STATE_START_OF_MARKUP);
            } else if (fEntityScanner.skipChar('&')) {
                setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
            } else {
                //element content is there..
                setScannerState(SCANNER_STATE_CHARACTER_DATA);
            }
        }//startOfContent


        /**
         *
         * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser.
         * At any point of time when in doubt over the current state of the parser, the state should be
         * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of
         * the parser to one of its sub states.
         * Sub states are defined in the parser on the basis of different XML components like
         * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc..
         * These sub states help the parser to have fine control over the parsing. These are the
         * different mileposts; the parser stops at each sub state (milepost). Based on this state it is
         * decided if the parser needs to stop at the next milepost.
         * <p>
         * The loop below keeps resolving until the state is neither of the two
         * super states.
         *
         * @throws IOException declared by the helper methods called here.
         */
        public void decideSubState() throws IOException {
            while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){

                switch (fScannerState) {

                    case SCANNER_STATE_CONTENT: {
                        startOfContent() ;
                        break;
                    }

                    case SCANNER_STATE_START_OF_MARKUP: {
                        startOfMarkup() ;
                        break;
                    }
                }
            }
        }//decideSubState

        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event. Internally XML document
         * is divided into several states. Each state represents a section of XML
         * document. When this function returns normally, it has read the section
         * of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * State returned corresponds to Stax states.
         *
         * @return state representing the section of document just read.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */

        public int next() throws IOException, XNIException {
            while (true) {
                try {
                    if(DEBUG_NEXT){
                        System.out.println("NOW IN FragmentContentDriver");
                        System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState));
                    }

                    //decide the actual sub state of the scanner.For more information refer to the javadoc of
                    //decideSubState.
2714 2715 switch (fScannerState) { 2716 case SCANNER_STATE_CONTENT: { 2717 final int ch = fEntityScanner.peekChar(); 2718 if (ch == '<') { 2719 fEntityScanner.scanChar(); 2720 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2721 } else if (ch == '&') { 2722 fEntityScanner.scanChar(); 2723 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2724 break; 2725 } else { 2726 //element content is there.. 2727 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2728 break; 2729 } 2730 } 2731 2732 case SCANNER_STATE_START_OF_MARKUP: { 2733 startOfMarkup(); 2734 break; 2735 }//case: SCANNER_STATE_START_OF_MARKUP 2736 2737 }//end of switch 2738 //decideSubState() ; 2739 2740 //do some special handling if isCoalesce is set to true. 2741 if(fIsCoalesce){ 2742 fUsebuffer = true ; 2743 //if the last section was character data 2744 if(fLastSectionWasCharacterData){ 2745 2746 //if we dont encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2747 //return the last scanned charactrer data. 2748 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2749 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2750 fLastSectionWasCharacterData = false; 2751 return XMLEvent.CHARACTERS; 2752 } 2753 }//if last section was CDATA or ENTITY REFERENCE 2754 //xxx: there might be another entity reference or CDATA after this 2755 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2756 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2757 //and current state is not SCANNER_STATE_CHARACTER_DATA 2758 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2759 //this means there is nothing more to be coalesced. 2760 //return the CHARACTERS event. 
2761 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2762 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2763 2764 fLastSectionWasCData = false; 2765 fLastSectionWasEntityReference = false; 2766 return XMLEvent.CHARACTERS; 2767 } 2768 } 2769 } 2770 2771 2772 if(DEBUG_NEXT){ 2773 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2774 } 2775 2776 switch(fScannerState){ 2777 2778 case XMLEvent.START_DOCUMENT : 2779 return XMLEvent.START_DOCUMENT; 2780 2781 case SCANNER_STATE_START_ELEMENT_TAG :{ 2782 2783 //xxx this function returns true when element is empty.. can be linked to end element event. 2784 //returns true if the element is empty 2785 fEmptyElement = scanStartElement() ; 2786 //if the element is empty the next event is "end element" 2787 if(fEmptyElement){ 2788 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2789 }else{ 2790 //set the next possible state 2791 setScannerState(SCANNER_STATE_CONTENT); 2792 } 2793 return XMLEvent.START_ELEMENT ; 2794 } 2795 2796 case SCANNER_STATE_CHARACTER_DATA: { 2797 if(DEBUG_COALESCE){ 2798 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2799 System.out.println("fIsCoalesce = " + fIsCoalesce); 2800 } 2801 //if last section was either entity reference or cdata or character data we should be using buffer 2802 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2803 2804 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2805 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2806 fLastSectionWasEntityReference = false; 2807 fLastSectionWasCData = false; 2808 fLastSectionWasCharacterData = true ; 2809 fUsebuffer = true; 2810 }else{ 2811 //clear the buffer 2812 fContentBuffer.clear(); 2813 } 2814 2815 //set the fTempString length to 0 before passing it on to scanContent 2816 //scanContent sets the correct co-ordinates as per the content read 2817 fTempString.length = 0; 2818 int c = fEntityScanner.scanContent(fTempString); 2819 if(DEBUG){ 2820 System.out.println("fTempString = " + fTempString); 2821 } 2822 if(fEntityScanner.skipChar('<')){ 2823 //check if we have reached end of element 2824 if(fEntityScanner.skipChar('/')){ 2825 //increase the mark up depth 2826 fMarkupDepth++; 2827 fLastSectionWasCharacterData = false; 2828 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2829 //check if its start of new element 2830 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2831 fMarkupDepth++; 2832 fLastSectionWasCharacterData = false; 2833 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2834 }else{ 2835 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2836 //there can be cdata ahead if coalesce is true we should call again 2837 if(fIsCoalesce){ 2838 fUsebuffer = true; 2839 fLastSectionWasCharacterData = true; 2840 fContentBuffer.append(fTempString); 2841 fTempString.length = 0; 2842 continue; 2843 } 2844 } 2845 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2846 if(fUsebuffer){ 2847 fContentBuffer.append(fTempString); 2848 fTempString.length = 0; 2849 } 2850 if(DEBUG){ 2851 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2852 } 2853 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2854 if(DEBUG)System.out.println("Return SPACE EVENT"); 2855 return XMLEvent.SPACE; 2856 }else 2857 return 
XMLEvent.CHARACTERS; 2858 2859 } else{ 2860 fUsebuffer = true ; 2861 if(DEBUG){ 2862 System.out.println("fContentBuffer = " + fContentBuffer); 2863 System.out.println("fTempString = " + fTempString); 2864 } 2865 fContentBuffer.append(fTempString); 2866 fTempString.length = 0; 2867 } 2868 if (c == '\r') { 2869 if(DEBUG){ 2870 System.out.println("'\r' character found"); 2871 } 2872 // happens when there is the character reference 2873 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2874 fEntityScanner.scanChar(); 2875 fUsebuffer = true; 2876 fContentBuffer.append((char)c); 2877 c = -1 ; 2878 } else if (c == ']') { 2879 //fStringBuffer.clear(); 2880 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2881 fUsebuffer = true; 2882 fContentBuffer.append((char)fEntityScanner.scanChar()); 2883 // remember where we are in case we get an endEntity before we 2884 // could flush the buffer out - this happens when we're parsing an 2885 // entity which ends with a ] 2886 fInScanContent = true; 2887 2888 // We work on a single character basis to handle cases such as: 2889 // ']]]>' which we might otherwise miss. 2890 // 2891 if (fEntityScanner.skipChar(']')) { 2892 fContentBuffer.append(']'); 2893 while (fEntityScanner.skipChar(']')) { 2894 fContentBuffer.append(']'); 2895 } 2896 if (fEntityScanner.skipChar('>')) { 2897 reportFatalError("CDEndInContent", null); 2898 } 2899 } 2900 c = -1 ; 2901 fInScanContent = false; 2902 } 2903 2904 do{ 2905 //xxx: we should be using only one buffer.. 2906 // we need not to grow the buffer only when isCoalesce() is not true; 2907 2908 if (c == '<') { 2909 fEntityScanner.scanChar(); 2910 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2911 break; 2912 }//xxx what should be the behavior if entity reference is present in the content ? 
2913 else if (c == '&') { 2914 fEntityScanner.scanChar(); 2915 setScannerState(SCANNER_STATE_REFERENCE); 2916 break; 2917 }///xxx since this part is also characters, it should be merged... 2918 else if (c != -1 && isInvalidLiteral(c)) { 2919 if (XMLChar.isHighSurrogate(c)) { 2920 // special case: surrogates 2921 scanSurrogates(fContentBuffer) ; 2922 setScannerState(SCANNER_STATE_CONTENT); 2923 } else { 2924 reportFatalError("InvalidCharInContent", 2925 new Object[] { 2926 Integer.toString(c, 16)}); 2927 fEntityScanner.scanChar(); 2928 } 2929 break; 2930 } 2931 //xxx: scanContent also gives character callback. 2932 c = scanContent(fContentBuffer) ; 2933 //we should not be iterating again if fIsCoalesce is not set to true 2934 2935 if(!fIsCoalesce){ 2936 setScannerState(SCANNER_STATE_CONTENT); 2937 break; 2938 } 2939 2940 }while(true); 2941 2942 //if (fDocumentHandler != null) { 2943 // fDocumentHandler.characters(fContentBuffer, null); 2944 //} 2945 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2946 //if fIsCoalesce is true there might be more data so call fDriver.next() 2947 if(fIsCoalesce){ 2948 fLastSectionWasCharacterData = true ; 2949 continue; 2950 }else{ 2951 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2952 if(DEBUG)System.out.println("Return SPACE EVENT"); 2953 return XMLEvent.SPACE; 2954 } else 2955 return XMLEvent.CHARACTERS ; 2956 } 2957 } 2958 2959 case SCANNER_STATE_END_ELEMENT_TAG :{ 2960 if(fEmptyElement){ 2961 //set it back to false. 2962 fEmptyElement = false; 2963 setScannerState(SCANNER_STATE_CONTENT); 2964 //check the case when there is comment after single element document 2965 //<foo/> and some comment after this 2966 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2967 2968 } else if(scanEndElement() == 0) { 2969 //It is last element of the document 2970 if (elementDepthIsZeroHook()) { 2971 //if element depth is zero , it indicates the end of the document 2972 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2973 //xxx understand this point once again.. 2974 return XMLEvent.END_ELEMENT ; 2975 } 2976 2977 } 2978 setScannerState(SCANNER_STATE_CONTENT); 2979 return XMLEvent.END_ELEMENT ; 2980 } 2981 2982 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2983 scanComment(); 2984 setScannerState(SCANNER_STATE_CONTENT); 2985 return XMLEvent.COMMENT; 2986 //break; 2987 } 2988 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2989 //clear the buffer first 2990 fContentBuffer.clear() ; 2991 //xxx: which buffer should be passed. Ideally we shouldn't have 2992 //more than two buffers -- 2993 //xxx: where should we add the switch for buffering. 2994 scanPI(fContentBuffer); 2995 setScannerState(SCANNER_STATE_CONTENT); 2996 return XMLEvent.PROCESSING_INSTRUCTION; 2997 //break; 2998 } 2999 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 3000 //xxx: What if CDATA is the first event 3001 //<foo><![CDATA[hello<><>]]>append</foo> 3002 3003 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 3004 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3005 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3006 fLastSectionWasCData = true ; 3007 fLastSectionWasEntityReference = false; 3008 fLastSectionWasCharacterData = false; 3009 }//if we dont need to coalesce clear the buffer 3010 else{ 3011 fContentBuffer.clear(); 3012 } 3013 fUsebuffer = true; 3014 //CDATA section is completely read in all the case. 3015 scanCDATASection(fContentBuffer , true); 3016 setScannerState(SCANNER_STATE_CONTENT); 3017 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3018 //and just call fDispatche.next(). Since we have set the scanner state to 3019 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3020 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3021 //2. Check if application has set for reporting CDATA event 3022 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3023 //return the cdata event as characters. 3024 if(fIsCoalesce){ 3025 fLastSectionWasCData = true ; 3026 //there might be more data to coalesce. 3027 continue; 3028 }else if(fReportCdataEvent){ 3029 return XMLEvent.CDATA; 3030 } else{ 3031 return XMLEvent.CHARACTERS; 3032 } 3033 } 3034 3035 case SCANNER_STATE_REFERENCE :{ 3036 fMarkupDepth++; 3037 foundBuiltInRefs = false; 3038 3039 //we should not clear the buffer only when the last state was either CDATA or 3040 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3041 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3042 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3043 //used when fIsCoalesce is set to true. 
3044 fLastSectionWasEntityReference = true ; 3045 fLastSectionWasCData = false; 3046 fLastSectionWasCharacterData = false; 3047 }//if we dont need to coalesce clear the buffer 3048 else{ 3049 fContentBuffer.clear(); 3050 } 3051 fUsebuffer = true ; 3052 //take care of character reference 3053 if (fEntityScanner.skipChar('#')) { 3054 scanCharReferenceValue(fContentBuffer, null); 3055 fMarkupDepth--; 3056 if(!fIsCoalesce){ 3057 setScannerState(SCANNER_STATE_CONTENT); 3058 return XMLEvent.CHARACTERS; 3059 } 3060 } else { 3061 // this function also starts new entity 3062 scanEntityReference(fContentBuffer); 3063 //if there was built-in entity reference & coalesce is not true 3064 //return CHARACTERS 3065 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3066 setScannerState(SCANNER_STATE_CONTENT); 3067 if (builtInRefCharacterHandled) { 3068 builtInRefCharacterHandled = false; 3069 return XMLEvent.ENTITY_REFERENCE; 3070 } else { 3071 return XMLEvent.CHARACTERS; 3072 } 3073 } 3074 3075 //if there was a text declaration, call next() it will be taken care. 3076 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3077 fLastSectionWasEntityReference = true ; 3078 continue; 3079 } 3080 3081 if(fScannerState == SCANNER_STATE_REFERENCE){ 3082 setScannerState(SCANNER_STATE_CONTENT); 3083 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3084 // Skip the entity reference, we don't care 3085 continue; 3086 } 3087 return XMLEvent.ENTITY_REFERENCE; 3088 } 3089 } 3090 //Wether it was character reference, entity reference or built-in entity 3091 //set the next possible state to SCANNER_STATE_CONTENT 3092 setScannerState(SCANNER_STATE_CONTENT); 3093 fLastSectionWasEntityReference = true ; 3094 continue; 3095 } 3096 3097 case SCANNER_STATE_TEXT_DECL: { 3098 // scan text decl 3099 if (fEntityScanner.skipString("<?xml")) { 3100 fMarkupDepth++; 3101 // NOTE: special case where entity starts with a PI 3102 // whose name starts with "xml" (e.g. 
"xmlfoo") 3103 if (isValidNameChar(fEntityScanner.peekChar())) { 3104 fStringBuffer.clear(); 3105 fStringBuffer.append("xml"); 3106 3107 if (fNamespaces) { 3108 while (isValidNCName(fEntityScanner.peekChar())) { 3109 fStringBuffer.append((char)fEntityScanner.scanChar()); 3110 } 3111 } else { 3112 while (isValidNameChar(fEntityScanner.peekChar())) { 3113 fStringBuffer.append((char)fEntityScanner.scanChar()); 3114 } 3115 } 3116 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3117 fContentBuffer.clear(); 3118 scanPIData(target, fContentBuffer); 3119 } 3120 3121 // standard text declaration 3122 else { 3123 //xxx: this function gives callback 3124 scanXMLDeclOrTextDecl(true); 3125 } 3126 } 3127 // now that we've straightened out the readers, we can read in chunks: 3128 fEntityManager.fCurrentEntity.mayReadChunks = true; 3129 setScannerState(SCANNER_STATE_CONTENT); 3130 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3131 //it seems we have to careful when to allow function issue a callback 3132 //and when to allow adapter issue a callback. 
3133 continue; 3134 } 3135 3136 3137 case SCANNER_STATE_ROOT_ELEMENT: { 3138 if (scanRootElementHook()) { 3139 fEmptyElement = true; 3140 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3141 return XMLEvent.START_ELEMENT; 3142 } 3143 setScannerState(SCANNER_STATE_CONTENT); 3144 return XMLEvent.START_ELEMENT ; 3145 } 3146 case SCANNER_STATE_CHAR_REFERENCE : { 3147 fContentBuffer.clear(); 3148 scanCharReferenceValue(fContentBuffer, null); 3149 fMarkupDepth--; 3150 setScannerState(SCANNER_STATE_CONTENT); 3151 return XMLEvent.CHARACTERS; 3152 } 3153 default: 3154 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3155 3156 }//switch 3157 } 3158 // premature end of file 3159 catch (EOFException e) { 3160 endOfFileHook(e); 3161 return -1; 3162 } 3163 } //while loop 3164 }//next 3165 3166 // 3167 // Protected methods 3168 // 3169 3170 // hooks 3171 3172 // NOTE: These hook methods are added so that the full document 3173 // scanner can share the majority of code with this class. 3174 3175 /** 3176 * Scan for DOCTYPE hook. This method is a hook for subclasses 3177 * to add code to handle scanning for a the "DOCTYPE" string 3178 * after the string "<!" has been scanned. 3179 * 3180 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3181 * was not scanned. 3182 */ 3183 protected boolean scanForDoctypeHook() 3184 throws IOException, XNIException { 3185 return false; 3186 } // scanForDoctypeHook():boolean 3187 3188 /** 3189 * Element depth iz zero. This methos is a hook for subclasses 3190 * to add code to handle when the element depth hits zero. When 3191 * scanning a document fragment, an element depth of zero is 3192 * normal. However, when scanning a full XML document, the 3193 * scanner must handle the trailing miscellanous section of 3194 * the document after the end of the document's root element. 
3195 * 3196 * @return True if the caller should stop and return true which 3197 * allows the scanner to switch to a new scanning 3198 * driver. A return value of false indicates that 3199 * the content driver should continue as normal. 3200 */ 3201 protected boolean elementDepthIsZeroHook() 3202 throws IOException, XNIException { 3203 return false; 3204 } // elementDepthIsZeroHook():boolean 3205 3206 /** 3207 * Scan for root element hook. This method is a hook for 3208 * subclasses to add code that handles scanning for the root 3209 * element. When scanning a document fragment, there is no 3210 * "root" element. However, when scanning a full XML document, 3211 * the scanner must handle the root element specially. 3212 * 3213 * @return True if the caller should stop and return true which 3214 * allows the scanner to switch to a new scanning 3215 * driver. A return value of false indicates that 3216 * the content driver should continue as normal. 3217 */ 3218 protected boolean scanRootElementHook() 3219 throws IOException, XNIException { 3220 return false; 3221 } // scanRootElementHook():boolean 3222 3223 /** 3224 * End of file hook. This method is a hook for subclasses to 3225 * add code that handles the end of file. The end of file in 3226 * a document fragment is OK if the markup depth is zero. 3227 * However, when scanning a full XML document, an end of file 3228 * is always premature. 3229 */ 3230 protected void endOfFileHook(EOFException e) 3231 throws IOException, XNIException { 3232 3233 // NOTE: An end of file is only only an error if we were 3234 // in the middle of scanning some markup. 
-Ac 3235 if (fMarkupDepth != 0) { 3236 reportFatalError("PrematureEOF", null); 3237 } 3238 3239 } // endOfFileHook() 3240 3241 } // class FragmentContentDriver 3242 3243 static void pr(String str) { 3244 System.out.println(str) ; 3245 } 3246 3247 protected boolean fUsebuffer ; 3248 3249 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3250 * maintained for attributes. 3251 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3252 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3253 * XMLString. 3254 * 3255 * @return XMLString XMLString used to store an attribute value. 3256 */ 3257 3258 protected XMLString getString(){ 3259 if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ 3260 return attributeValueCache.get(fAttributeCacheUsedCount++); 3261 } else{ 3262 XMLString str = new XMLString(); 3263 fAttributeCacheUsedCount++; 3264 attributeValueCache.add(str); 3265 return str; 3266 } 3267 } 3268 3269 /** 3270 * Implements XMLBufferListener interface. 3271 */ 3272 3273 public void refresh(){ 3274 refresh(0); 3275 } 3276 3277 /** 3278 * receives callbacks from {@link XMLEntityReader } when buffer 3279 * is being changed. 3280 * @param refreshPosition 3281 */ 3282 public void refresh(int refreshPosition){ 3283 //If you are reading attributes and you got a callback 3284 //cache available attributes. 3285 if(fReadingAttributes){ 3286 fAttributes.refresh(); 3287 } 3288 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3289 //since fTempString directly matches to the underlying main buffer 3290 //store the data into buffer 3291 fContentBuffer.append(fTempString); 3292 //clear the XMLString so that data can't be added again. 3293 fTempString.length = 0; 3294 fUsebuffer = true; 3295 } 3296 } 3297 3298 } // class XMLDocumentFragmentScannerImpl