1 /* 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 54 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 55 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 56 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 57 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 58 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.State; 59 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 60 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 61 import javax.xml.XMLConstants; 62 import javax.xml.stream.XMLStreamConstants; 63 import javax.xml.stream.events.XMLEvent; 64 65 /** 66 * 67 * This class is responsible for scanning the structure and content 68 * of document fragments. 69 * 70 * This class has been modified as per the new design which is more suited to 71 * efficiently build pull parser. Lot of improvements have been done and 72 * the code has been added to support stax functionality/features. 
 *
 * @author Neeraj Bajaj SUN Microsystems
 * @author K.Venugopal SUN Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, SUN Microsystems
 *
 */
public class XMLDocumentFragmentScannerImpl
        extends XMLScanner
        implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {

    //
    // Constants
    //

    /**
     * Maximum number of attributes allowed on one element. Despite sitting
     * under the "Constants" heading this is an instance field: resetCommon()
     * assigns it from the security manager's ELEMENT_ATTRIBUTE_LIMIT, or 0
     * when no security manager is configured.
     */
    protected int fElementAttributeLimit;

    /**
     * External subset resolver. Assigned from the entity-resolver property
     * when that value is an ExternalSubsetResolver, otherwise null.
     */
    protected ExternalSubsetResolver fExternalSubsetResolver;

    // scanner states

    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    /** Scanner state: reading an attribute name, e.g. 'type' in <book type="hard">. */
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    /** Scanner state: reading an attribute value, e.g. "hard" in <book type="hard">. */
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL (value 32 is intentionally not declared here). */
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input.
     */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    /** Scanner state: start-element tag, e.g. <book type="hard"> in <book type="hard">foo</book>. */
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    /** Scanner state: end-element tag, e.g. reading </book> in <book type="hard">foo</book>. */
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    /** Scanner state: character reference. */
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    /** Scanner state: built-in entity references. */
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard URI conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /**
     * Default value for access to external DTDs; it is also the initial
     * value of fAccessExternalDTD.
     * Original note: "file protocol. For DOM/SAX, the secure feature is set
     * to true by default."
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features.
     * Index-aligned with FEATURE_DEFAULTS (see getFeatureDefault).
     */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Feature defaults; entry i is the default for RECOGNIZED_FEATURES[i] (null = no default reported). */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        null,
        Boolean.FALSE,
        Boolean.FALSE,
        Boolean.TRUE
    };

    /** Recognized properties. Index-aligned with PROPERTY_DEFAULTS (see getPropertyDefault). */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER
    };

    /**
     * Property defaults; entry i is the default for RECOGNIZED_PROPERTIES[i].
     * NOTE(review): the default listed for XML_SECURITY_PROPERTY_MANAGER is
     * the String EXTERNAL_ACCESS_DEFAULT, whereas reset() and setProperty()
     * cast that property's value to XMLSecurityPropertyManager -- confirm no
     * component manager ever installs this default as the property value.
     */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        EXTERNAL_ACCESS_DEFAULT
    };

    /** The characters "[CDATA[". */
    private static final char [] cdata = {'[','C','D','A','T','A','['};
    /** The characters "<?xml". */
    static final char [] xmlDecl = {'<','?','x','m','l'};
    /** The characters "</". */
    private static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    /** General debug flag. */
    protected static final boolean DEBUG = false;
    /** Debug flag for coalescing. */
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler; may be null (callbacks in this class check before notifying). */
    protected XMLDocumentHandler fDocumentHandler;
    /** Last scanner state; getElementQName() compares it with XMLEvent.END_ELEMENT. */
    protected int fScannerLastState ;

    /** Entity storage; refreshed from the entity manager in resetCommon(). */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack: startEntity() records the markup depth at index
     * fEntityDepth, endEntity() checks it again; doubled in size on demand.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth.
     * Reset to 0 in resetCommon() and decremented once the XML/text
     * declaration, a processing instruction or a comment has been scanned.
     */
    protected int fMarkupDepth;

    /** Whether the element is empty. */
    protected boolean fEmptyElement ;

    /**
     * Tracks whether attributes are currently being read; this is useful
     * while there is a callback.
     */
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** Has external DTD. */
    protected boolean fHasExternalDTD;

    /** True when the XML declaration carried a standalone pseudo-attribute. */
    protected boolean fStandaloneSet;
    /** True when the standalone pseudo-attribute was "yes". */
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    /** Target of the most recently scanned processing instruction (set by scanPIData). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    //STAX related properties
    //defaultValues.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding named in the most recently scanned XML or text declaration, if any. */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration.
     */
    protected boolean fDisallowDoctype = false;

    /**
     * Comma-delimited list of protocols that are allowed for the purpose
     * of accessing external DTDs or entity references.
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * Standard URI conformant (strict URI).
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /**
     * Content driver.
     * NOTE(review): createContentDriver() is protected and therefore
     * overridable, yet it runs here during field initialisation, i.e. before
     * any subclass constructor body -- overrides must not rely on subclass
     * state.
     */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** Temporary string; returned by getCharacterData() when fUsebuffer is false. */
    protected XMLString fTempString = new XMLString();

    /** Second temporary string. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings; after scanning an XML/text declaration it holds
     * version, encoding and standalone at indexes 0, 1 and 2.
     */
    private String[] fStrings = new String[3];

    /** String buffer, made accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Second string buffer, made accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Content buffer, made accessible to derived classes. Holds comment text
     * (scanComment), is what getPIData() and getComment() return, and is what
     * getCharacterData() returns when fUsebuffer is true.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array.
     */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    /** DTD grammar helper; cleared (set to null) by resetCommon(). */
    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    /** Built-in reference character event */
    protected boolean builtInRefCharacterHandled = false;

    // --- skip element algorithm ---
    /** Pointer information is only stored for element depths below this limit. */
    static final short MAX_DEPTH_LIMIT = 5 ;
    /** Capacity of fElementArray. */
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    /** Second dimension of fPointerInfo. */
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    /** Linear list of element raw names, of length ELEMENT_ARRAY_LENGTH. */
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    /** Pointer location where the last element was skipped. */
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    /** 2D array storing pointer info, sized [MAX_DEPTH_LIMIT][MAX_POINTER_AT_A_DEPTH]. */
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor; all state is set up by field initialisers and reset(). */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source: registers this scanner as the entity manager's
     * entity handler and starts an entity named "$fragment$" on the given
     * source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
     */
    public void setInputSource(XMLInputSource inputSource) throws IOException {
        fEntityManager.setEntityHandler(this);
        // NOTE(review): the meaning of the two boolean flags (false, true) is
        // defined by XMLEntityManager.startEntity -- confirm against that class.
        fEntityManager.startEntity("$fragment$", inputSource, false, true);
        // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
    } // setInputSource(XMLInputSource)

    /**
     * Scans a document.
435 * 436 * @param complete True if the scanner should scan the document 437 * completely, pushing all events to the registered 438 * document handler. A value of false indicates that 439 * that the scanner should only scan the next portion 440 * of the document and return. A scanner instance is 441 * permitted to completely scan a document if it does 442 * not support this "pull" scanning model. 443 * 444 * @return True if there is more to scan, false otherwise. 445 */ 446 public boolean scanDocument(boolean complete) 447 throws IOException, XNIException { 448 449 // keep dispatching "events" 450 fEntityManager.setEntityHandler(this); 451 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 452 453 int event = next(); 454 do { 455 switch (event) { 456 case XMLStreamConstants.START_DOCUMENT : 457 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 458 break; 459 case XMLStreamConstants.START_ELEMENT : 460 //System.out.println(" in scann element"); 461 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 462 break; 463 case XMLStreamConstants.CHARACTERS : 464 fDocumentHandler.characters(getCharacterData(),null); 465 break; 466 case XMLStreamConstants.SPACE: 467 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
468 //System.out.println("in the space"); 469 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 470 break; 471 case XMLStreamConstants.ENTITY_REFERENCE : 472 //entity reference callback are given in startEntity 473 break; 474 case XMLStreamConstants.PROCESSING_INSTRUCTION : 475 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 476 break; 477 case XMLStreamConstants.COMMENT : 478 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 479 fDocumentHandler.comment(getCharacterData(),null); 480 break; 481 case XMLStreamConstants.DTD : 482 //all DTD related callbacks are handled in DTDScanner. 483 //1. Stax doesn't define DTD states as it does for XML Document. 484 //therefore we don't need to take care of anything here. So Just break; 485 break; 486 case XMLStreamConstants.CDATA: 487 fDocumentHandler.startCDATA(null); 488 //xxx: check if CDATA values comes from getCharacterData() function 489 fDocumentHandler.characters(getCharacterData(),null); 490 fDocumentHandler.endCDATA(null); 491 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 492 break; 493 case XMLStreamConstants.NOTATION_DECLARATION : 494 break; 495 case XMLStreamConstants.ENTITY_DECLARATION : 496 break; 497 case XMLStreamConstants.NAMESPACE : 498 break; 499 case XMLStreamConstants.ATTRIBUTE : 500 break; 501 case XMLStreamConstants.END_ELEMENT : 502 //do not give callback here. 503 //this callback is given in scanEndElement function. 
504 //fDocumentHandler.endElement(getElementQName(),null); 505 break; 506 default : 507 throw new InternalError("processing event: " + event); 508 509 } 510 //System.out.println("here in before calling next"); 511 event = next(); 512 //System.out.println("here in after calling next"); 513 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 514 515 if(event == XMLStreamConstants.END_DOCUMENT) { 516 fDocumentHandler.endDocument(null); 517 return false; 518 } 519 520 return true; 521 522 } // scanDocument(boolean):boolean 523 524 525 526 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 527 if(fScannerLastState == XMLEvent.END_ELEMENT){ 528 fElementQName.setValues(fElementStack.getLastPoppedElement()); 529 } 530 return fElementQName ; 531 } 532 533 /** return the next state on the input 534 * @return int 535 */ 536 537 public int next() throws IOException, XNIException { 538 return fDriver.next(); 539 } 540 541 // 542 // XMLComponent methods 543 // 544 545 /** 546 * Resets the component. The component can query the component manager 547 * about any features and properties that affect the operation of the 548 * component. 549 * 550 * @param componentManager The component manager. 551 * 552 * @throws SAXException Thrown by component on initialization error. 553 * For example, if a feature or property is 554 * required for the operation of the component, the 555 * component manager may throw a 556 * SAXNotRecognizedException or a 557 * SAXNotSupportedException. 
558 */ 559 560 public void reset(XMLComponentManager componentManager) 561 throws XMLConfigurationException { 562 563 super.reset(componentManager); 564 565 // other settings 566 // fDocumentSystemId = null; 567 568 // sax features 569 //fAttributes.setNamespaces(fNamespaces); 570 571 // xerces features 572 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 573 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 574 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 575 576 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 577 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 578 (ExternalSubsetResolver) resolver : null; 579 580 //attribute 581 fReadingAttributes = false; 582 //xxx: external entities are supported in Xerces 583 // it would be good to define feature for this case 584 fSupportExternalEntities = true; 585 fReplaceEntityReferences = true; 586 fIsCoalesce = false; 587 588 // setup Driver 589 setScannerState(SCANNER_STATE_CONTENT); 590 setDriver(fContentDriver); 591 592 // JAXP 1.5 features and properties 593 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 594 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 595 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 596 597 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 598 599 resetCommon(); 600 //fEntityManager.test(); 601 } // reset(XMLComponentManager) 602 603 604 public void reset(PropertyManager propertyManager){ 605 606 super.reset(propertyManager); 607 608 // other settings 609 // fDocumentSystemId = null; 610 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 611 fNotifyBuiltInRefs = false ; 612 613 //fElementStack2.clear(); 614 //fReplaceEntityReferences = true; 615 
//fSupportExternalEntities = true; 616 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 617 fReplaceEntityReferences = bo.booleanValue(); 618 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 619 fSupportExternalEntities = bo.booleanValue(); 620 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 621 if(cdata != null) 622 fReportCdataEvent = cdata.booleanValue() ; 623 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 624 if(coalesce != null) 625 fIsCoalesce = coalesce.booleanValue(); 626 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 627 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 628 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 629 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 630 // setup Driver 631 //we dont need to do this -- nb. 
632 //setScannerState(SCANNER_STATE_CONTENT); 633 //setDriver(fContentDriver); 634 //fEntityManager.test(); 635 636 // JAXP 1.5 features and properties 637 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 638 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 639 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 640 641 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 642 resetCommon(); 643 } // reset(XMLComponentManager) 644 645 void resetCommon() { 646 // initialize vars 647 fMarkupDepth = 0; 648 fCurrentElement = null; 649 fElementStack.clear(); 650 fHasExternalDTD = false; 651 fStandaloneSet = false; 652 fStandalone = false; 653 fInScanContent = false; 654 //skipping algorithm 655 fShouldSkip = false; 656 fAdd = false; 657 fSkip = false; 658 659 fEntityStore = fEntityManager.getEntityStore(); 660 dtdGrammarUtil = null; 661 662 if (fSecurityManager != null) { 663 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 664 } else { 665 fElementAttributeLimit = 0; 666 } 667 fLimitAnalyzer = new XMLLimitAnalyzer(); 668 fEntityManager.setLimitAnalyzer(fLimitAnalyzer); 669 } 670 671 /** 672 * Returns a list of feature identifiers that are recognized by 673 * this component. This method may return null if no features 674 * are recognized by this component. 675 */ 676 public String[] getRecognizedFeatures() { 677 return (String[])(RECOGNIZED_FEATURES.clone()); 678 } // getRecognizedFeatures():String[] 679 680 /** 681 * Sets the state of a feature. This method is called by the component 682 * manager any time after reset when a feature changes state. 683 * <p> 684 * <strong>Note:</strong> Components should silently ignore features 685 * that do not affect the operation of the component. 686 * 687 * @param featureId The feature identifier. 688 * @param state The state of the feature. 
689 * 690 * @throws SAXNotRecognizedException The component should not throw 691 * this exception. 692 * @throws SAXNotSupportedException The component should not throw 693 * this exception. 694 */ 695 public void setFeature(String featureId, boolean state) 696 throws XMLConfigurationException { 697 698 super.setFeature(featureId, state); 699 700 // Xerces properties 701 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 702 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 703 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 704 fNotifyBuiltInRefs = state; 705 } 706 } 707 708 } // setFeature(String,boolean) 709 710 /** 711 * Returns a list of property identifiers that are recognized by 712 * this component. This method may return null if no properties 713 * are recognized by this component. 714 */ 715 public String[] getRecognizedProperties() { 716 return (String[])(RECOGNIZED_PROPERTIES.clone()); 717 } // getRecognizedProperties():String[] 718 719 /** 720 * Sets the value of a property. This method is called by the component 721 * manager any time after reset when a property changes value. 722 * <p> 723 * <strong>Note:</strong> Components should silently ignore properties 724 * that do not affect the operation of the component. 725 * 726 * @param propertyId The property identifier. 727 * @param value The value of the property. 728 * 729 * @throws SAXNotRecognizedException The component should not throw 730 * this exception. 731 * @throws SAXNotSupportedException The component should not throw 732 * this exception. 
733 */ 734 public void setProperty(String propertyId, Object value) 735 throws XMLConfigurationException { 736 737 super.setProperty(propertyId, value); 738 739 // Xerces properties 740 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 741 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 742 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 743 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 744 fEntityManager = (XMLEntityManager)value; 745 return; 746 } 747 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 748 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 749 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 750 (ExternalSubsetResolver) value : null; 751 return; 752 } 753 } 754 755 756 // Xerces properties 757 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 758 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 759 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 760 fEntityManager = (XMLEntityManager)value; 761 } 762 return; 763 } 764 765 //JAXP 1.5 properties 766 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 767 { 768 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 769 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 770 } 771 772 } // setProperty(String,Object) 773 774 /** 775 * Returns the default state for a feature, or null if this 776 * component does not want to report a default value for this 777 * feature. 778 * 779 * @param featureId The feature identifier. 
780 * 781 * @since Xerces 2.2.0 782 */ 783 public Boolean getFeatureDefault(String featureId) { 784 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 785 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 786 return FEATURE_DEFAULTS[i]; 787 } 788 } 789 return null; 790 } // getFeatureDefault(String):Boolean 791 792 /** 793 * Returns the default state for a property, or null if this 794 * component does not want to report a default value for this 795 * property. 796 * 797 * @param propertyId The property identifier. 798 * 799 * @since Xerces 2.2.0 800 */ 801 public Object getPropertyDefault(String propertyId) { 802 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 803 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 804 return PROPERTY_DEFAULTS[i]; 805 } 806 } 807 return null; 808 } // getPropertyDefault(String):Object 809 810 // 811 // XMLDocumentSource methods 812 // 813 814 /** 815 * setDocumentHandler 816 * 817 * @param documentHandler 818 */ 819 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 820 fDocumentHandler = documentHandler; 821 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 822 } // setDocumentHandler(XMLDocumentHandler) 823 824 825 /** Returns the document handler */ 826 public XMLDocumentHandler getDocumentHandler(){ 827 return fDocumentHandler; 828 } 829 830 // 831 // XMLEntityHandler methods 832 // 833 834 /** 835 * This method notifies of the start of an entity. The DTD has the 836 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 837 * general entities are just specified by their name. 838 * 839 * @param name The name of the entity. 840 * @param identifier The resource identifier. 841 * @param encoding The auto-detected IANA encoding name of the entity 842 * stream. This value will be null in those situations 843 * where the entity encoding is not auto-detected (e.g. 844 * internal entities or a document entity that is 845 * parsed from a java.io.Reader). 
846 * @param augs Additional information that may include infoset augmentations 847 * 848 * @throws XNIException Thrown by handler to signal an error. 849 */ 850 public void startEntity(String name, 851 XMLResourceIdentifier identifier, 852 String encoding, Augmentations augs) throws XNIException { 853 854 // keep track of this entity before fEntityDepth is increased 855 if (fEntityDepth == fEntityStack.length) { 856 int[] entityarray = new int[fEntityStack.length * 2]; 857 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 858 fEntityStack = entityarray; 859 } 860 fEntityStack[fEntityDepth] = fMarkupDepth; 861 862 super.startEntity(name, identifier, encoding, augs); 863 864 // WFC: entity declared in external subset in standalone doc 865 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 866 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 867 new Object[]{name}); 868 } 869 870 /** we are not calling the handlers yet.. */ 871 // call handler 872 if (fDocumentHandler != null && !fScanningAttribute) { 873 if (!name.equals("[xml]")) { 874 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 875 } 876 } 877 878 } // startEntity(String,XMLResourceIdentifier,String) 879 880 /** 881 * This method notifies the end of an entity. The DTD has the pseudo-name 882 * of "[dtd]" parameter entity names start with '%'; and general entities 883 * are just specified by their name. 884 * 885 * @param name The name of the entity. 886 * @param augs Additional information that may include infoset augmentations 887 * 888 * @throws XNIException Thrown by handler to signal an error. 
889 */ 890 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 891 892 /** 893 * // flush possible pending output buffer - see scanContent 894 * if (fInScanContent && fStringBuffer.length != 0 895 * && fDocumentHandler != null) { 896 * fDocumentHandler.characters(fStringBuffer, null); 897 * fStringBuffer.length = 0; // make sure we know it's been flushed 898 * } 899 */ 900 super.endEntity(name, augs); 901 902 // make sure markup is properly balanced 903 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 904 reportFatalError("MarkupEntityMismatch", null); 905 } 906 907 /**/ 908 // call handler 909 if (fDocumentHandler != null && !fScanningAttribute) { 910 if (!name.equals("[xml]")) { 911 fDocumentHandler.endGeneralEntity(name, augs); 912 } 913 } 914 915 916 } // endEntity(String) 917 918 // 919 // Protected methods 920 // 921 922 // Driver factory methods 923 924 /** Creates a content Driver. */ 925 protected Driver createContentDriver() { 926 return new FragmentContentDriver(); 927 } // createContentDriver():Driver 928 929 // scanning methods 930 931 /** 932 * Scans an XML or text declaration. 933 * <p> 934 * <pre> 935 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 936 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 937 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 938 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 939 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 940 * | ('"' ('yes' | 'no') '"')) 941 * 942 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 943 * </pre> 944 * 945 * @param scanningTextDecl True if a text declaration is to 946 * be scanned instead of an XML 947 * declaration. 
948 */ 949 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 950 throws IOException, XNIException { 951 952 // scan decl 953 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 954 fMarkupDepth--; 955 956 // pseudo-attribute values 957 String version = fStrings[0]; 958 String encoding = fStrings[1]; 959 String standalone = fStrings[2]; 960 fDeclaredEncoding = encoding; 961 // set standalone 962 fStandaloneSet = standalone != null; 963 fStandalone = fStandaloneSet && standalone.equals("yes"); 964 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 965 //but this information is only related with Document Entity. 966 fEntityManager.setStandalone(fStandalone); 967 968 969 // call handler 970 if (fDocumentHandler != null) { 971 if (scanningTextDecl) { 972 fDocumentHandler.textDecl(version, encoding, null); 973 } else { 974 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 975 } 976 } 977 978 if(version != null){ 979 fEntityScanner.setVersion(version); 980 fEntityScanner.setXMLVersion(version); 981 } 982 // set encoding on reader, only if encoding was not specified by the application explicitly 983 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 984 fEntityScanner.setEncoding(encoding); 985 } 986 987 } // scanXMLDeclOrTextDecl(boolean) 988 989 public String getPITarget(){ 990 return fPITarget ; 991 } 992 993 public XMLStringBuffer getPIData(){ 994 return fContentBuffer ; 995 } 996 997 //XXX: why not this function behave as per the state of the parser? 998 public XMLString getCharacterData(){ 999 if(fUsebuffer){ 1000 return fContentBuffer ; 1001 }else{ 1002 return fTempString; 1003 } 1004 1005 } 1006 1007 1008 /** 1009 * Scans a processing data. This is needed to handle the situation 1010 * where a document starts with a processing instruction whose 1011 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1012 * 1013 * @param target The PI target 1014 * @param data The XMLStringBuffer to fill in with the data 1015 */ 1016 protected void scanPIData(String target, XMLStringBuffer data) 1017 throws IOException, XNIException { 1018 1019 super.scanPIData(target, data); 1020 1021 //set the PI target and values 1022 fPITarget = target ; 1023 1024 fMarkupDepth--; 1025 1026 } // scanPIData(String) 1027 1028 /** 1029 * Scans a comment. 1030 * <p> 1031 * <pre> 1032 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1033 * </pre> 1034 * <p> 1035 * <strong>Note:</strong> Called after scanning past '<!--' 1036 */ 1037 protected void scanComment() throws IOException, XNIException { 1038 fContentBuffer.clear(); 1039 scanComment(fContentBuffer); 1040 //getTextCharacters can also be called for reading comments 1041 fUsebuffer = true; 1042 fMarkupDepth--; 1043 1044 } // scanComment() 1045 1046 //xxx value returned by this function may not remain valid if another event is scanned. 1047 public String getComment(){ 1048 return fContentBuffer.toString(); 1049 } 1050 1051 void addElement(String rawname){ 1052 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1053 //storing element raw name in a linear list of array 1054 fElementArray[fElementPointer] = rawname ; 1055 //storing elemnetPointer for particular element depth 1056 1057 if(DEBUG_SKIP_ALGORITHM){ 1058 StringBuffer sb = new StringBuffer() ; 1059 sb.append(" Storing element information ") ; 1060 sb.append(" fElementPointer = " + fElementPointer) ; 1061 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1062 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1063 System.out.println(sb.toString()) ; 1064 } 1065 1066 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1067 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1068 short column = storePointerForADepth(fElementPointer); 1069 if(column > 0){ 1070 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1071 //identity comparison shouldn't take much time and we can rely on this 1072 //since its guaranteed to have same object id for same string. 1073 if(rawname == fElementArray[pointer]){ 1074 fShouldSkip = true ; 1075 fLastPointerLocation = pointer ; 1076 //reset the things and return. 1077 resetPointer((short)fElementStack.fDepth , column) ; 1078 fElementArray[fElementPointer] = null ; 1079 return ; 1080 }else{ 1081 fShouldSkip = false ; 1082 } 1083 } 1084 } 1085 fElementPointer++ ; 1086 } 1087 } 1088 1089 1090 void resetPointer(short depth, short column){ 1091 fPointerInfo[depth] [column] = (short)0; 1092 } 1093 1094 //returns column information at which pointer was stored. 1095 short storePointerForADepth(short elementPointer){ 1096 short depth = (short) fElementStack.fDepth ; 1097 1098 //Stores element pointer locations at particular depth , only 4 pointer locations 1099 //are stored at particular depth for now. 1100 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1101 1102 if(canStore(depth, i)){ 1103 fPointerInfo[depth][i] = elementPointer ; 1104 if(DEBUG_SKIP_ALGORITHM){ 1105 StringBuffer sb = new StringBuffer() ; 1106 sb.append(" Pointer information ") ; 1107 sb.append(" fElementPointer = " + fElementPointer) ; 1108 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1109 sb.append(" column = " + i ) ; 1110 System.out.println(sb.toString()) ; 1111 } 1112 return i; 1113 } 1114 //else 1115 //pointer was not stored because we reached the limit 1116 } 1117 return -1 ; 1118 } 1119 1120 boolean canStore(short depth, short column){ 1121 //colum = 0 , means first element at particular depth 1122 //column = 1, means second element at particular depth 1123 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1124 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1125 } 1126 1127 1128 short getElementPointer(short depth, short column){ 1129 //colum = 0 , means first element at particular depth 1130 //column = 1, means second element at particular depth 1131 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1132 return fPointerInfo[depth][column] ; 1133 } 1134 1135 //this function assumes that string passed is not null and skips 1136 //the following string from the buffer this makes sure 1137 boolean skipFromTheBuffer(String rawname) throws IOException{ 1138 if(fEntityScanner.skipString(rawname)){ 1139 char c = (char)fEntityScanner.peekChar() ; 1140 //If the start element was completely skipped we should encounter either ' '(space), 1141 //or '/' (in case of empty element) or '>' 1142 if( c == ' ' || c == '/' || c == '>'){ 1143 fElementRawname = rawname ; 1144 return true ; 1145 } else{ 1146 return false; 1147 } 1148 } else 1149 return false ; 1150 } 1151 1152 boolean skipQElement(String rawname) throws IOException{ 1153 1154 final int c = fEntityScanner.getChar(rawname.length()); 1155 //if this character is still valid element name -- this means string can't match 1156 if(XMLChar.isName(c)){ 1157 return false; 1158 }else{ 1159 return fEntityScanner.skipString(rawname); 1160 } 1161 } 1162 1163 protected boolean skipElement() throws IOException { 1164 1165 if(!fShouldSkip) return false ; 1166 1167 if(fLastPointerLocation != 0){ 1168 //Look at the next element stored in the array list.. we might just get a match. 1169 String rawname = fElementArray[fLastPointerLocation + 1] ; 1170 if(rawname != null && skipFromTheBuffer(rawname)){ 1171 fLastPointerLocation++ ; 1172 if(DEBUG_SKIP_ALGORITHM){ 1173 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1174 } 1175 return true ; 1176 } else{ 1177 //reset it back to zero... we haven't got the correct subset yet. 
1178 fLastPointerLocation = 0 ; 1179 1180 } 1181 } 1182 //xxx: we can put some logic here as from what column it should start looking 1183 //for now we always start at 0 1184 //fallback to tolerant algorithm, it would look for differnt element stored at different 1185 //depth and get us the pointer location. 1186 return fShouldSkip && skipElement((short)0); 1187 1188 } 1189 1190 //start of the column at which it should try searching 1191 boolean skipElement(short column) throws IOException { 1192 short depth = (short)fElementStack.fDepth ; 1193 1194 if(depth > MAX_DEPTH_LIMIT){ 1195 return fShouldSkip = false ; 1196 } 1197 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1198 short pointer = getElementPointer(depth , i ) ; 1199 1200 if(pointer == 0){ 1201 return fShouldSkip = false ; 1202 } 1203 1204 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1205 if(DEBUG_SKIP_ALGORITHM){ 1206 System.out.println(); 1207 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1208 System.out.println(); 1209 } 1210 fLastPointerLocation = pointer ; 1211 return fShouldSkip = true ; 1212 } 1213 } 1214 return fShouldSkip = false ; 1215 } 1216 1217 /** 1218 * Scans a start element. This method will handle the binding of 1219 * namespace information and notifying the handler of the start 1220 * of the element. 1221 * <p> 1222 * <pre> 1223 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1224 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1225 * </pre> 1226 * <p> 1227 * <strong>Note:</strong> This method assumes that the leading 1228 * '<' character has been consumed. 1229 * <p> 1230 * <strong>Note:</strong> This method uses the fElementQName and 1231 * fAttributes variables. The contents of these variables will be 1232 * destroyed. The caller should copy important information out of 1233 * these variables before calling this method. 
* NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
*
* @return True if element is empty. (i.e. It matches
*         production [44].
*/
// fElementQName will have the details of element just read..
// fAttributes will have the details of all the attributes.
protected boolean scanStartElement()
throws IOException, XNIException {

    if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
    //when skipping is true and no more elements should be added
    if(fSkip && !fAdd){
        //get the stored element -- if everything goes right this should match the
        //token in the buffer

        QName name = fElementStack.getNext();

        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Trying to skip String = " + name.rawname);
        }

        //Be conservative -- if skipping fails -- stop.
        fSkip = fEntityScanner.skipString(name.rawname);

        if(fSkip){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
            }
            fElementStack.push();
            fElementQName = name;
        }else{
            //if skipping fails reposition the stack or fallback to normal way of processing
            fElementStack.reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element was NOT skipped, REPOSITIONING stack" );
            }
        }
    }

    //we are still at the stage of adding elements
    //the elements were not matched or
    //fSkip is not set to true
    //(fSkip may have just been cleared by the failed skip attempt above, in
    //which case the name is scanned the normal way here)
    if(!fSkip || fAdd){
        //get the next element from the stack
        fElementQName = fElementStack.nextElement();
        // name
        if (fNamespaces) {
            fEntityScanner.scanQName(fElementQName);
        } else {
            String name = fEntityScanner.scanName();
            fElementQName.setValues(null, name, name, null);
        }

        if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
        if(DEBUG_SKIP_ALGORITHM){
            if(fAdd){
                System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount);
            }
        }

    }

    //when the elements are being added , we need to check if we are set for skipping the elements
    if(fAdd){
        //this sets the value of fAdd variable
        fElementStack.matchElement(fElementQName);
    }


    //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
    fCurrentElement = fElementQName;

    String rawname = fElementQName.rawname;

    // reset per-element state; fEmptyElement is set to true by
    // seekCloseOfStartTag() when the tag ends with "/>"
    fEmptyElement = false;

    fAttributes.removeAllAttributes();

    checkDepth(rawname);
    // scan attributes until the end of the start tag ('>' or '/>') is reached
    if(!seekCloseOfStartTag()){
        fReadingAttributes = true;
        fAttributeCacheUsedCount =0;
        fStringBufferIndex =0;
        fAddDefaultAttr = true;
        do {
            scanAttribute(fAttributes);
            // enforce the per-element attribute count limit, when one is configured
            if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
                    fAttributes.getLength() > fElementAttributeLimit){
                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                        "ElementAttributeLimit",
                        new Object[]{rawname, fElementAttributeLimit },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR );
            }

        } while (!seekCloseOfStartTag());
        fReadingAttributes=false;
    }

    if (fEmptyElement) {
        //decrease the markup depth..
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch",
                    new Object[]{fCurrentElement.rawname});
        }
        // call handler
        if (fDocumentHandler != null) {
            fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
        }

        //We should not be popping out the context here in endELement becaause the namespace context is still
        //valid when parser is at the endElement state.
        //if (fNamespaces) {
        //    fNamespaceContext.popContext();
        //}

        //pop the element off the stack..
        fElementStack.popElement();

    } else {

        if(dtdGrammarUtil != null)
            dtdGrammarUtil.startElement(fElementQName, fAttributes);
        if(fDocumentHandler != null){
            //complete element and attributes are traversed in this function so we can send a callback
            //here.
            //<strong>we shouldn't be sending callback in scanDocument()</strong>
            fDocumentHandler.startElement(fElementQName, fAttributes, null);
        }
    }


    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement);
    return fEmptyElement;

} // scanStartElement():boolean

/**
 * Looks for the close of start tag, i.e. if it finds '>' or '/>'
 * Characters are consumed.
 * <p>
 * When '/>' is found, fEmptyElement is set to true. A '/' that is not
 * followed by '>' is reported as a fatal error. If neither '>' nor '/'
 * comes next, the next character must be a valid name start character
 * preceded by white space (the beginning of an attribute); anything else
 * is reported as a fatal error.
 *
 * @return true if the end of the start tag was consumed, false if an
 *         attribute is expected next
 */
protected boolean seekCloseOfStartTag() throws IOException, XNIException {
    // spaces
    boolean sawSpace = fEntityScanner.skipSpaces();

    // end tag?
    final int c = fEntityScanner.peekChar();
    if (c == '>') {
        fEntityScanner.scanChar();
        return true;
    } else if (c == '/') {
        fEntityScanner.scanChar();
        if (!fEntityScanner.skipChar('>')) {
            reportFatalError("ElementUnterminated",
                    new Object[]{fElementQName.rawname});
        }
        fEmptyElement = true;
        return true;
    } else if (!isValidNameStartChar(c) || !sawSpace) {
        reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname});
    }

    return false;
}

/**
 * Tells whether the attribute list of the current element is non-empty.
 *
 * @return true if fAttributes holds at least one attribute
 */
public boolean hasAttributes(){
    return fAttributes.getLength() > 0 ? true : false ;
}


/**
 * Scans an attribute.
 * <p>
 * <pre>
 * [41] Attribute ::= Name Eq AttValue
 * </pre>
 * <p>
 * <strong>Note:</strong> This method assumes that the next
 * character on the stream is the first character of the attribute
 * name.
1417 * <p> 1418 * <strong>Note:</strong> This method uses the fAttributeQName and 1419 * fQName variables. The contents of these variables will be 1420 * destroyed. 1421 * 1422 * @param attributes The attributes list for the scanned attribute. 1423 */ 1424 1425 /** 1426 * protected void scanAttribute(AttributeIteratorImpl attributes) 1427 * throws IOException, XNIException { 1428 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1429 * 1430 * 1431 * // name 1432 * if (fNamespaces) { 1433 * fEntityScanner.scanQName(fAttributeQName); 1434 * } 1435 * else { 1436 * String name = fEntityScanner.scanName(); 1437 * fAttributeQName.setValues(null, name, name, null); 1438 * } 1439 * 1440 * // equals 1441 * fEntityScanner.skipSpaces(); 1442 * if (!fEntityScanner.skipChar('=')) { 1443 * reportFatalError("EqRequiredInAttribute", 1444 * new Object[]{fAttributeQName.rawname}); 1445 * } 1446 * fEntityScanner.skipSpaces(); 1447 * 1448 * 1449 * // content 1450 * int oldLen = attributes.getLength(); 1451 */ 1452 /**xxx there is one check of duplicate attribute that has been removed. 
1453 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1454 * 1455 * // WFC: Unique Att Spec 1456 * if (oldLen == attributes.getLength()) { 1457 * reportFatalError("AttributeNotUnique", 1458 * new Object[]{fCurrentElement.rawname, 1459 * fAttributeQName.rawname}); 1460 * } 1461 */ 1462 1463 /* 1464 //REVISIT: one more case needs to be included: external PE and standalone is no 1465 boolean isVC = fHasExternalDTD && !fStandalone; 1466 scanAttributeValue(fTempString, fTempString2, 1467 fAttributeQName.rawname, attributes, 1468 oldLen, isVC); 1469 1470 //attributes.setValue(oldLen, fTempString.toString()); 1471 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1472 //attributes.setSpecified(oldLen, true); 1473 1474 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1475 fAttributes.addAttribute(attribute); 1476 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1477 } // scanAttribute(XMLAttributes) 1478 1479 */ 1480 1481 /** return the attribute iterator implementation */ 1482 public XMLAttributesIteratorImpl getAttributeIterator(){ 1483 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1484 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1485 fAddDefaultAttr = false; 1486 } 1487 return fAttributes; 1488 } 1489 1490 /** return if standalone is set */ 1491 public boolean standaloneSet(){ 1492 return fStandaloneSet; 1493 } 1494 /** return if the doucment is standalone */ 1495 public boolean isStandAlone(){ 1496 return fStandalone ; 1497 } 1498 /** 1499 * Scans an attribute name value pair. 1500 * <p> 1501 * <pre> 1502 * [41] Attribute ::= Name Eq AttValue 1503 * </pre> 1504 * <p> 1505 * <strong>Note:</strong> This method assumes that the next 1506 * character on the stream is the first character of the attribute 1507 * name. 
* <p>
* <strong>Note:</strong> This method uses the fAttributeQName and
* fQName variables. The contents of these variables will be
* destroyed.
*
* @param attributes The attributes list for the scanned attribute.
*/

protected void scanAttribute(XMLAttributes attributes)
throws IOException, XNIException {
    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()");

    // name
    if (fNamespaces) {
        fEntityScanner.scanQName(fAttributeQName);
    } else {
        String name = fEntityScanner.scanName();
        fAttributeQName.setValues(null, name, name, null);
    }

    // equals
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('=')) {
        reportFatalError("EqRequiredInAttribute",
                new Object[] {fCurrentElement.rawname, fAttributeQName.rawname});
    }
    fEntityScanner.skipSpaces();

    int attIndex = 0 ;
    //REVISIT: one more case needs to be included: external PE and standalone is no
    boolean isVC = fHasExternalDTD && !fStandalone;
    //fTempString would store attribute value
    ///fTempString2 would store attribute non-normalized value

    //this function doesn't use 'attIndex'. We are adding the attribute later
    //after we have figured out that current attribute is not namespace declaration
    //since scanAttributeValue doesn't use attIndex parameter therefore we
    //can safely add the attribute later..
    XMLString tmpStr = getString();

    scanAttributeValue(tmpStr, fTempString2,
            fAttributeQName.rawname, attributes,
            attIndex, isVC);

    // content
    int oldLen = attributes.getLength();
    //if the attribute name already exists.. new value is replaced with old value
    attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);

    // WFC: Unique Att Spec
    //attributes count will be same if the current attribute name already exists for this element name.
    //this means there are two duplicate attributes.
    if (oldLen == attributes.getLength()) {
        reportFatalError("AttributeNotUnique",
                new Object[]{fCurrentElement.rawname,
                        fAttributeQName.rawname});
    }

    //tmpStr contains the attribute value;
    //null is passed as the String form of the value, the XMLString carries it
    attributes.setValue(attIndex, null, tmpStr);

    ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM
    //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
    attributes.setSpecified(attIndex, true);

    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()");

} // scanAttribute(XMLAttributes)

/**
 * Scans element content and appends it to the given buffer.
 *
 * @param content buffer the scanned content is appended to
 *
 * @return Returns the next character on the stream, or -1 when a '\r' or
 *         a run of ']' characters was consumed by this method.
 */
//CHANGED:
//EARLIER: scanContent()
//NOW: scanContent(XMLStringBuffer)
//It makes things easy if this functions takes XMLStringBuffer as parameter..
//this function appends the data to the buffer.
protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
    //set the fTempString length to 0 before passing it on to scanContent
    //scanContent sets the correct co-ordinates as per the content read
    fTempString.length = 0;
    int c = fEntityScanner.scanContent(fTempString);
    content.append(fTempString);
    fTempString.length = 0;
    if (c == '\r') {
        // happens when there is the character reference
        //xxx: We know the next character.. we should just skip it and add it directly
        fEntityScanner.scanChar();
        content.append((char)c);
        c = -1;
    } else if (c == ']') {
        //fStringBuffer.clear();
        //xxx: We know the next character.. we should just skip it and add ']' directly
        content.append((char)fEntityScanner.scanChar());
        // remember where we are in case we get an endEntity before we
        // could flush the buffer out - this happens when we're parsing an
        // entity which ends with a ]
        fInScanContent = true;
        //
        // We work on a single character basis to handle cases such as:
        // ']]]>' which we might otherwise miss.
        //
        if (fEntityScanner.skipChar(']')) {
            content.append(']');
            while (fEntityScanner.skipChar(']')) {
                content.append(']');
            }
            // "]]>" is not allowed in character data
            if (fEntityScanner.skipChar('>')) {
                reportFatalError("CDEndInContent", null);
            }
        }
        fInScanContent = false;
        c = -1;
    }
    // the handler callback is disabled here; the content is only accumulated in the buffer
    if (fDocumentHandler != null && content.length > 0) {
        //fDocumentHandler.characters(content, null);
    }
    return c;

} // scanContent():int


/**
 * Scans a CDATA section.
 * <p>
 * <strong>Note:</strong> This method uses the fTempString and
 * fStringBuffer variables.
 *
 * @param contentBuffer buffer that is filled with the CDATA content
 * @param complete True if the CDATA section is to be scanned
 *                 completely. (Not read by this implementation.)
 *
 * @return True if CDATA is completely scanned. This implementation
 *         always returns true.
 */
//CHANGED:
protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)
throws IOException, XNIException {

    // call handler (callback disabled)
    if (fDocumentHandler != null) {
        //fDocumentHandler.startCDATA(null);
    }

    while (true) {
        //scanData will fill the contentBuffer
        //a false result ends the loop; presumably it means the "]]>" delimiter was reached -- TODO confirm against scanData
        if (!fEntityScanner.scanData("]]>", contentBuffer)) {
            break ;
            /** We dont need all this code if we pass ']]>' as delimeter..
             * int brackets = 2;
             * while (fEntityScanner.skipChar(']')) {
             * brackets++;
             * }
             *
             * //When we find more than 2 square brackets
             * if (fDocumentHandler != null && brackets > 2) {
             * //we dont need to clear the buffer..
             * //contentBuffer.clear();
             * for (int i = 2; i < brackets; i++) {
             * contentBuffer.append(']');
             * }
             * fDocumentHandler.characters(contentBuffer, null);
             * }
             *
             * if (fEntityScanner.skipChar('>')) {
             * break;
             * }
             * if (fDocumentHandler != null) {
             * //we dont need to clear the buffer now..
             * //contentBuffer.clear();
             * contentBuffer.append("]]");
             * fDocumentHandler.characters(contentBuffer, null);
             * }
             **/
        } else {
            // scanData stopped before the delimiter: look at the character it stopped on
            int c = fEntityScanner.peekChar();
            if (c != -1 && isInvalidLiteral(c)) {
                if (XMLChar.isHighSurrogate(c)) {
                    //contentBuffer.clear();
                    //scan surrogates if any....
                    scanSurrogates(contentBuffer);
                } else {
                    reportFatalError("InvalidCharInCDSect",
                            new Object[]{Integer.toString(c,16)});
                    // consume the offending character
                    fEntityScanner.scanChar();
                }
            }
            //by this time we have also read surrogate contents if any...
            if (fDocumentHandler != null) {
                //fDocumentHandler.characters(contentBuffer, null);
            }
        }
    }
    fMarkupDepth--;

    if (fDocumentHandler != null && contentBuffer.length > 0) {
        //fDocumentHandler.characters(contentBuffer, null);
    }

    // call handler (callback disabled)
    if (fDocumentHandler != null) {
        //fDocumentHandler.endCDATA(null);
    }

    return true;

} // scanCDATASection(XMLStringBuffer, boolean):boolean

/**
 * Scans an end element.
 * <p>
 * <pre>
 * [42] ETag ::= '</' Name S? '>'
 * </pre>
 * <p>
 * <strong>Note:</strong> This method uses the fElementQName variable.
 * The contents of this variable will be destroyed.
The caller should 1726 * copy the needed information out of this variable before calling 1727 * this method. 1728 * 1729 * @return The element depth. 1730 */ 1731 protected int scanEndElement() throws IOException, XNIException { 1732 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1733 1734 // pop context 1735 QName endElementName = fElementStack.popElement(); 1736 1737 String rawname = endElementName.rawname; 1738 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1739 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1740 //In scanners most of the time is consumed on checks done for XML characters, we can 1741 // optimize on it and avoid the checks done for endElement, 1742 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1743 1744 // this should work both for namespace processing true or false... 1745 1746 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1747 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1748 1749 if (!fEntityScanner.skipString(endElementName.rawname)) { 1750 reportFatalError("ETagRequired", new Object[]{rawname}); 1751 } 1752 1753 // end 1754 fEntityScanner.skipSpaces(); 1755 if (!fEntityScanner.skipChar('>')) { 1756 reportFatalError("ETagUnterminated", 1757 new Object[]{rawname}); 1758 } 1759 fMarkupDepth--; 1760 1761 //we have increased the depth for two markup "<" characters 1762 fMarkupDepth--; 1763 1764 // check that this element was opened in the same entity 1765 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1766 reportFatalError("ElementEntityMismatch", 1767 new Object[]{rawname}); 1768 } 1769 1770 //We should not be popping out the context here in endELement becaause the namespace context is still 1771 //valid when parser is at the endElement state. 
1772 1773 //if (fNamespaces) { 1774 // fNamespaceContext.popContext(); 1775 //} 1776 1777 // call handler 1778 if (fDocumentHandler != null ) { 1779 //end element is scanned in this function so we can send a callback 1780 //here. 1781 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1782 1783 fDocumentHandler.endElement(endElementName, null); 1784 } 1785 if(dtdGrammarUtil != null) 1786 dtdGrammarUtil.endElement(endElementName); 1787 1788 return fMarkupDepth; 1789 1790 } // scanEndElement():int 1791 1792 /** 1793 * Scans a character reference. 1794 * <p> 1795 * <pre> 1796 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1797 * </pre> 1798 */ 1799 protected void scanCharReference() 1800 throws IOException, XNIException { 1801 1802 fStringBuffer2.clear(); 1803 int ch = scanCharReferenceValue(fStringBuffer2, null); 1804 fMarkupDepth--; 1805 if (ch != -1) { 1806 // call handler 1807 1808 if (fDocumentHandler != null) { 1809 if (fNotifyCharRefs) { 1810 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1811 } 1812 Augmentations augs = null; 1813 if (fValidation && ch <= 0x20) { 1814 if (fTempAugmentations != null) { 1815 fTempAugmentations.removeAllItems(); 1816 } 1817 else { 1818 fTempAugmentations = new AugmentationsImpl(); 1819 } 1820 augs = fTempAugmentations; 1821 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1822 } 1823 //xxx: How do we deal with this - how to return charReferenceValues 1824 //now this is being commented because this is taken care in scanDocument() 1825 //fDocumentHandler.characters(fStringBuffer2, null); 1826 if (fNotifyCharRefs) { 1827 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1828 } 1829 } 1830 } 1831 1832 } // scanCharReference() 1833 1834 1835 /** 1836 * Scans an entity reference. 1837 * 1838 * @return returns true if the new entity is started. If it was built-in entity 1839 * 'false' is returned. 1840 * @throws IOException Thrown if i/o error occurs. 
* @throws XNIException Thrown if handler throws exception upon
*                      notification.
*/
protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException {
    String name = fEntityScanner.scanName();
    if (name == null) {
        reportFatalError("NameRequiredInReference", null);
        return;
    }
    if (!fEntityScanner.skipChar(';')) {
        reportFatalError("SemicolonRequiredInReference", new Object []{name});
    }
    if (fEntityStore.isUnparsedEntity(name)) {
        reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
    }
    fMarkupDepth--;
    fCurrentEntityName = name;

    // handle built-in entities
    // (names are compared by identity; presumably both sides are interned
    // symbols -- verify against the symbol table usage)
    if (name == fAmpSymbol) {
        handleCharacter('&', fAmpSymbol, content);
        fScannerState = SCANNER_STATE_BUILT_IN_REFS;
        return ;
    } else if (name == fLtSymbol) {
        handleCharacter('<', fLtSymbol, content);
        fScannerState = SCANNER_STATE_BUILT_IN_REFS;
        return ;
    } else if (name == fGtSymbol) {
        handleCharacter('>', fGtSymbol, content);
        fScannerState = SCANNER_STATE_BUILT_IN_REFS;
        return ;
    } else if (name == fQuotSymbol) {
        handleCharacter('"', fQuotSymbol, content);
        fScannerState = SCANNER_STATE_BUILT_IN_REFS;
        return ;
    } else if (name == fAposSymbol) {
        handleCharacter('\'', fAposSymbol, content);
        fScannerState = SCANNER_STATE_BUILT_IN_REFS;
        return ;
    }

    // The reference is reported as such (not expanded) when:
    //1. the entity is external and support to external entities is not required
    //2. or entities should not be replaced
    //3. or if it is built in entity reference.
    boolean isEE = fEntityStore.isExternalEntity(name);
    if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){
        fScannerState = SCANNER_STATE_REFERENCE;
        return ;
    }
    // start general entity
    if (!fEntityStore.isDeclaredEntity(name)) {
        //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception
        if (!fSupportDTD && fReplaceEntityReferences) {
            reportFatalError("EntityNotDeclared", new Object[]{name});
            return;
        }
        //REVISIT: one more case needs to be included: external PE and standalone is no
        // With an external DTD in a non-standalone document an undeclared
        // entity is only a (validity) error, reported when validating;
        // otherwise it is a fatal well-formedness error.
        if ( fHasExternalDTD && !fStandalone) {
            if (fValidation)
                fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
                        new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
        } else
            reportFatalError("EntityNotDeclared", new Object[]{name});
    }
    //we are starting the entity even if the entity was not declared
    //if that was the case it its taken care in XMLEntityManager.startEntity()
    //we immediately call the endEntity. Application gets to know if there was
    //any entity that was not declared.
    fEntityManager.startEntity(name, false);
    //set the scaner state to content.. parser will automatically revive itself at any point of time.
    //setScannerState(SCANNER_STATE_CONTENT);
    //return true ;
} // scanEntityReference()

// utility methods

/**
 * Check if the depth exceeds the maxElementDepth limit
 * <p>
 * The current element depth is recorded in the limit analyzer and a fatal
 * error is reported when the security manager says the limit is exceeded.
 * <p>
 * NOTE(review): fSecurityManager and fLimitAnalyzer are dereferenced
 * without a null check here -- confirm both are always set before
 * scanning starts.
 *
 * @param elementName name of the current element
 */
void checkDepth(String elementName) {
    fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth);
    if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) {
        fSecurityManager.debugPrint(fLimitAnalyzer);
        reportFatalError("MaxElementDepthLimit", new Object[]{elementName,
                fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                "maxElementDepth"});
    }
}

/**
 * Calls document handler with a single character resulting from
 * built-in entity resolution.
 * <p>
 * The character is always appended to the given buffer and
 * foundBuiltInRefs is set. The characters() callback is made only when
 * not coalescing; the start/end general entity callbacks are made only
 * when fNotifyBuiltInRefs is set.
 *
 * @param c the replacement character
 * @param entity built-in name
 * @param content buffer the character is appended to
 *
 * we really dont need to call this function -- this function is only required when
 * we integrate with rest of Xerces2. SO maintaining the current behavior and still
 * calling this function to handle built-in entity reference.
 *
 */
private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException {
    foundBuiltInRefs = true;
    content.append(c);
    if (fDocumentHandler != null) {
        fSingleChar[0] = c;
        if (fNotifyBuiltInRefs) {
            fDocumentHandler.startGeneralEntity(entity, null, null, null);
        }
        fTempString.setValues(fSingleChar, 0, 1);
        if(!fIsCoalesce){
            fDocumentHandler.characters(fTempString, null);
            builtInRefCharacterHandled = true;
        }

        if (fNotifyBuiltInRefs) {
            fDocumentHandler.endGeneralEntity(entity, null);
        }
    }
} // handleCharacter(char)

// helper methods

/**
 * Sets the scanner state.
1969 * 1970 * @param state The new scanner state. 1971 */ 1972 protected final void setScannerState(int state) { 1973 1974 fScannerState = state; 1975 if (DEBUG_SCANNER_STATE) { 1976 System.out.print("### setScannerState: "); 1977 //System.out.print(fScannerState); 1978 System.out.print(getScannerStateName(state)); 1979 System.out.println(); 1980 } 1981 1982 } // setScannerState(int) 1983 1984 1985 /** 1986 * Sets the Driver. 1987 * 1988 * @param Driver The new Driver. 1989 */ 1990 protected final void setDriver(Driver driver) { 1991 fDriver = driver; 1992 if (DEBUG_DISPATCHER) { 1993 System.out.print("%%% setDriver: "); 1994 System.out.print(getDriverName(driver)); 1995 System.out.println(); 1996 } 1997 } 1998 1999 // 2000 // Private methods 2001 // 2002 2003 /** Returns the scanner state name. */ 2004 protected String getScannerStateName(int state) { 2005 2006 switch (state) { 2007 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 2008 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 2009 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 2010 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 2011 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 2012 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 2013 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 2014 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 2015 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 2016 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 2017 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 2018 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 2019 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2020 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2021 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2022 case SCANNER_STATE_CHARACTER_DATA: 
return "SCANNER_STATE_CHARACTER_DATA" ; 2023 } 2024 2025 return "??? ("+state+')'; 2026 2027 } // getScannerStateName(int):String 2028 public String getEntityName(){ 2029 //return the cached name 2030 return fCurrentEntityName; 2031 } 2032 2033 /** Returns the driver name. */ 2034 public String getDriverName(Driver driver) { 2035 2036 if (DEBUG_DISPATCHER) { 2037 if (driver != null) { 2038 String name = driver.getClass().getName(); 2039 int index = name.lastIndexOf('.'); 2040 if (index != -1) { 2041 name = name.substring(index + 1); 2042 index = name.lastIndexOf('$'); 2043 if (index != -1) { 2044 name = name.substring(index + 1); 2045 } 2046 } 2047 return name; 2048 } 2049 } 2050 return "null"; 2051 2052 } // getDriverName():String 2053 2054 /** 2055 * Check the protocol used in the systemId against allowed protocols 2056 * 2057 * @param systemId the Id of the URI 2058 * @param allowedProtocols a list of allowed protocols separated by comma 2059 * @return the name of the protocol if rejected, null otherwise 2060 */ 2061 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2062 String baseSystemId = fEntityScanner.getBaseSystemId(); 2063 String expandedSystemId = fEntityManager.expandSystemId(systemId, baseSystemId,fStrictURI); 2064 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2065 } 2066 2067 // 2068 // Classes 2069 // 2070 2071 /** 2072 * @author Neeraj Bajaj, Sun Microsystems. 2073 */ 2074 protected static final class Element { 2075 2076 // 2077 // Data 2078 // 2079 2080 /** Symbol. */ 2081 public QName qname; 2082 2083 //raw name stored as characters 2084 public char[] fRawname; 2085 2086 /** The next Element entry. */ 2087 public Element next; 2088 2089 // 2090 // Constructors 2091 // 2092 2093 /** 2094 * Constructs a new Element from the given QName and next Element 2095 * reference. 
2096 */ 2097 public Element(QName qname, Element next) { 2098 this.qname.setValues(qname); 2099 this.fRawname = qname.rawname.toCharArray(); 2100 this.next = next; 2101 } 2102 2103 } // class Element 2104 2105 /** 2106 * Element stack. 2107 * 2108 * @author Neeraj Bajaj, Sun Microsystems. 2109 */ 2110 protected class ElementStack2 { 2111 2112 // 2113 // Data 2114 // 2115 2116 /** The stack data. */ 2117 protected QName [] fQName = new QName[20]; 2118 2119 //Element depth 2120 protected int fDepth; 2121 //total number of elements 2122 protected int fCount; 2123 //current position 2124 protected int fPosition; 2125 //Mark refers to the position 2126 protected int fMark; 2127 2128 protected int fLastDepth ; 2129 2130 // 2131 // Constructors 2132 // 2133 2134 /** Default constructor. */ 2135 public ElementStack2() { 2136 for (int i = 0; i < fQName.length; i++) { 2137 fQName[i] = new QName(); 2138 } 2139 fMark = fPosition = 1; 2140 } // <init>() 2141 2142 public void resize(){ 2143 /** 2144 * int length = fElements.length; 2145 * Element [] temp = new Element[length * 2]; 2146 * System.arraycopy(fElements, 0, temp, 0, length); 2147 * fElements = temp; 2148 */ 2149 //resize QNames 2150 int oldLength = fQName.length; 2151 QName [] tmp = new QName[oldLength * 2]; 2152 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2153 fQName = tmp; 2154 2155 for (int i = oldLength; i < fQName.length; i++) { 2156 fQName[i] = new QName(); 2157 } 2158 2159 } 2160 2161 2162 // 2163 // Public methods 2164 // 2165 2166 /** Check if the element scanned during the start element 2167 *matches the stored element. 2168 * 2169 *@return true if the match suceeds. 
2170 */ 2171 public boolean matchElement(QName element) { 2172 //last depth is the depth when last elemnt was pushed 2173 //if last depth is greater than current depth 2174 if(DEBUG_SKIP_ALGORITHM){ 2175 System.out.println("fLastDepth = " + fLastDepth); 2176 System.out.println("fDepth = " + fDepth); 2177 } 2178 boolean match = false; 2179 if(fLastDepth > fDepth && fDepth <= 2){ 2180 if(DEBUG_SKIP_ALGORITHM){ 2181 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2182 } 2183 if(element.rawname == fQName[fDepth].rawname){ 2184 fAdd = false; 2185 //mark this position 2186 //decrease the depth by 1 as arrays are 0 based 2187 fMark = fDepth - 1; 2188 //we found the match and from next element skipping will start, add 1 2189 fPosition = fMark + 1 ; 2190 match = true; 2191 //Once we get match decrease the count -- this was increased by nextElement() 2192 --fCount; 2193 if(DEBUG_SKIP_ALGORITHM){ 2194 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2195 System.out.println("fMark = " + fMark); 2196 System.out.println("fPosition = " + fPosition); 2197 System.out.println("fDepth = " + fDepth); 2198 System.out.println("fCount = " + fCount); 2199 } 2200 }else{ 2201 fAdd = true; 2202 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2203 } 2204 } 2205 //store the last depth 2206 fLastDepth = fDepth++; 2207 return match; 2208 } // pushElement(QName):QName 2209 2210 /** 2211 * This function doesn't increase depth. The function in this function is 2212 *broken down into two functions for efficiency. <@see>matchElement</see>. 2213 * This function just returns the pointer to the object and its values are set. 
2214 * 2215 *@return QName reference to the next element in the list 2216 */ 2217 public QName nextElement() { 2218 2219 //if number of elements becomes equal to the length of array -- stop the skipping 2220 if (fCount == fQName.length) { 2221 fShouldSkip = false; 2222 fAdd = false; 2223 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2224 //xxx: this is not correct, we are returning the last element 2225 //this wont make any difference since flag has been set to 'false' 2226 return fQName[--fCount]; 2227 } 2228 if(DEBUG_SKIP_ALGORITHM){ 2229 System.out.println("fCount = " + fCount); 2230 } 2231 return fQName[fCount++]; 2232 2233 } 2234 2235 /** Note that this function is considerably different than nextElement() 2236 * This function just returns the previously stored elements 2237 */ 2238 public QName getNext(){ 2239 //when position reaches number of elements in the list.. 2240 //set the position back to mark, making it a circular linked list. 2241 if(fPosition == fCount){ 2242 fPosition = fMark; 2243 } 2244 return fQName[fPosition++]; 2245 } 2246 2247 /** returns the current depth 2248 */ 2249 public int popElement(){ 2250 return fDepth--; 2251 } 2252 2253 2254 /** Clears the stack without throwing away existing QName objects. */ 2255 public void clear() { 2256 fLastDepth = 0; 2257 fDepth = 0; 2258 fCount = 0 ; 2259 fPosition = fMark = 1; 2260 } // clear() 2261 2262 } // class ElementStack 2263 2264 /** 2265 * Element stack. This stack operates without synchronization, error 2266 * checking, and it re-uses objects instead of throwing popped items 2267 * away. 2268 * 2269 * @author Andy Clark, IBM 2270 */ 2271 protected class ElementStack { 2272 2273 // 2274 // Data 2275 // 2276 2277 /** The stack data. 
*/
    protected QName[] fElements;
    // Per-depth index into fElements: written by push() and matchElement(),
    // read by popElement() and reposition(). Fixed at 20 entries; nothing in
    // this class grows it, even though fElements itself can be doubled.
    protected int [] fInt = new int[20];


    //Element depth
    protected int fDepth;
    //total number of elements
    protected int fCount;
    //current position
    protected int fPosition;
    //Mark refers to the position
    protected int fMark;

    //depth recorded at the end of the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor: allocates 20 slots, each pre-populated with a reusable QName. */
    public ElementStack() {
        fElements = new QName[20];
        for (int i = 0; i < fElements.length; i++) {
            fElements[i] = new QName();
        }
    } // <init>()

    //
    // Public methods
    //

    /**
     * Pushes an element on the stack.
     * <p>
     * <strong>Note:</strong> The QName values are copied into the
     * stack. In other words, the caller does <em>not</em> orphan
     * the element to the stack. Also, the QName object returned
     * is <em>not</em> orphaned to the caller. It should be
     * considered read-only.
     * <p>
     * The backing array is doubled (and the new slots filled with fresh
     * QNames) when the depth has reached its length.
     *
     * @param element The element to push onto the stack.
     *
     * @return Returns the actual QName object that stores the
     *         copied values.
     */
    //XXX: THIS FUNCTION IS NOT USED
    public QName pushElement(QName element) {
        if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }
        fElements[fDepth].setValues(element);
        return fElements[fDepth++];
    } // pushElement(QName):QName


    /** Note that this function is considerably different than nextElement()
     * This function just returns the previously stored elements.
     * It does not advance the position itself; push() does that.
     */
    public QName getNext(){
        //when position reaches number of elements in the list..
        //set the position back to mark, making it a circular linked list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        //store the position of last opened tag at particular depth
        //fInt[++fDepth] = fPosition;
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
        }
        //return fElements[fPosition++];
        return fElements[fPosition];
    }

    /** This function should be called only when element was skipped successfully.
     * 1. Increase the depth - because element was successfully skipped.
     * 2. Store the position of the element token in array "last opened tag" at depth.
     * 3. Increase the position counter so as to point to the next element in the array.
     */
    public void push(){

        // Pre-increment of fDepth and post-increment of fPosition: the current
        // position is stored at the new depth, then the position advances.
        fInt[++fDepth] = fPosition++;
    }

    /** Check if the element scanned during the start element
     * matches the stored element.
     * <p>
     * As a side effect this records in fInt[fDepth] which fElements slot is
     * open at the current depth, and it switches skipping off (clearing the
     * fSkip and fAdd flags, which are not declared in this class -- presumably
     * fields of the enclosing scanner) once fCount reaches the array length.
     *
     * @param element the element just scanned
     * @return true if the match succeeds; always false when the array has
     *         filled up and the stack was repositioned.
     */
    public boolean matchElement(QName element) {
        //last depth is the depth when last element was pushed
        //if last depth is greater than current depth
        //if(DEBUG_SKIP_ALGORITHM){
        //   System.out.println("Check if the element " + element.rawname + " matches");
        //  System.out.println("fLastDepth = " + fLastDepth);
        // System.out.println("fDepth = " + fDepth);
        //}
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 3){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
            }
            // raw names are compared by reference, not with equals()
            if(element.rawname == fElements[fDepth - 1].rawname){
                fAdd = false;
                //mark this position
                //decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                //we found the match
                fPosition = fMark;
                match = true;
                //Once we get match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                    System.out.println("---------MATCH SUCEEDED-----------------");
                    System.out.println("");
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        //store the position for the current depth
        //when we are adding the elements, when skipping
        //starts even then this should be tracked ie. when
        //calling getNext()
        if(match){
            //from next element skipping will start, add 1
            fInt[fDepth] = fPosition++;
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
            }
            //since fInt[fDepth] contains pointer to the element array which are 0 based.
            fInt[fDepth] = fCount - 1;
        }

        //if number of elements becomes equal to the length of array -- stop the skipping
        //xxx: should we do "fCount == fInt.length"
        if (fCount == fElements.length) {
            fSkip = false;
            fAdd = false;
            //reposition the stack -- it seems to be too complex document and there is no symmetry in structure
            reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                System.out.println("REPOSITIONING THE STACK");
                System.out.println("-----------SKIPPING STOPPED----------");
                System.out.println("");
            }
            // note: this early exit leaves fLastDepth untouched
            return false;
        }
        if(DEBUG_SKIP_ALGORITHM){
            if(match){
                System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }else{
                System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }
        }
        //store the last depth
        fLastDepth = fDepth;
        return match;
    } // matchElement(QName):boolean


    /**
     * Returns the next element on the stack.
     * <p>
     * While fSkip is set, the depth is incremented and the slot at fCount is
     * handed out (fCount then advances). Otherwise the slot at fDepth is
     * handed out, after doubling the backing array if the depth has reached
     * its length.
     *
     * @return Returns the actual QName object. Callee should
     * use this object to store the details of next element encountered.
     */
    public QName nextElement() {
        if(fSkip){
            fDepth++;
            //boundary checks are done in matchElement()
            return fElements[fCount++];
        } else if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }

        return fElements[fDepth++];

    } // nextElement():QName


    /**
     * Pops an element off of the stack by setting the values of
     * the specified QName.
     * <p>
     * <strong>Note:</strong> The object returned is <em>not</em>
     * orphaned to the caller.
Therefore, the caller should consider 2482 * the object to be read-only. 2483 */ 2484 public QName popElement() { 2485 //return the same object that was pushed -- this would avoid 2486 //setting the values for every end element. 2487 //STRONG: this object is read only -- this object reference shouldn't be stored. 2488 if(fSkip || fAdd ){ 2489 if(DEBUG_SKIP_ALGORITHM){ 2490 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2491 System.out.println(""); 2492 } 2493 return fElements[fInt[fDepth--]]; 2494 } else{ 2495 if(DEBUG_SKIP_ALGORITHM){ 2496 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2497 } 2498 return fElements[--fDepth] ; 2499 } 2500 //element.setValues(fElements[--fDepth]); 2501 } // popElement(QName) 2502 2503 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2504 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2505 *as per the depth. 2506 */ 2507 public void reposition(){ 2508 for( int i = 2 ; i <= fDepth ; i++){ 2509 fElements[i-1] = fElements[fInt[i]]; 2510 } 2511 if(DEBUG_SKIP_ALGORITHM){ 2512 for( int i = 0 ; i < fDepth ; i++){ 2513 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2514 } 2515 } 2516 } 2517 2518 /** Clears the stack without throwing away existing QName objects. */ 2519 public void clear() { 2520 fDepth = 0; 2521 fLastDepth = 0; 2522 fCount = 0 ; 2523 fPosition = fMark = 1; 2524 2525 } // clear() 2526 2527 /** 2528 * This function is as a result of optimization done for endElement -- 2529 * we dont need to set the value for every end element encouterd. 2530 * For Well formedness checks we can have the same QName object that was pushed. 
2531 * the values will be set only if application need to know about the endElement 2532 * -- neeraj.bajaj@sun.com 2533 */ 2534 2535 public QName getLastPoppedElement(){ 2536 return fElements[fDepth]; 2537 } 2538 } // class ElementStack 2539 2540 /** 2541 * Drives the parser to the next state/event on the input. Parser is guaranteed 2542 * to stop at the next state/event. 2543 * 2544 * Internally XML document is divided into several states. Each state represents 2545 * a sections of XML document. When this functions returns normally, it has read 2546 * the section of XML document and returns the state corresponding to section of 2547 * document which has been read. For optimizations, a particular driver 2548 * can read ahead of the section of document (state returned) just read and 2549 * can maintain a different internal state. 2550 * 2551 * 2552 * @author Neeraj Bajaj, Sun Microsystems 2553 */ 2554 protected interface Driver { 2555 2556 2557 /** 2558 * Drives the parser to the next state/event on the input. Parser is guaranteed 2559 * to stop at the next state/event. 2560 * 2561 * Internally XML document is divided into several states. Each state represents 2562 * a sections of XML document. When this functions returns normally, it has read 2563 * the section of XML document and returns the state corresponding to section of 2564 * document which has been read. For optimizations, a particular driver 2565 * can read ahead of the section of document (state returned) just read and 2566 * can maintain a different internal state. 2567 * 2568 * @return state representing the section of document just read. 2569 * 2570 * @throws IOException Thrown on i/o error. 2571 * @throws XNIException Thrown on parse error. 2572 */ 2573 2574 public int next() throws IOException, XNIException; 2575 2576 } // interface Driver 2577 2578 /** 2579 * Driver to handle content scanning. This driver is capable of reading 2580 * the fragment of XML document. 
When it has finished reading fragment 2581 * of XML documents, it can pass the job of reading to another driver. 2582 * 2583 * This class has been modified as per the new design which is more suited to 2584 * efficiently build pull parser. Lot of performance improvements have been done and 2585 * the code has been added to support stax functionality/features. 2586 * 2587 * @author Neeraj Bajaj, Sun Microsystems 2588 * 2589 * 2590 * @author Andy Clark, IBM 2591 * @author Eric Ye, IBM 2592 */ 2593 protected class FragmentContentDriver 2594 implements Driver { 2595 2596 // 2597 // Driver methods 2598 // 2599 private boolean fContinueDispatching = true; 2600 private boolean fScanningForMarkup = true; 2601 2602 /** 2603 * decides the appropriate state of the parser 2604 */ 2605 private void startOfMarkup() throws IOException { 2606 fMarkupDepth++; 2607 final int ch = fEntityScanner.peekChar(); 2608 2609 switch(ch){ 2610 case '?' :{ 2611 setScannerState(SCANNER_STATE_PI); 2612 fEntityScanner.skipChar(ch); 2613 break; 2614 } 2615 case '!' 
:{ 2616 fEntityScanner.skipChar(ch); 2617 if (fEntityScanner.skipChar('-')) { 2618 if (!fEntityScanner.skipChar('-')) { 2619 reportFatalError("InvalidCommentStart", 2620 null); 2621 } 2622 setScannerState(SCANNER_STATE_COMMENT); 2623 } else if (fEntityScanner.skipString(cdata)) { 2624 setScannerState(SCANNER_STATE_CDATA ); 2625 } else if (!scanForDoctypeHook()) { 2626 reportFatalError("MarkupNotRecognizedInContent", 2627 null); 2628 } 2629 break; 2630 } 2631 case '/' :{ 2632 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2633 fEntityScanner.skipChar(ch); 2634 break; 2635 } 2636 default :{ 2637 if (isValidNameStartChar(ch)) { 2638 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2639 } else { 2640 reportFatalError("MarkupNotRecognizedInContent", 2641 null); 2642 } 2643 } 2644 } 2645 2646 }//startOfMarkup 2647 2648 private void startOfContent() throws IOException { 2649 if (fEntityScanner.skipChar('<')) { 2650 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2651 } else if (fEntityScanner.skipChar('&')) { 2652 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2653 } else { 2654 //element content is there.. 2655 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2656 } 2657 }//startOfContent 2658 2659 2660 /** 2661 * 2662 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2663 * At any point of time when in doubt over the current state of the parser, the state should be 2664 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2665 * the parser to one of its sub state. 2666 * sub states are defined in the parser on the basis of different XML component like 2667 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2668 * These sub states help the parser to have fine control over the parsing. These are the 2669 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2670 * decided if paresr needs to stop at next milepost ?? 2671 * 2672 */ 2673 public void decideSubState() throws IOException { 2674 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2675 2676 switch (fScannerState) { 2677 2678 case SCANNER_STATE_CONTENT: { 2679 startOfContent() ; 2680 break; 2681 } 2682 2683 case SCANNER_STATE_START_OF_MARKUP: { 2684 startOfMarkup() ; 2685 break; 2686 } 2687 } 2688 } 2689 }//decideSubState 2690 2691 /** 2692 * Drives the parser to the next state/event on the input. Parser is guaranteed 2693 * to stop at the next state/event. Internally XML document 2694 * is divided into several states. Each state represents a sections of XML 2695 * document. When this functions returns normally, it has read the section 2696 * of XML document and returns the state corresponding to section of 2697 * document which has been read. For optimizations, a particular driver 2698 * can read ahead of the section of document (state returned) just read and 2699 * can maintain a different internal state. 2700 * 2701 * State returned corresponds to Stax states. 2702 * 2703 * @return state representing the section of document just read. 2704 * 2705 * @throws IOException Thrown on i/o error. 2706 * @throws XNIException Thrown on parse error. 2707 */ 2708 2709 public int next() throws IOException, XNIException { 2710 while (true) { 2711 try { 2712 if(DEBUG_NEXT){ 2713 System.out.println("NOW IN FragmentContentDriver"); 2714 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2715 } 2716 2717 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2718 //decideSubState. 
2719 2720 switch (fScannerState) { 2721 case SCANNER_STATE_CONTENT: { 2722 final int ch = fEntityScanner.peekChar(); 2723 if (ch == '<') { 2724 fEntityScanner.scanChar(); 2725 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2726 } else if (ch == '&') { 2727 fEntityScanner.scanChar(); 2728 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2729 break; 2730 } else { 2731 //element content is there.. 2732 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2733 break; 2734 } 2735 } 2736 2737 case SCANNER_STATE_START_OF_MARKUP: { 2738 startOfMarkup(); 2739 break; 2740 }//case: SCANNER_STATE_START_OF_MARKUP 2741 2742 }//end of switch 2743 //decideSubState() ; 2744 2745 //do some special handling if isCoalesce is set to true. 2746 if(fIsCoalesce){ 2747 fUsebuffer = true ; 2748 //if the last section was character data 2749 if(fLastSectionWasCharacterData){ 2750 2751 //if we dont encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2752 //return the last scanned charactrer data. 2753 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2754 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2755 fLastSectionWasCharacterData = false; 2756 return XMLEvent.CHARACTERS; 2757 } 2758 }//if last section was CDATA or ENTITY REFERENCE 2759 //xxx: there might be another entity reference or CDATA after this 2760 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2761 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2762 //and current state is not SCANNER_STATE_CHARACTER_DATA 2763 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2764 //this means there is nothing more to be coalesced. 2765 //return the CHARACTERS event. 
2766 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2767 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2768 2769 fLastSectionWasCData = false; 2770 fLastSectionWasEntityReference = false; 2771 return XMLEvent.CHARACTERS; 2772 } 2773 } 2774 } 2775 2776 2777 if(DEBUG_NEXT){ 2778 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2779 } 2780 2781 switch(fScannerState){ 2782 2783 case XMLEvent.START_DOCUMENT : 2784 return XMLEvent.START_DOCUMENT; 2785 2786 case SCANNER_STATE_START_ELEMENT_TAG :{ 2787 2788 //xxx this function returns true when element is empty.. can be linked to end element event. 2789 //returns true if the element is empty 2790 fEmptyElement = scanStartElement() ; 2791 //if the element is empty the next event is "end element" 2792 if(fEmptyElement){ 2793 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2794 }else{ 2795 //set the next possible state 2796 setScannerState(SCANNER_STATE_CONTENT); 2797 } 2798 return XMLEvent.START_ELEMENT ; 2799 } 2800 2801 case SCANNER_STATE_CHARACTER_DATA: { 2802 if(DEBUG_COALESCE){ 2803 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2804 System.out.println("fIsCoalesce = " + fIsCoalesce); 2805 } 2806 //if last section was either entity reference or cdata or character data we should be using buffer 2807 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2808 2809 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2810 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2811 fLastSectionWasEntityReference = false; 2812 fLastSectionWasCData = false; 2813 fLastSectionWasCharacterData = true ; 2814 fUsebuffer = true; 2815 }else{ 2816 //clear the buffer 2817 fContentBuffer.clear(); 2818 } 2819 2820 //set the fTempString length to 0 before passing it on to scanContent 2821 //scanContent sets the correct co-ordinates as per the content read 2822 fTempString.length = 0; 2823 int c = fEntityScanner.scanContent(fTempString); 2824 if(DEBUG){ 2825 System.out.println("fTempString = " + fTempString); 2826 } 2827 if(fEntityScanner.skipChar('<')){ 2828 //check if we have reached end of element 2829 if(fEntityScanner.skipChar('/')){ 2830 //increase the mark up depth 2831 fMarkupDepth++; 2832 fLastSectionWasCharacterData = false; 2833 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2834 //check if its start of new element 2835 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2836 fMarkupDepth++; 2837 fLastSectionWasCharacterData = false; 2838 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2839 }else{ 2840 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2841 //there can be cdata ahead if coalesce is true we should call again 2842 if(fIsCoalesce){ 2843 fUsebuffer = true; 2844 fLastSectionWasCharacterData = true; 2845 fContentBuffer.append(fTempString); 2846 fTempString.length = 0; 2847 continue; 2848 } 2849 } 2850 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2851 if(fUsebuffer){ 2852 fContentBuffer.append(fTempString); 2853 fTempString.length = 0; 2854 } 2855 if(DEBUG){ 2856 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2857 } 2858 //check limit before returning event 2859 checkLimit(fContentBuffer); 2860 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2861 if(DEBUG)System.out.println("Return 
SPACE EVENT"); 2862 return XMLEvent.SPACE; 2863 }else 2864 return XMLEvent.CHARACTERS; 2865 2866 } else{ 2867 fUsebuffer = true ; 2868 if(DEBUG){ 2869 System.out.println("fContentBuffer = " + fContentBuffer); 2870 System.out.println("fTempString = " + fTempString); 2871 } 2872 fContentBuffer.append(fTempString); 2873 fTempString.length = 0; 2874 } 2875 if (c == '\r') { 2876 if(DEBUG){ 2877 System.out.println("'\r' character found"); 2878 } 2879 // happens when there is the character reference 2880 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2881 fEntityScanner.scanChar(); 2882 fUsebuffer = true; 2883 fContentBuffer.append((char)c); 2884 c = -1 ; 2885 } else if (c == ']') { 2886 //fStringBuffer.clear(); 2887 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2888 fUsebuffer = true; 2889 fContentBuffer.append((char)fEntityScanner.scanChar()); 2890 // remember where we are in case we get an endEntity before we 2891 // could flush the buffer out - this happens when we're parsing an 2892 // entity which ends with a ] 2893 fInScanContent = true; 2894 2895 // We work on a single character basis to handle cases such as: 2896 // ']]]>' which we might otherwise miss. 2897 // 2898 if (fEntityScanner.skipChar(']')) { 2899 fContentBuffer.append(']'); 2900 while (fEntityScanner.skipChar(']')) { 2901 fContentBuffer.append(']'); 2902 } 2903 if (fEntityScanner.skipChar('>')) { 2904 reportFatalError("CDEndInContent", null); 2905 } 2906 } 2907 c = -1 ; 2908 fInScanContent = false; 2909 } 2910 2911 do{ 2912 //xxx: we should be using only one buffer.. 2913 // we need not to grow the buffer only when isCoalesce() is not true; 2914 2915 if (c == '<') { 2916 fEntityScanner.scanChar(); 2917 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2918 break; 2919 }//xxx what should be the behavior if entity reference is present in the content ? 
2920 else if (c == '&') { 2921 fEntityScanner.scanChar(); 2922 setScannerState(SCANNER_STATE_REFERENCE); 2923 break; 2924 }///xxx since this part is also characters, it should be merged... 2925 else if (c != -1 && isInvalidLiteral(c)) { 2926 if (XMLChar.isHighSurrogate(c)) { 2927 // special case: surrogates 2928 scanSurrogates(fContentBuffer) ; 2929 setScannerState(SCANNER_STATE_CONTENT); 2930 } else { 2931 reportFatalError("InvalidCharInContent", 2932 new Object[] { 2933 Integer.toString(c, 16)}); 2934 fEntityScanner.scanChar(); 2935 } 2936 break; 2937 } 2938 //xxx: scanContent also gives character callback. 2939 c = scanContent(fContentBuffer) ; 2940 //we should not be iterating again if fIsCoalesce is not set to true 2941 2942 if(!fIsCoalesce){ 2943 setScannerState(SCANNER_STATE_CONTENT); 2944 break; 2945 } 2946 2947 }while(true); 2948 2949 //if (fDocumentHandler != null) { 2950 // fDocumentHandler.characters(fContentBuffer, null); 2951 //} 2952 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2953 //if fIsCoalesce is true there might be more data so call fDriver.next() 2954 if(fIsCoalesce){ 2955 fLastSectionWasCharacterData = true ; 2956 continue; 2957 }else{ 2958 //check limit before returning event 2959 checkLimit(fContentBuffer); 2960 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2961 if(DEBUG)System.out.println("Return SPACE EVENT"); 2962 return XMLEvent.SPACE; 2963 } else 2964 return XMLEvent.CHARACTERS ; 2965 } 2966 } 2967 2968 case SCANNER_STATE_END_ELEMENT_TAG :{ 2969 if(fEmptyElement){ 2970 //set it back to false. 2971 fEmptyElement = false; 2972 setScannerState(SCANNER_STATE_CONTENT); 2973 //check the case when there is comment after single element document 2974 //<foo/> and some comment after this 2975 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2976 2977 } else if(scanEndElement() == 0) { 2978 //It is last element of the document 2979 if (elementDepthIsZeroHook()) { 2980 //if element depth is zero , it indicates the end of the document 2981 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2982 //xxx understand this point once again.. 2983 return XMLEvent.END_ELEMENT ; 2984 } 2985 2986 } 2987 setScannerState(SCANNER_STATE_CONTENT); 2988 return XMLEvent.END_ELEMENT ; 2989 } 2990 2991 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2992 scanComment(); 2993 setScannerState(SCANNER_STATE_CONTENT); 2994 return XMLEvent.COMMENT; 2995 //break; 2996 } 2997 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2998 //clear the buffer first 2999 fContentBuffer.clear() ; 3000 //xxx: which buffer should be passed. Ideally we shouldn't have 3001 //more than two buffers -- 3002 //xxx: where should we add the switch for buffering. 3003 scanPI(fContentBuffer); 3004 setScannerState(SCANNER_STATE_CONTENT); 3005 return XMLEvent.PROCESSING_INSTRUCTION; 3006 //break; 3007 } 3008 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 3009 //xxx: What if CDATA is the first event 3010 //<foo><![CDATA[hello<><>]]>append</foo> 3011 3012 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 3013 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3014 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3015 fLastSectionWasCData = true ; 3016 fLastSectionWasEntityReference = false; 3017 fLastSectionWasCharacterData = false; 3018 }//if we dont need to coalesce clear the buffer 3019 else{ 3020 fContentBuffer.clear(); 3021 } 3022 fUsebuffer = true; 3023 //CDATA section is completely read in all the case. 3024 scanCDATASection(fContentBuffer , true); 3025 setScannerState(SCANNER_STATE_CONTENT); 3026 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3027 //and just call fDispatche.next(). Since we have set the scanner state to 3028 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3029 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3030 //2. Check if application has set for reporting CDATA event 3031 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3032 //return the cdata event as characters. 3033 if(fIsCoalesce){ 3034 fLastSectionWasCData = true ; 3035 //there might be more data to coalesce. 3036 continue; 3037 }else if(fReportCdataEvent){ 3038 return XMLEvent.CDATA; 3039 } else{ 3040 return XMLEvent.CHARACTERS; 3041 } 3042 } 3043 3044 case SCANNER_STATE_REFERENCE :{ 3045 fMarkupDepth++; 3046 foundBuiltInRefs = false; 3047 3048 //we should not clear the buffer only when the last state was either CDATA or 3049 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3050 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3051 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3052 //used when fIsCoalesce is set to true. 
3053 fLastSectionWasEntityReference = true ; 3054 fLastSectionWasCData = false; 3055 fLastSectionWasCharacterData = false; 3056 }//if we dont need to coalesce clear the buffer 3057 else{ 3058 fContentBuffer.clear(); 3059 } 3060 fUsebuffer = true ; 3061 //take care of character reference 3062 if (fEntityScanner.skipChar('#')) { 3063 scanCharReferenceValue(fContentBuffer, null); 3064 fMarkupDepth--; 3065 if(!fIsCoalesce){ 3066 setScannerState(SCANNER_STATE_CONTENT); 3067 return XMLEvent.CHARACTERS; 3068 } 3069 } else { 3070 // this function also starts new entity 3071 scanEntityReference(fContentBuffer); 3072 //if there was built-in entity reference & coalesce is not true 3073 //return CHARACTERS 3074 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3075 setScannerState(SCANNER_STATE_CONTENT); 3076 if (builtInRefCharacterHandled) { 3077 builtInRefCharacterHandled = false; 3078 return XMLEvent.ENTITY_REFERENCE; 3079 } else { 3080 return XMLEvent.CHARACTERS; 3081 } 3082 } 3083 3084 //if there was a text declaration, call next() it will be taken care. 3085 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3086 fLastSectionWasEntityReference = true ; 3087 continue; 3088 } 3089 3090 if(fScannerState == SCANNER_STATE_REFERENCE){ 3091 setScannerState(SCANNER_STATE_CONTENT); 3092 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3093 // Skip the entity reference, we don't care 3094 continue; 3095 } 3096 return XMLEvent.ENTITY_REFERENCE; 3097 } 3098 } 3099 //Wether it was character reference, entity reference or built-in entity 3100 //set the next possible state to SCANNER_STATE_CONTENT 3101 setScannerState(SCANNER_STATE_CONTENT); 3102 fLastSectionWasEntityReference = true ; 3103 continue; 3104 } 3105 3106 case SCANNER_STATE_TEXT_DECL: { 3107 // scan text decl 3108 if (fEntityScanner.skipString("<?xml")) { 3109 fMarkupDepth++; 3110 // NOTE: special case where entity starts with a PI 3111 // whose name starts with "xml" (e.g. 
"xmlfoo") 3112 if (isValidNameChar(fEntityScanner.peekChar())) { 3113 fStringBuffer.clear(); 3114 fStringBuffer.append("xml"); 3115 3116 if (fNamespaces) { 3117 while (isValidNCName(fEntityScanner.peekChar())) { 3118 fStringBuffer.append((char)fEntityScanner.scanChar()); 3119 } 3120 } else { 3121 while (isValidNameChar(fEntityScanner.peekChar())) { 3122 fStringBuffer.append((char)fEntityScanner.scanChar()); 3123 } 3124 } 3125 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3126 fContentBuffer.clear(); 3127 scanPIData(target, fContentBuffer); 3128 } 3129 3130 // standard text declaration 3131 else { 3132 //xxx: this function gives callback 3133 scanXMLDeclOrTextDecl(true); 3134 } 3135 } 3136 // now that we've straightened out the readers, we can read in chunks: 3137 fEntityManager.fCurrentEntity.mayReadChunks = true; 3138 setScannerState(SCANNER_STATE_CONTENT); 3139 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3140 //it seems we have to careful when to allow function issue a callback 3141 //and when to allow adapter issue a callback. 
// --- remainder of next(): closes the SCANNER_STATE_TEXT_DECL case that precedes this point ---
                            continue;
                        }


                        case SCANNER_STATE_ROOT_ELEMENT: {
                            // Let the (possibly overridden) hook scan the root element; a true
                            // result is recorded in fEmptyElement before the event is returned.
                            if (scanRootElementHook()) {
                                fEmptyElement = true;
                                //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
                                return XMLEvent.START_ELEMENT;
                            }
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.START_ELEMENT ;
                        }
                        case SCANNER_STATE_CHAR_REFERENCE : {
                            // scan the reference value into a freshly cleared content buffer
                            fContentBuffer.clear();
                            scanCharReferenceValue(fContentBuffer, null);
                            fMarkupDepth--;
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.CHARACTERS;
                        }
                        default:
                            throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                    }//switch
                }
                // premature end of file: endOfFileHook reports a fatal error when markup
                // is still open (fMarkupDepth != 0); afterwards -1 is returned to the caller
                catch (EOFException e) {
                    endOfFileHook(e);
                    return -1;
                }
            } //while loop
        }//next

        /**
         * Adds the length of the content buffer to the size accumulated for the
         * current entity and checks whether the accumulated values exceed the
         * configured limits.
         *
         * Nothing happens unless the limit analyzer is tracking
         * {@code fCurrentEntityName}. When it is, {@code buffer.length} is added
         * under {@code Limit.GENERAL_ENTITY_SIZE_LIMIT}; a "MaxEntitySizeLimit"
         * fatal error is reported if the general entity size limit is exceeded,
         * and a "TotalEntitySizeLimit" fatal error if the total entity size
         * limit is exceeded. Both checks are always performed, in that order.
         *
         * @param buffer content buffer whose {@code length} is added to the count
         */
        protected void checkLimit(XMLStringBuffer buffer) {
            if (fLimitAnalyzer.isTracking(fCurrentEntityName)) {
                fLimitAnalyzer.addValue(Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntityName, buffer.length);
                if (fSecurityManager.isOverLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    reportFatalError("MaxEntitySizeLimit", new Object[]{fCurrentEntityName,
                        fLimitAnalyzer.getValue(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.GENERAL_ENTITY_SIZE_LIMIT)});
                }
                if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    reportFatalError("TotalEntitySizeLimit",
                        new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)});
                }
            }
        }

        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "<!" has been scanned.
         * This base implementation scans nothing and returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. However, when scanning a full XML document, the
         * scanner must handle the trailing miscellaneous section of
         * the document after the end of the document's root element.
         * This base implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Scan for root element hook. This method is a hook for
         * subclasses to add code that handles scanning for the root
         * element. When scanning a document fragment, there is no
         * "root" element. However, when scanning a full XML document,
         * the scanner must handle the root element specially.
3246 * 3247 * @return True if the caller should stop and return true which 3248 * allows the scanner to switch to a new scanning 3249 * driver. A return value of false indicates that 3250 * the content driver should continue as normal. 3251 */ 3252 protected boolean scanRootElementHook() 3253 throws IOException, XNIException { 3254 return false; 3255 } // scanRootElementHook():boolean 3256 3257 /** 3258 * End of file hook. This method is a hook for subclasses to 3259 * add code that handles the end of file. The end of file in 3260 * a document fragment is OK if the markup depth is zero. 3261 * However, when scanning a full XML document, an end of file 3262 * is always premature. 3263 */ 3264 protected void endOfFileHook(EOFException e) 3265 throws IOException, XNIException { 3266 3267 // NOTE: An end of file is only only an error if we were 3268 // in the middle of scanning some markup. -Ac 3269 if (fMarkupDepth != 0) { 3270 reportFatalError("PrematureEOF", null); 3271 } 3272 3273 } // endOfFileHook() 3274 3275 } // class FragmentContentDriver 3276 3277 static void pr(String str) { 3278 System.out.println(str) ; 3279 } 3280 3281 protected boolean fUsebuffer ; 3282 3283 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3284 * maintained for attributes. 3285 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3286 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3287 * XMLString. 3288 * 3289 * @return XMLString XMLString used to store an attribute value. 
3290 */ 3291 3292 protected XMLString getString(){ 3293 if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ 3294 return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++); 3295 } else{ 3296 XMLString str = new XMLString(); 3297 fAttributeCacheUsedCount++; 3298 attributeValueCache.add(str); 3299 return str; 3300 } 3301 } 3302 3303 /** 3304 * Implements XMLBufferListener interface. 3305 */ 3306 3307 public void refresh(){ 3308 refresh(0); 3309 } 3310 3311 /** 3312 * receives callbacks from {@link XMLEntityReader } when buffer 3313 * is being changed. 3314 * @param refreshPosition 3315 */ 3316 public void refresh(int refreshPosition){ 3317 //If you are reading attributes and you got a callback 3318 //cache available attributes. 3319 if(fReadingAttributes){ 3320 fAttributes.refresh(); 3321 } 3322 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3323 //since fTempString directly matches to the underlying main buffer 3324 //store the data into buffer 3325 fContentBuffer.append(fTempString); 3326 //clear the XMLString so that data can't be added again. 3327 fTempString.length = 0; 3328 fUsebuffer = true; 3329 } 3330 } 3331 3332 } // class XMLDocumentFragmentScannerImpl