1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException; 25 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 26 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 27 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 28 import com.sun.org.apache.xerces.internal.util.XMLChar; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.QName; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 38 import 
com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 46 import com.sun.xml.internal.stream.XMLBufferListener; 47 import com.sun.xml.internal.stream.XMLEntityStorage; 48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49 import java.io.CharConversionException; 50 import java.io.EOFException; 51 import java.io.IOException; 52 import javax.xml.XMLConstants; 53 import javax.xml.stream.XMLInputFactory; 54 import javax.xml.stream.XMLStreamConstants; 55 import javax.xml.stream.events.XMLEvent; 56 import jdk.xml.internal.JdkXmlUtils; 57 import jdk.xml.internal.SecuritySupport; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 
 *
 * @author Neeraj Bajaj SUN Microsystems
 * @author K.Venugopal SUN Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, SUN Microsystems
 *
 * @LastModified: Sep 2017
 */
public class XMLDocumentFragmentScannerImpl
        extends XMLScanner
        implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {

    //
    // Constants
    //

    // NOTE: these two are mutable limits, not constants; resetCommon() loads
    // them from the security manager (or falls back to defaults).
    protected int fElementAttributeLimit, fXMLNameLimit;

    /** External subset resolver. **/
    protected ExternalSubsetResolver fExternalSubsetResolver;

    // scanner states

    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    // Scanner state: reading an attribute name,
    // e.g. 'type' in <book type="hard">
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    // Scanner state: reading an attribute value,
    // e.g. "hard" in <book type="hard">
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    // Scanner state: reading a start tag,
    // e.g. <book type="hard"> in <book type="hard">foo</book>
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    // Scanner state: reading an end tag,
    // e.g. </book> in <book type="hard">foo</book>
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard uri conformant */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: Security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /** access external dtd: file protocol
     *  For DOM/SAX, the secure feature is set to true by default
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features.
     * Index-parallel with FEATURE_DEFAULTS: getFeatureDefault(String) looks
     * up the identifier here and returns the default stored at the same index.
     */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT,
        XMLConstants.USE_CATALOG
    };

    /** Feature defaults; entry i is the default for RECOGNIZED_FEATURES[i]. */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,                       // NAMESPACES
        null,                               // VALIDATION (no default reported)
        Boolean.FALSE,                      // NOTIFY_BUILTIN_REFS
        Boolean.FALSE,                      // NOTIFY_CHAR_REFS
        Boolean.TRUE,                       // Constants.STAX_REPORT_CDATA_EVENT
        JdkXmlUtils.USE_CATALOG_DEFAULT     // XMLConstants.USE_CATALOG
    };

    /**
     * Recognized properties.
     * Index-parallel with PROPERTY_DEFAULTS: getPropertyDefault(String) looks
     * up the identifier here and returns the default stored at the same index.
     */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER,
        JdkXmlUtils.CATALOG_DEFER,
        JdkXmlUtils.CATALOG_FILES,
        JdkXmlUtils.CATALOG_PREFER,
        JdkXmlUtils.CATALOG_RESOLVE,
        JdkXmlUtils.CDATA_CHUNK_SIZE
    };

    /** Property defaults; entry i is the default for RECOGNIZED_PROPERTIES[i]. */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,                                   // SYMBOL_TABLE
        null,                                   // ERROR_REPORTER
        null,                                   // ENTITY_MANAGER
        null,                                   // XML_SECURITY_PROPERTY_MANAGER
        null,                                   // JdkXmlUtils.CATALOG_DEFER
        null,                                   // JdkXmlUtils.CATALOG_FILES
        null,                                   // JdkXmlUtils.CATALOG_PREFER
        null,                                   // JdkXmlUtils.CATALOG_RESOLVE
        JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT    // JdkXmlUtils.CDATA_CHUNK_SIZE
    };


    // Literal character sequences: "[CDATA[" and "<?xml".
    private static final char [] CDATA = {'[','C','D','A','T','A','['};
    static final char [] XMLDECL = {'<','?','x','m','l'};
    // private static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;

    /** Debug driver next */
    protected static final boolean DEBUG = false;

    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    // Compared against XMLEvent.END_ELEMENT in getElementQName();
    // presumably the last event type reported by the driver -- TODO confirm.
    protected int fScannerLastState ;

    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack. startEntity() stores the current fMarkupDepth at index
     * fEntityDepth; endEntity() compares against it to detect markup that is
     * not balanced within an entity. Starts at 4 slots and is doubled on demand.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth.
     */
    protected int fMarkupDepth;

    // is the element empty
    protected boolean fEmptyElement ;

    // track whether we are reading attributes; this is useful while
    // there is a callback
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    // fCDataStart / fCDataEnd are consumed (and cleared) by scanDocument() to
    // emit startCDATA / endCDATA around the CDATA characters callback.
    protected boolean fCDataStart = false;
    protected boolean fInCData = false;
    protected boolean fCDataEnd = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /** Standalone: whether the pseudo-attribute was present, and whether it was "yes". */
    protected boolean fStandaloneSet;
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    /** Processing-instruction target, exposed through getPITarget(). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    // STAX related properties
    // default values; both reset(...) overloads overwrite several of these.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding pseudo-attribute captured by scanXMLDeclOrTextDecl(boolean). */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    /**
     * CDATA chunk size limit; loaded from the JdkXmlUtils.CDATA_CHUNK_SIZE
     * property on reset.
     */
    private int fChunkSize;

    /**
     * comma-delimited list of protocols that are allowed for the purpose
     * of accessing external dtd or entity references
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * standard uri conformant (strict uri).
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /**
     * Content driver.
     * NOTE: createContentDriver() is an overridable method invoked during
     * field initialization, i.e. before any subclass constructor body runs.
     */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** Temporary string; returned by getCharacterData() when fUsebuffer is false. */
    protected XMLString fTempString = new XMLString();

    /** Second temporary string. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings: filled by scanXMLDeclOrTextDecl with the version,
     * encoding and standalone pseudo-attribute values (indices 0, 1, 2).
     */
    private final String[] fStrings = new String[3];

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Making the buffer accessible to derived class -- stores character data
     * and PI data. Returned by getPIData(), and by getCharacterData() when
     * fUsebuffer is true.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    /** Built-in reference character event */
    protected boolean builtInRefCharacterHandled = false;

    // skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    // create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    // pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    // 2D array to store pointer info, sized [MAX_DEPTH_LIMIT][MAX_POINTER_AT_A_DEPTH]
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    // The three flags below are cleared by resetCommon().
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
438 */ 439 public void setInputSource(XMLInputSource inputSource) throws IOException { 440 fEntityManager.setEntityHandler(this); 441 fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); 442 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 443 } // setInputSource(XMLInputSource) 444 445 /** 446 * Scans a document. 447 * 448 * @param complete True if the scanner should scan the document 449 * completely, pushing all events to the registered 450 * document handler. A value of false indicates that 451 * that the scanner should only scan the next portion 452 * of the document and return. A scanner instance is 453 * permitted to completely scan a document if it does 454 * not support this "pull" scanning model. 455 * 456 * @return True if there is more to scan, false otherwise. 457 */ 458 public boolean scanDocument(boolean complete) 459 throws IOException, XNIException { 460 461 // keep dispatching "events" 462 fEntityManager.setEntityHandler(this); 463 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 464 465 int event = next(); 466 do { 467 switch (event) { 468 case XMLStreamConstants.START_DOCUMENT : 469 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 470 break; 471 case XMLStreamConstants.START_ELEMENT : 472 //System.out.println(" in scann element"); 473 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 474 break; 475 case XMLStreamConstants.CHARACTERS : 476 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 477 fDocumentHandler.characters(getCharacterData(),null); 478 break; 479 case XMLStreamConstants.SPACE: 480 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
481 //System.out.println("in the space"); 482 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 483 break; 484 case XMLStreamConstants.ENTITY_REFERENCE : 485 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 486 //entity reference callback are given in startEntity 487 break; 488 case XMLStreamConstants.PROCESSING_INSTRUCTION : 489 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 490 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 491 break; 492 case XMLStreamConstants.COMMENT : 493 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 494 fDocumentHandler.comment(getCharacterData(),null); 495 break; 496 case XMLStreamConstants.DTD : 497 //all DTD related callbacks are handled in DTDScanner. 498 //1. Stax doesn't define DTD states as it does for XML Document. 499 //therefore we don't need to take care of anything here. So Just break; 500 break; 501 case XMLStreamConstants.CDATA: 502 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 503 if (fCDataStart) { 504 fDocumentHandler.startCDATA(null); 505 fCDataStart = false; 506 fInCData = true; 507 } 508 509 fDocumentHandler.characters(getCharacterData(),null); 510 if (fCDataEnd) { 511 fDocumentHandler.endCDATA(null); 512 fCDataEnd = false; 513 } 514 break; 515 case XMLStreamConstants.NOTATION_DECLARATION : 516 break; 517 case XMLStreamConstants.ENTITY_DECLARATION : 518 break; 519 case XMLStreamConstants.NAMESPACE : 520 break; 521 case XMLStreamConstants.ATTRIBUTE : 522 break; 523 case XMLStreamConstants.END_ELEMENT : 524 //do not give callback here. 525 //this callback is given in scanEndElement function. 
526 //fDocumentHandler.endElement(getElementQName(),null); 527 break; 528 default : 529 // Errors should have already been handled by the Scanner 530 return false; 531 532 } 533 //System.out.println("here in before calling next"); 534 event = next(); 535 //System.out.println("here in after calling next"); 536 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 537 538 if(event == XMLStreamConstants.END_DOCUMENT) { 539 fDocumentHandler.endDocument(null); 540 return false; 541 } 542 543 return true; 544 545 } // scanDocument(boolean):boolean 546 547 548 549 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 550 if(fScannerLastState == XMLEvent.END_ELEMENT){ 551 fElementQName.setValues(fElementStack.getLastPoppedElement()); 552 } 553 return fElementQName ; 554 } 555 556 /** return the next state on the input 557 * @return int 558 */ 559 560 public int next() throws IOException, XNIException { 561 return fDriver.next(); 562 } 563 564 // 565 // XMLComponent methods 566 // 567 568 /** 569 * Resets the component. The component can query the component manager 570 * about any features and properties that affect the operation of the 571 * component. 572 * 573 * @param componentManager The component manager. 574 * 575 * @throws SAXException Thrown by component on initialization error. 576 * For example, if a feature or property is 577 * required for the operation of the component, the 578 * component manager may throw a 579 * SAXNotRecognizedException or a 580 * SAXNotSupportedException. 
581 */ 582 583 public void reset(XMLComponentManager componentManager) 584 throws XMLConfigurationException { 585 586 super.reset(componentManager); 587 588 // other settings 589 // fDocumentSystemId = null; 590 591 // sax features 592 //fAttributes.setNamespaces(fNamespaces); 593 594 // xerces features 595 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 596 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 597 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 598 599 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 600 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 601 (ExternalSubsetResolver) resolver : null; 602 603 //attribute 604 fReadingAttributes = false; 605 //xxx: external entities are supported in Xerces 606 // it would be good to define feature for this case 607 fSupportExternalEntities = true; 608 fReplaceEntityReferences = true; 609 fIsCoalesce = false; 610 611 // setup Driver 612 setScannerState(SCANNER_STATE_CONTENT); 613 setDriver(fContentDriver); 614 615 // JAXP 1.5 features and properties 616 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 617 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 618 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 619 620 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 621 fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 622 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 623 624 resetCommon(); 625 //fEntityManager.test(); 626 } // reset(XMLComponentManager) 627 628 629 public void reset(PropertyManager propertyManager){ 630 631 super.reset(propertyManager); 632 633 // other settings 634 // fDocumentSystemId = null; 635 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)); 636 
fNotifyBuiltInRefs = false ; 637 638 //fElementStack2.clear(); 639 //fReplaceEntityReferences = true; 640 //fSupportExternalEntities = true; 641 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 642 fReplaceEntityReferences = bo; 643 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 644 fSupportExternalEntities = bo; 645 Boolean cdata = (Boolean)propertyManager.getProperty( 646 Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 647 if(cdata != null) 648 fReportCdataEvent = cdata ; 649 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 650 if(coalesce != null) 651 fIsCoalesce = coalesce; 652 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 653 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 654 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 655 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 656 // setup Driver 657 //we dont need to do this -- nb. 
658 //setScannerState(SCANNER_STATE_CONTENT); 659 //setDriver(fContentDriver); 660 //fEntityManager.test(); 661 662 // JAXP 1.5 features and properties 663 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 664 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 665 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 666 667 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 668 fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 669 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 670 resetCommon(); 671 } // reset(XMLComponentManager) 672 673 void resetCommon() { 674 // initialize vars 675 fMarkupDepth = 0; 676 fCurrentElement = null; 677 fElementStack.clear(); 678 fHasExternalDTD = false; 679 fStandaloneSet = false; 680 fStandalone = false; 681 fInScanContent = false; 682 //skipping algorithm 683 fShouldSkip = false; 684 fAdd = false; 685 fSkip = false; 686 687 fEntityStore = fEntityManager.getEntityStore(); 688 dtdGrammarUtil = null; 689 690 if (fSecurityManager != null) { 691 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 692 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 693 } else { 694 fElementAttributeLimit = 0; 695 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 696 } 697 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 698 } 699 700 /** 701 * Returns a list of feature identifiers that are recognized by 702 * this component. This method may return null if no features 703 * are recognized by this component. 704 */ 705 public String[] getRecognizedFeatures() { 706 return RECOGNIZED_FEATURES.clone(); 707 } // getRecognizedFeatures():String[] 708 709 /** 710 * Sets the state of a feature. This method is called by the component 711 * manager any time after reset when a feature changes state. 
712 * <p> 713 * <strong>Note:</strong> Components should silently ignore features 714 * that do not affect the operation of the component. 715 * 716 * @param featureId The feature identifier. 717 * @param state The state of the feature. 718 * 719 * @throws SAXNotRecognizedException The component should not throw 720 * this exception. 721 * @throws SAXNotSupportedException The component should not throw 722 * this exception. 723 */ 724 public void setFeature(String featureId, boolean state) 725 throws XMLConfigurationException { 726 727 super.setFeature(featureId, state); 728 729 // Xerces properties 730 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 731 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 732 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 733 fNotifyBuiltInRefs = state; 734 } 735 } 736 737 } // setFeature(String,boolean) 738 739 /** 740 * Returns a list of property identifiers that are recognized by 741 * this component. This method may return null if no properties 742 * are recognized by this component. 743 */ 744 public String[] getRecognizedProperties() { 745 return RECOGNIZED_PROPERTIES.clone(); 746 } // getRecognizedProperties():String[] 747 748 /** 749 * Sets the value of a property. This method is called by the component 750 * manager any time after reset when a property changes value. 751 * <p> 752 * <strong>Note:</strong> Components should silently ignore properties 753 * that do not affect the operation of the component. 754 * 755 * @param propertyId The property identifier. 756 * @param value The value of the property. 757 * 758 * @throws SAXNotRecognizedException The component should not throw 759 * this exception. 760 * @throws SAXNotSupportedException The component should not throw 761 * this exception. 
762 */ 763 public void setProperty(String propertyId, Object value) 764 throws XMLConfigurationException { 765 766 super.setProperty(propertyId, value); 767 768 // Xerces properties 769 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 770 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 771 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 772 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 773 fEntityManager = (XMLEntityManager)value; 774 return; 775 } 776 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 777 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 778 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 779 (ExternalSubsetResolver) value : null; 780 return; 781 } 782 } 783 784 785 // Xerces properties 786 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 787 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 788 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 789 fEntityManager = (XMLEntityManager)value; 790 } 791 return; 792 } 793 794 //JAXP 1.5 properties 795 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 796 { 797 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 798 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 799 } 800 801 } // setProperty(String,Object) 802 803 /** 804 * Returns the default state for a feature, or null if this 805 * component does not want to report a default value for this 806 * feature. 807 * 808 * @param featureId The feature identifier. 
809 * 810 * @since Xerces 2.2.0 811 */ 812 public Boolean getFeatureDefault(String featureId) { 813 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 814 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 815 return FEATURE_DEFAULTS[i]; 816 } 817 } 818 return null; 819 } // getFeatureDefault(String):Boolean 820 821 /** 822 * Returns the default state for a property, or null if this 823 * component does not want to report a default value for this 824 * property. 825 * 826 * @param propertyId The property identifier. 827 * 828 * @since Xerces 2.2.0 829 */ 830 public Object getPropertyDefault(String propertyId) { 831 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 832 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 833 return PROPERTY_DEFAULTS[i]; 834 } 835 } 836 return null; 837 } // getPropertyDefault(String):Object 838 839 // 840 // XMLDocumentSource methods 841 // 842 843 /** 844 * setDocumentHandler 845 * 846 * @param documentHandler 847 */ 848 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 849 fDocumentHandler = documentHandler; 850 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 851 } // setDocumentHandler(XMLDocumentHandler) 852 853 854 /** Returns the document handler */ 855 public XMLDocumentHandler getDocumentHandler(){ 856 return fDocumentHandler; 857 } 858 859 // 860 // XMLEntityHandler methods 861 // 862 863 /** 864 * This method notifies of the start of an entity. The DTD has the 865 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 866 * general entities are just specified by their name. 867 * 868 * @param name The name of the entity. 869 * @param identifier The resource identifier. 870 * @param encoding The auto-detected IANA encoding name of the entity 871 * stream. This value will be null in those situations 872 * where the entity encoding is not auto-detected (e.g. 873 * internal entities or a document entity that is 874 * parsed from a java.io.Reader). 
875 * @param augs Additional information that may include infoset augmentations 876 * 877 * @throws XNIException Thrown by handler to signal an error. 878 */ 879 public void startEntity(String name, 880 XMLResourceIdentifier identifier, 881 String encoding, Augmentations augs) throws XNIException { 882 883 // keep track of this entity before fEntityDepth is increased 884 if (fEntityDepth == fEntityStack.length) { 885 int[] entityarray = new int[fEntityStack.length * 2]; 886 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 887 fEntityStack = entityarray; 888 } 889 fEntityStack[fEntityDepth] = fMarkupDepth; 890 891 super.startEntity(name, identifier, encoding, augs); 892 893 // WFC: entity declared in external subset in standalone doc 894 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 895 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 896 new Object[]{name}); 897 } 898 899 /** we are not calling the handlers yet.. */ 900 // call handler 901 if (fDocumentHandler != null && !fScanningAttribute) { 902 if (!name.equals("[xml]")) { 903 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 904 } 905 } 906 907 } // startEntity(String,XMLResourceIdentifier,String) 908 909 /** 910 * This method notifies the end of an entity. The DTD has the pseudo-name 911 * of "[dtd]" parameter entity names start with '%'; and general entities 912 * are just specified by their name. 913 * 914 * @param name The name of the entity. 915 * @param augs Additional information that may include infoset augmentations 916 * 917 * @throws XNIException Thrown by handler to signal an error. 
918 */ 919 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 920 921 /** 922 * // flush possible pending output buffer - see scanContent 923 * if (fInScanContent && fStringBuffer.length != 0 924 * && fDocumentHandler != null) { 925 * fDocumentHandler.characters(fStringBuffer, null); 926 * fStringBuffer.length = 0; // make sure we know it's been flushed 927 * } 928 */ 929 super.endEntity(name, augs); 930 931 // make sure markup is properly balanced 932 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 933 reportFatalError("MarkupEntityMismatch", null); 934 } 935 936 /**/ 937 // call handler 938 if (fDocumentHandler != null && !fScanningAttribute) { 939 if (!name.equals("[xml]")) { 940 fDocumentHandler.endGeneralEntity(name, augs); 941 } 942 } 943 944 945 } // endEntity(String) 946 947 // 948 // Protected methods 949 // 950 951 // Driver factory methods 952 953 /** Creates a content Driver. */ 954 protected Driver createContentDriver() { 955 return new FragmentContentDriver(); 956 } // createContentDriver():Driver 957 958 // scanning methods 959 960 /** 961 * Scans an XML or text declaration. 962 * <p> 963 * <pre> 964 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 965 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 966 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 967 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 968 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 969 * | ('"' ('yes' | 'no') '"')) 970 * 971 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 972 * </pre> 973 * 974 * @param scanningTextDecl True if a text declaration is to 975 * be scanned instead of an XML 976 * declaration. 
977 */ 978 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 979 throws IOException, XNIException { 980 981 // scan decl 982 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 983 fMarkupDepth--; 984 985 // pseudo-attribute values 986 String version = fStrings[0]; 987 String encoding = fStrings[1]; 988 String standalone = fStrings[2]; 989 fDeclaredEncoding = encoding; 990 // set standalone 991 fStandaloneSet = standalone != null; 992 fStandalone = fStandaloneSet && standalone.equals("yes"); 993 ///xxx see where its used.. this is not used anywhere. 994 //it may be useful for entity to store this information 995 //but this information is only related with Document Entity. 996 fEntityManager.setStandalone(fStandalone); 997 998 999 // call handler 1000 if (fDocumentHandler != null) { 1001 if (scanningTextDecl) { 1002 fDocumentHandler.textDecl(version, encoding, null); 1003 } else { 1004 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1005 } 1006 } 1007 1008 if(version != null){ 1009 fEntityScanner.setVersion(version); 1010 fEntityScanner.setXMLVersion(version); 1011 } 1012 // set encoding on reader, only if encoding was not specified by the application explicitly 1013 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1014 fEntityScanner.setEncoding(encoding); 1015 } 1016 1017 } // scanXMLDeclOrTextDecl(boolean) 1018 1019 public String getPITarget(){ 1020 return fPITarget ; 1021 } 1022 1023 public XMLStringBuffer getPIData(){ 1024 return fContentBuffer ; 1025 } 1026 1027 //XXX: why not this function behave as per the state of the parser? 1028 public XMLString getCharacterData(){ 1029 if(fUsebuffer){ 1030 return fContentBuffer ; 1031 }else{ 1032 return fTempString; 1033 } 1034 1035 } 1036 1037 1038 /** 1039 * Scans a processing data. This is needed to handle the situation 1040 * where a document starts with a processing instruction whose 1041 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1042 * 1043 * @param target The PI target 1044 * @param data The XMLStringBuffer to fill in with the data 1045 */ 1046 protected void scanPIData(String target, XMLStringBuffer data) 1047 throws IOException, XNIException { 1048 1049 super.scanPIData(target, data); 1050 1051 //set the PI target and values 1052 fPITarget = target ; 1053 1054 fMarkupDepth--; 1055 1056 } // scanPIData(String) 1057 1058 /** 1059 * Scans a comment. 1060 * <p> 1061 * <pre> 1062 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1063 * </pre> 1064 * <p> 1065 * <strong>Note:</strong> Called after scanning past '<!--' 1066 */ 1067 protected void scanComment() throws IOException, XNIException { 1068 fContentBuffer.clear(); 1069 scanComment(fContentBuffer); 1070 //getTextCharacters can also be called for reading comments 1071 fUsebuffer = true; 1072 fMarkupDepth--; 1073 1074 } // scanComment() 1075 1076 //xxx value returned by this function may not remain valid if another event is scanned. 1077 public String getComment(){ 1078 return fContentBuffer.toString(); 1079 } 1080 1081 void addElement(String rawname){ 1082 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1083 //storing element raw name in a linear list of array 1084 fElementArray[fElementPointer] = rawname ; 1085 //storing elemnetPointer for particular element depth 1086 1087 if(DEBUG_SKIP_ALGORITHM){ 1088 StringBuffer sb = new StringBuffer() ; 1089 sb.append(" Storing element information ") ; 1090 sb.append(" fElementPointer = " + fElementPointer) ; 1091 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1092 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1093 System.out.println(sb.toString()) ; 1094 } 1095 1096 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1097 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1098 short column = storePointerForADepth(fElementPointer); 1099 if(column > 0){ 1100 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1101 //identity comparison shouldn't take much time and we can rely on this 1102 //since its guaranteed to have same object id for same string. 1103 if(rawname == fElementArray[pointer]){ 1104 fShouldSkip = true ; 1105 fLastPointerLocation = pointer ; 1106 //reset the things and return. 1107 resetPointer((short)fElementStack.fDepth , column) ; 1108 fElementArray[fElementPointer] = null ; 1109 return ; 1110 }else{ 1111 fShouldSkip = false ; 1112 } 1113 } 1114 } 1115 fElementPointer++ ; 1116 } 1117 } 1118 1119 1120 void resetPointer(short depth, short column){ 1121 fPointerInfo[depth] [column] = (short)0; 1122 } 1123 1124 //returns column information at which pointer was stored. 1125 short storePointerForADepth(short elementPointer){ 1126 short depth = (short) fElementStack.fDepth ; 1127 1128 //Stores element pointer locations at particular depth , only 4 pointer locations 1129 //are stored at particular depth for now. 1130 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1131 1132 if(canStore(depth, i)){ 1133 fPointerInfo[depth][i] = elementPointer ; 1134 if(DEBUG_SKIP_ALGORITHM){ 1135 StringBuffer sb = new StringBuffer() ; 1136 sb.append(" Pointer information ") ; 1137 sb.append(" fElementPointer = " + fElementPointer) ; 1138 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1139 sb.append(" column = " + i ) ; 1140 System.out.println(sb.toString()) ; 1141 } 1142 return i; 1143 } 1144 //else 1145 //pointer was not stored because we reached the limit 1146 } 1147 return -1 ; 1148 } 1149 1150 boolean canStore(short depth, short column){ 1151 //colum = 0 , means first element at particular depth 1152 //column = 1, means second element at particular depth 1153 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1154 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1155 } 1156 1157 1158 short getElementPointer(short depth, short column){ 1159 //colum = 0 , means first element at particular depth 1160 //column = 1, means second element at particular depth 1161 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1162 return fPointerInfo[depth][column] ; 1163 } 1164 1165 //this function assumes that string passed is not null and skips 1166 //the following string from the buffer this makes sure 1167 boolean skipFromTheBuffer(String rawname) throws IOException{ 1168 if(fEntityScanner.skipString(rawname)){ 1169 char c = (char)fEntityScanner.peekChar() ; 1170 //If the start element was completely skipped we should encounter either ' '(space), 1171 //or '/' (in case of empty element) or '>' 1172 if( c == ' ' || c == '/' || c == '>'){ 1173 fElementRawname = rawname ; 1174 return true ; 1175 } else{ 1176 return false; 1177 } 1178 } else 1179 return false ; 1180 } 1181 1182 boolean skipQElement(String rawname) throws IOException{ 1183 1184 final int c = fEntityScanner.getChar(rawname.length()); 1185 //if this character is still valid element name -- this means string can't match 1186 if(XMLChar.isName(c)){ 1187 return false; 1188 }else{ 1189 return fEntityScanner.skipString(rawname); 1190 } 1191 } 1192 1193 protected boolean skipElement() throws IOException { 1194 1195 if(!fShouldSkip) return false ; 1196 1197 if(fLastPointerLocation != 0){ 1198 //Look at the next element stored in the array list.. we might just get a match. 1199 String rawname = fElementArray[fLastPointerLocation + 1] ; 1200 if(rawname != null && skipFromTheBuffer(rawname)){ 1201 fLastPointerLocation++ ; 1202 if(DEBUG_SKIP_ALGORITHM){ 1203 System.out.println("Element " + fElementRawname + 1204 " was SKIPPED at pointer location = " + fLastPointerLocation); 1205 } 1206 return true ; 1207 } else{ 1208 //reset it back to zero... we haven't got the correct subset yet. 
1209 fLastPointerLocation = 0 ; 1210 1211 } 1212 } 1213 //xxx: we can put some logic here as from what column it should start looking 1214 //for now we always start at 0 1215 //fallback to tolerant algorithm, it would look for differnt element stored at different 1216 //depth and get us the pointer location. 1217 return fShouldSkip && skipElement((short)0); 1218 1219 } 1220 1221 //start of the column at which it should try searching 1222 boolean skipElement(short column) throws IOException { 1223 short depth = (short)fElementStack.fDepth ; 1224 1225 if(depth > MAX_DEPTH_LIMIT){ 1226 return fShouldSkip = false ; 1227 } 1228 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1229 short pointer = getElementPointer(depth , i ) ; 1230 1231 if(pointer == 0){ 1232 return fShouldSkip = false ; 1233 } 1234 1235 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1236 if(DEBUG_SKIP_ALGORITHM){ 1237 System.out.println(); 1238 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + 1239 fElementStack.fDepth + " column = " + column ); 1240 System.out.println(); 1241 } 1242 fLastPointerLocation = pointer ; 1243 return fShouldSkip = true ; 1244 } 1245 } 1246 return fShouldSkip = false ; 1247 } 1248 1249 /** 1250 * Scans a start element. This method will handle the binding of 1251 * namespace information and notifying the handler of the start 1252 * of the element. 1253 * <p> 1254 * <pre> 1255 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1256 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1257 * </pre> 1258 * <p> 1259 * <strong>Note:</strong> This method assumes that the leading 1260 * '<' character has been consumed. 1261 * <p> 1262 * <strong>Note:</strong> This method uses the fElementQName and 1263 * fAttributes variables. The contents of these variables will be 1264 * destroyed. The caller should copy important information out of 1265 * these variables before calling this method. 
* NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
*
* @return True if element is empty. (i.e. It matches
*          production [44].
*/
// fElementQName will have the details of element just read..
// fAttributes will have the details of all the attributes.
protected boolean scanStartElement()
throws IOException, XNIException {

    if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
    //when skipping is true and no more elements should be added
    if(fSkip && !fAdd){
        //get the stored element -- if everything goes right this should match the
        //token in the buffer

        QName name = fElementStack.getNext();

        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Trying to skip String = " + name.rawname);
        }

        //Be conservative -- if skipping fails -- stop.
        fSkip = fEntityScanner.skipString(name.rawname);

        if(fSkip){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
            }
            fElementStack.push();
            fElementQName = name;
        }else{
            //if skipping fails reposition the stack or fallback to normal way of processing
            fElementStack.reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element was NOT skipped, REPOSITIONING stack" );
            }
        }
    }

    //we are still at the stage of adding elements
    //the elements were not matched or
    //fSkip is not set to true
    if(!fSkip || fAdd){
        //get the next element from the stack
        fElementQName = fElementStack.nextElement();
        // name: scanned as a QName when namespace processing is on,
        // otherwise as a plain name used for both localpart and rawname
        if (fNamespaces) {
            fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART);
        } else {
            String name = fEntityScanner.scanName(NameType.ELEMENTSTART);
            fElementQName.setValues(null, name, name, null);
        }

        if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
        if(DEBUG_SKIP_ALGORITHM){
            if(fAdd){
                System.out.println("Elements are being ADDED -- elemet added is = " +
                        fElementQName.rawname + " at count = " + fElementStack.fCount);
            }
        }

    }

    //when the elements are being added , we need to check if we are set for skipping the elements
    if(fAdd){
        //this sets the value of fAdd variable
        fElementStack.matchElement(fElementQName);
    }


    //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
    fCurrentElement = fElementQName;

    String rawname = fElementQName.rawname;

    // reset per-element state; seekCloseOfStartTag() sets fEmptyElement
    // when it consumes "/>"
    fEmptyElement = false;

    fAttributes.removeAllAttributes();

    checkDepth(rawname);
    // scan attributes until the end of the start tag has been consumed
    if(!seekCloseOfStartTag()){
        fReadingAttributes = true;
        fAttributeCacheUsedCount =0;
        fStringBufferIndex =0;
        fAddDefaultAttr = true;
        do {
            scanAttribute(fAttributes);
            // enforce the per-element attribute limit after every attribute
            if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
                    fAttributes.getLength() > fElementAttributeLimit){
                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                        "ElementAttributeLimit",
                        new Object[]{rawname, fElementAttributeLimit },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR );
            }

        } while (!seekCloseOfStartTag());
        fReadingAttributes=false;
    }

    if (fEmptyElement) {
        //decrease the markup depth..
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch",
                    new Object[]{fCurrentElement.rawname});
        }
        // call handler
        if (fDocumentHandler != null) {
            fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
        }

        //We should not be popping out the context here in endElement because the namespace context is still
        //valid when parser is at the endElement state.
        //if (fNamespaces) {
        //  fNamespaceContext.popContext();
        //}

        //pop the element off the stack..
        fElementStack.popElement();

    } else {

        if(dtdGrammarUtil != null)
            dtdGrammarUtil.startElement(fElementQName, fAttributes);
        if(fDocumentHandler != null){
            //complete element and attributes are traversed in this function so we can send a callback
            //here.
            //<strong>we shouldn't be sending callback in scanDocument()</strong>
            fDocumentHandler.startElement(fElementQName, fAttributes, null);
        }
    }


    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +
            "<<< scanStartElement(): "+fEmptyElement);
    return fEmptyElement;

} // scanStartElement():boolean

/**
 * Looks for the close of start tag, i.e. if it finds '>' or '/>'
 * Characters are consumed.
 * <p>
 * Leading spaces are skipped first. When "/>" is consumed fEmptyElement
 * is set to true. When neither form is found, a fatal error is reported
 * unless spaces were seen and the next character is a valid name start
 * character (or a high surrogate of one).
 *
 * @return true if '>' or '/>' was consumed; false if the start tag is
 *         not closed yet
 */
protected boolean seekCloseOfStartTag() throws IOException, XNIException {
    // spaces
    boolean sawSpace = fEntityScanner.skipSpaces();

    // end tag?
    final int c = fEntityScanner.peekChar();
    if (c == '>') {
        fEntityScanner.scanChar(null);
        return true;
    } else if (c == '/') {
        fEntityScanner.scanChar(null);
        if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
            reportFatalError("ElementUnterminated",
                    new Object[]{fElementQName.rawname});
        }
        fEmptyElement = true;
        return true;
    } else if (!isValidNameStartChar(c) || !sawSpace) {
        // Second chance. Check if this character is a high
        // surrogate of a valid name start character.
        if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
            reportFatalError("ElementUnterminated",
                    new Object[]{fElementQName.rawname});
        }
    }

    return false;
}

/** Returns true if fAttributes currently holds at least one attribute. */
public boolean hasAttributes(){
    return fAttributes.getLength() > 0;
}

/**
 * return the attribute iterator implementation
 * <p>
 * When a dtdGrammarUtil is present, DTD default attributes are added to
 * fAttributes on the first call after fAddDefaultAttr was set (it is set
 * in scanStartElement when attributes are scanned); the flag is then
 * cleared so they are added only once.
 */
public XMLAttributesIteratorImpl getAttributeIterator(){
    if(dtdGrammarUtil != null && fAddDefaultAttr){
        dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes);
        fAddDefaultAttr = false;
    }
    return fAttributes;
}

/** return if standalone is set */
public boolean standaloneSet(){
    return fStandaloneSet;
}
/** return if the document is standalone */
public boolean isStandAlone(){
    return fStandalone ;
}
/**
 * Scans an attribute name value pair.
 * <p>
 * <pre>
 * [41] Attribute ::= Name Eq AttValue
 * </pre>
 * <p>
 * <strong>Note:</strong> This method assumes that the next
 * character on the stream is the first character of the attribute
 * name.
 * <p>
 * <strong>Note:</strong> This method uses the fAttributeQName and
 * fQName variables. The contents of these variables will be
 * destroyed.
 *
 * @param attributes The attributes list for the scanned attribute.
*/

protected void scanAttribute(XMLAttributes attributes)
throws IOException, XNIException {
    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()");

    // name
    if (fNamespaces) {
        fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME);
    } else {
        String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME);
        fAttributeQName.setValues(null, name, name, null);
    }

    // equals
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) {
        reportFatalError("EqRequiredInAttribute",
                new Object[] {fCurrentElement.rawname, fAttributeQName.rawname});
    }
    fEntityScanner.skipSpaces();

    int attIndex = 0 ;
    //REVISIT: one more case needs to be included: external PE and standalone is no
    boolean isVC = fHasExternalDTD && !fStandalone;
    //fTempString would store attribute value
    ///fTempString2 would store attribute non-normalized value

    //this function doesn't use 'attIndex'. We are adding the attribute later
    //after we have figured out that current attribute is not namespace declaration
    //since scanAttributeValue doesn't use attIndex parameter therefore we
    //can safely add the attribute later..
    XMLString tmpStr = getString();

    scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes,
            attIndex, isVC, fCurrentElement.rawname, false);

    // content
    int oldLen = attributes.getLength();
    //add the attribute; the length check below detects a name that already exists
    attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);

    // WFC: Unique Att Spec
    //attributes count will be same if the current attribute name already exists for this element name.
    //this means there are two duplicate attributes.
    if (oldLen == attributes.getLength()) {
        reportFatalError("AttributeNotUnique",
                new Object[]{fCurrentElement.rawname,
                        fAttributeQName.rawname});
    }

    //tmpStr contains attribute value
    //we are passing null as the attribute value
    attributes.setValue(attIndex, null, tmpStr);

    ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM
    //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
    attributes.setSpecified(attIndex, true);

    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()");

} // scanAttribute(XMLAttributes)

/**
 * Scans element content and appends it to the given buffer.
 *
 * @param content the buffer the scanned characters are appended to
 *
 * @return Returns the next character on the stream, or -1 when that
 *         character was '\r' or ']' and has been consumed and appended
 *         here.
 */
//CHANGED:
//EARLIER: scanContent()
//NOW: scanContent(XMLStringBuffer)
//It makes things easy if this functions takes XMLStringBuffer as parameter..
//this function appends the data to the buffer.
protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
    //set the fTempString length to 0 before passing it on to scanContent
    //scanContent sets the correct co-ordinates as per the content read
    fTempString.length = 0;
    int c = fEntityScanner.scanContent(fTempString);
    content.append(fTempString);
    fTempString.length = 0;
    if (c == '\r') {
        // happens when there is the character reference
        // the '\r' is consumed and appended to the content as-is
        fEntityScanner.scanChar(null);
        content.append((char)c);
        c = -1;
    } else if (c == ']') {
        //fStringBuffer.clear();
        //xxx: We know the next character.. we should just skip it and add ']' directly
        content.append((char)fEntityScanner.scanChar(null));
        // remember where we are in case we get an endEntity before we
        // could flush the buffer out - this happens when we're parsing an
        // entity which ends with a ]
        fInScanContent = true;
        //
        // We work on a single character basis to handle cases such as:
        // ']]]>' which we might otherwise miss.
        //
        if (fEntityScanner.skipChar(']', null)) {
            content.append(']');
            while (fEntityScanner.skipChar(']', null)) {
                content.append(']');
            }
            // "]]>" is not allowed in character data
            if (fEntityScanner.skipChar('>', null)) {
                reportFatalError("CDEndInContent", null);
            }
        }
        fInScanContent = false;
        c = -1;
    }
    // intentionally empty: the characters() callback is disabled here
    if (fDocumentHandler != null && content.length > 0) {
        //fDocumentHandler.characters(content, null);
    }
    return c;

} // scanContent():int


/**
 * Scans a CDATA section.
 * <p>
 * Data is read into contentBuffer through fEntityScanner.scanData with
 * "]]>" as delimiter and fChunkSize as chunk size. When scanData returns
 * false the section is treated as finished: fInCData is cleared,
 * fCDataEnd is set and fMarkupDepth is decremented. Otherwise an invalid
 * character at the current position is either scanned as a surrogate
 * pair or reported as a fatal error and skipped, and scanning goes on;
 * if the character is valid the method returns with the data read so far.
 *
 * @param contentBuffer the buffer that receives the CDATA content
 * @param complete True if the CDATA section is to be scanned
 *                 completely. This parameter is not read by the
 *                 method body.
 *
 * @return always true; callers can tell whether the end of the section
 *         was reached from fCDataEnd.
 */
//CHANGED:
protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)
throws IOException, XNIException {

    // call handler (the startCDATA callback is disabled)
    if (fDocumentHandler != null) {
        //fDocumentHandler.startCDATA(null);
    }

    while (true) {
        //scanData will fill the contentBuffer
        if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) {
            fInCData = false;
            fCDataEnd = true;
            fMarkupDepth--;
            break ;
        } else {
            int c = fEntityScanner.peekChar();
            if (c != -1 && isInvalidLiteral(c)) {
                if (XMLChar.isHighSurrogate(c)) {
                    //contentBuffer.clear();
                    //scan surrogates if any....
                    scanSurrogates(contentBuffer);
                } else {
                    reportFatalError("InvalidCharInCDSect",
                            new Object[]{Integer.toString(c,16)});
                    fEntityScanner.scanChar(null);
                }
            } else {
                //CData partially returned due to the size limit
                break;
            }
            //by this time we have also read surrogate contents if any...
            // intentionally empty: the characters() callback is disabled here
            if (fDocumentHandler != null) {
                //fDocumentHandler.characters(contentBuffer, null);
            }
        }
    }

    return true;

} // scanCDATASection(XMLStringBuffer, boolean):boolean

/**
 * Scans an end element.
 * <p>
 * <pre>
 * [42] ETag ::= '</' Name S? '>'
 * </pre>
 * <p>
 * <strong>Note:</strong> This method uses the fElementQName variable.
 * The contents of this variable will be destroyed. The caller should
 * copy the needed information out of this variable before calling
 * this method.
 *
 * @return the value of fMarkupDepth after it has been decremented for
 *         this end tag.
 */
protected int scanEndElement() throws IOException, XNIException {
    if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()");

    // pop context
    QName endElementName = fElementStack.popElement();

    String rawname = endElementName.rawname;
    if(DEBUG)System.out.println("endElementName = " + endElementName.toString());
    // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
    //In scanners most of the time is consumed on checks done for XML characters, we can
    // optimize on it and avoid the checks done for endElement,
    //we will also avoid symbol table lookup.

    // this should work both for namespace processing true or false...

    //REVISIT: if the string is not the same as expected.. we need to do better error handling..
    //We can skip this for now... In any case if the string doesn't match -- document is not well formed.

    if (!fEntityScanner.skipString(endElementName.rawname)) {
        reportFatalError("ETagRequired", new Object[]{rawname});
    }

    // end
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
        reportFatalError("ETagUnterminated",
                new Object[]{rawname});
    }
    fMarkupDepth--;

    //we have increased the depth for two markup "<" characters
    fMarkupDepth--;

    // check that this element was opened in the same entity
    if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
        reportFatalError("ElementEntityMismatch",
                new Object[]{rawname});
    }

    //We should not be popping out the context here in endElement because the namespace context is still
    //valid when parser is at the endElement state.

    //if (fNamespaces) {
    //  fNamespaceContext.popContext();
    //}

    // call handler
    if (fDocumentHandler != null ) {
        //end element is scanned in this function so we can send a callback
        //here.
        //<strong>we shouldn't be sending callback in scanDocument()</strong>

        fDocumentHandler.endElement(endElementName, null);
    }
    if(dtdGrammarUtil != null)
        dtdGrammarUtil.endElement(endElementName);

    return fMarkupDepth;

} // scanEndElement():int

/**
 * Scans a character reference.
1727 * <p> 1728 * <pre> 1729 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1730 * </pre> 1731 */ 1732 protected void scanCharReference() 1733 throws IOException, XNIException { 1734 1735 fStringBuffer2.clear(); 1736 int ch = scanCharReferenceValue(fStringBuffer2, null); 1737 fMarkupDepth--; 1738 if (ch != -1) { 1739 // call handler 1740 1741 if (fDocumentHandler != null) { 1742 if (fNotifyCharRefs) { 1743 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1744 } 1745 Augmentations augs = null; 1746 if (fValidation && ch <= 0x20) { 1747 if (fTempAugmentations != null) { 1748 fTempAugmentations.removeAllItems(); 1749 } 1750 else { 1751 fTempAugmentations = new AugmentationsImpl(); 1752 } 1753 augs = fTempAugmentations; 1754 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1755 } 1756 //xxx: How do we deal with this - how to return charReferenceValues 1757 //now this is being commented because this is taken care in scanDocument() 1758 //fDocumentHandler.characters(fStringBuffer2, null); 1759 if (fNotifyCharRefs) { 1760 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1761 } 1762 } 1763 } 1764 1765 } // scanCharReference() 1766 1767 1768 /** 1769 * Scans an entity reference. 1770 * 1771 * @return returns true if the new entity is started. If it was built-in entity 1772 * 'false' is returned. 1773 * @throws IOException Thrown if i/o error occurs. 1774 * @throws XNIException Thrown if handler throws exception upon 1775 * notification. 
1776 */ 1777 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1778 String name = fEntityScanner.scanName(NameType.REFERENCE); 1779 if (name == null) { 1780 reportFatalError("NameRequiredInReference", null); 1781 return; 1782 } 1783 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1784 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1785 } 1786 if (fEntityStore.isUnparsedEntity(name)) { 1787 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1788 } 1789 fMarkupDepth--; 1790 fCurrentEntityName = name; 1791 1792 // handle built-in entities 1793 if (name == fAmpSymbol) { 1794 handleCharacter('&', fAmpSymbol, content); 1795 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1796 return ; 1797 } else if (name == fLtSymbol) { 1798 handleCharacter('<', fLtSymbol, content); 1799 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1800 return ; 1801 } else if (name == fGtSymbol) { 1802 handleCharacter('>', fGtSymbol, content); 1803 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1804 return ; 1805 } else if (name == fQuotSymbol) { 1806 handleCharacter('"', fQuotSymbol, content); 1807 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1808 return ; 1809 } else if (name == fAposSymbol) { 1810 handleCharacter('\'', fAposSymbol, content); 1811 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1812 return ; 1813 } 1814 1815 //1. if the entity is external and support to external entities is not required 1816 // 2. or entities should not be replaced 1817 //3. or if it is built in entity reference. 
1818 boolean isEE = fEntityStore.isExternalEntity(name); 1819 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1820 fScannerState = SCANNER_STATE_REFERENCE; 1821 return ; 1822 } 1823 // start general entity 1824 if (!fEntityStore.isDeclaredEntity(name)) { 1825 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1826 if (!fSupportDTD && fReplaceEntityReferences) { 1827 reportFatalError("EntityNotDeclared", new Object[]{name}); 1828 return; 1829 } 1830 //REVISIT: one more case needs to be included: external PE and standalone is no 1831 if ( fHasExternalDTD && !fStandalone) { 1832 if (fValidation) 1833 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1834 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1835 } else 1836 reportFatalError("EntityNotDeclared", new Object[]{name}); 1837 } 1838 //we are starting the entity even if the entity was not declared 1839 //if that was the case it its taken care in XMLEntityManager.startEntity() 1840 //we immediately call the endEntity. Application gets to know if there was 1841 //any entity that was not declared. 1842 fEntityManager.startEntity(true, name, false); 1843 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1844 //setScannerState(SCANNER_STATE_CONTENT); 1845 //return true ; 1846 } // scanEntityReference() 1847 1848 // utility methods 1849 1850 /** 1851 * Check if the depth exceeds the maxElementDepth limit 1852 * @param elementName name of the current element 1853 */ 1854 void checkDepth(String elementName) { 1855 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1856 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1857 fSecurityManager.debugPrint(fLimitAnalyzer); 1858 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1859 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1860 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1861 "maxElementDepth"}); 1862 } 1863 } 1864 1865 /** 1866 * Calls document handler with a single character resulting from 1867 * built-in entity resolution. 1868 * 1869 * @param c 1870 * @param entity built-in name 1871 * @param XMLStringBuffer append the character to buffer 1872 * 1873 * we really dont need to call this function -- this function is only required when 1874 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1875 * calling this function to hanlde built-in entity reference. 
1876 * 1877 */ 1878 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1879 foundBuiltInRefs = true; 1880 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1881 content.append(c); 1882 if (fDocumentHandler != null) { 1883 fSingleChar[0] = c; 1884 if (fNotifyBuiltInRefs) { 1885 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1886 } 1887 fTempString.setValues(fSingleChar, 0, 1); 1888 if(!fIsCoalesce){ 1889 fDocumentHandler.characters(fTempString, null); 1890 builtInRefCharacterHandled = true; 1891 } 1892 1893 if (fNotifyBuiltInRefs) { 1894 fDocumentHandler.endGeneralEntity(entity, null); 1895 } 1896 } 1897 } // handleCharacter(char) 1898 1899 // helper methods 1900 1901 /** 1902 * Sets the scanner state. 1903 * 1904 * @param state The new scanner state. 1905 */ 1906 protected final void setScannerState(int state) { 1907 1908 fScannerState = state; 1909 if (DEBUG_SCANNER_STATE) { 1910 System.out.print("### setScannerState: "); 1911 //System.out.print(fScannerState); 1912 System.out.print(getScannerStateName(state)); 1913 System.out.println(); 1914 } 1915 1916 } // setScannerState(int) 1917 1918 1919 /** 1920 * Sets the Driver. 1921 * 1922 * @param Driver The new Driver. 1923 */ 1924 protected final void setDriver(Driver driver) { 1925 fDriver = driver; 1926 if (DEBUG_DISPATCHER) { 1927 System.out.print("%%% setDriver: "); 1928 System.out.print(getDriverName(driver)); 1929 System.out.println(); 1930 } 1931 } 1932 1933 // 1934 // Private methods 1935 // 1936 1937 /** Returns the scanner state name. 
*/ 1938 protected String getScannerStateName(int state) { 1939 1940 switch (state) { 1941 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1942 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1943 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1944 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1945 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1946 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1947 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1948 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1949 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1950 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1951 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1952 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1953 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1954 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1955 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1956 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1957 } 1958 1959 return "??? ("+state+')'; 1960 1961 } // getScannerStateName(int):String 1962 public String getEntityName(){ 1963 //return the cached name 1964 return fCurrentEntityName; 1965 } 1966 1967 /** Returns the driver name. 
*/ 1968 public String getDriverName(Driver driver) { 1969 1970 if (DEBUG_DISPATCHER) { 1971 if (driver != null) { 1972 String name = driver.getClass().getName(); 1973 int index = name.lastIndexOf('.'); 1974 if (index != -1) { 1975 name = name.substring(index + 1); 1976 index = name.lastIndexOf('$'); 1977 if (index != -1) { 1978 name = name.substring(index + 1); 1979 } 1980 } 1981 return name; 1982 } 1983 } 1984 return "null"; 1985 1986 } // getDriverName():String 1987 1988 /** 1989 * Check the protocol used in the systemId against allowed protocols 1990 * 1991 * @param systemId the Id of the URI 1992 * @param allowedProtocols a list of allowed protocols separated by comma 1993 * @return the name of the protocol if rejected, null otherwise 1994 */ 1995 String checkAccess(String systemId, String allowedProtocols) throws IOException { 1996 String baseSystemId = fEntityScanner.getBaseSystemId(); 1997 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 1998 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 1999 } 2000 2001 // 2002 // Classes 2003 // 2004 2005 /** 2006 * @author Neeraj Bajaj, Sun Microsystems. 2007 */ 2008 protected static final class Element { 2009 2010 // 2011 // Data 2012 // 2013 2014 /** Symbol. */ 2015 public QName qname; 2016 2017 //raw name stored as characters 2018 public char[] fRawname; 2019 2020 /** The next Element entry. */ 2021 public Element next; 2022 2023 // 2024 // Constructors 2025 // 2026 2027 /** 2028 * Constructs a new Element from the given QName and next Element 2029 * reference. 2030 */ 2031 public Element(QName qname, Element next) { 2032 this.qname.setValues(qname); 2033 this.fRawname = qname.rawname.toCharArray(); 2034 this.next = next; 2035 } 2036 2037 } // class Element 2038 2039 /** 2040 * Element stack. 2041 * 2042 * @author Neeraj Bajaj, Sun Microsystems. 
*/
protected class ElementStack2 {

    //
    // Data
    //

    /** The stack data: preallocated QName slots (20 initially, doubled by resize()). */
    protected QName [] fQName = new QName[20];

    // Element depth
    protected int fDepth;
    // total number of elements handed out by nextElement()
    protected int fCount;
    // current read position used by getNext()
    protected int fPosition;
    // position that getNext() wraps back to
    protected int fMark;

    // depth stored by the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor: fills every slot with a QName and starts mark and position at 1. */
    public ElementStack2() {
        for (int i = 0; i < fQName.length; i++) {
            fQName[i] = new QName();
        }
        fMark = fPosition = 1;
    } // <init>()

    /** Doubles the QName array, keeping the existing objects and filling the new half with fresh QNames. */
    public void resize(){
        /**
         * Legacy, commented-out variant kept from the original source:
         * int length = fElements.length;
         * Element [] temp = new Element[length * 2];
         * System.arraycopy(fElements, 0, temp, 0, length);
         * fElements = temp;
         */
        // resize QNames
        int oldLength = fQName.length;
        QName [] tmp = new QName[oldLength * 2];
        System.arraycopy(fQName, 0, tmp, 0, oldLength);
        fQName = tmp;

        // only the newly added slots need objects
        for (int i = oldLength; i < fQName.length; i++) {
            fQName[i] = new QName();
        }

    }


    //
    // Public methods
    //

    /**
     * Checks if the element scanned during the start element
     * matches the stored element. The comparison is only attempted when the
     * last recorded depth is greater than the current depth and the current
     * depth is at most 2. The depth is incremented on every call.
     *
     * @param element the element that was just scanned
     * @return true if the match succeeds.
     */
    public boolean matchElement(QName element) {
        // last depth is the depth when the last element was pushed;
        // a comparison is only made if last depth is greater than current depth
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("fLastDepth = " + fLastDepth);
            System.out.println("fDepth = " + fDepth);
        }
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 2){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname);
            }
            // NOTE(review): reference comparison of the raw names -- presumably relies on
            // the names being interned symbols; confirm before changing to equals()
            if(element.rawname == fQName[fDepth].rawname){
                fAdd = false;
                // mark this position;
                // decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                // we found the match and from the next element skipping will start, add 1
                fPosition = fMark + 1 ;
                match = true;
                // once we get a match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        // store the current depth as the last depth, then increase the depth
        fLastDepth = fDepth++;
        return match;
    } // matchElement(QName):boolean

    /**
     * This function doesn't increase depth. The work is
     * broken down into two functions for efficiency; see {@link #matchElement(QName)}.
     * This function just returns the reference to the object so that its values can be set.
     * When the count has reached the array length, skipping is switched off
     * (fShouldSkip and fAdd are set to false) and the last slot is returned.
     *
     * @return QName reference to the next element in the list
     */
    public QName nextElement() {

        // if number of elements becomes equal to the length of array -- stop the skipping
        if (fCount == fQName.length) {
            fShouldSkip = false;
            fAdd = false;
            if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip);
            // xxx: this is not correct, we are returning the last element;
            // this won't make any difference since the flag has been set to 'false'
            return fQName[--fCount];
        }
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("fCount = " + fCount);
        }
        return fQName[fCount++];

    }

    /**
     * Note that this function is considerably different from nextElement().
     * This function just returns the previously stored elements and advances
     * the position.
     *
     * @return the stored QName at the current position
     */
    public QName getNext(){
        // when position reaches the number of elements in the list,
        // set the position back to mark, making it a circular list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        return fQName[fPosition++];
    }

    /**
     * Returns the current depth and then decreases it by one.
     *
     * @return the depth before the decrement
     */
    public int popElement(){
        return fDepth--;
    }


    /** Clears the stack without throwing away existing QName objects. */
    public void clear() {
        fLastDepth = 0;
        fDepth = 0;
        fCount = 0 ;
        fPosition = fMark = 1;
    } // clear()

} // class ElementStack2

/**
 * Element stack. This stack operates without synchronization, error
 * checking, and it re-uses objects instead of throwing popped items
 * away.
 *
 * @author Andy Clark, IBM
 */
protected class ElementStack {

    //
    // Data
    //

    /** The stack data. */
    protected QName[] fElements;
    // for each depth, the index into fElements of the tag opened at that depth
    // NOTE(review): fixed at 20 entries and never resized here, unlike fElements
    protected int [] fInt = new int[20];


    // Element depth
    protected int fDepth;
    // total number of elements
    protected int fCount;
    // current position
    protected int fPosition;
    // Mark refers to the position getNext() wraps back to
    protected int fMark;

    // depth stored by the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor: allocates 20 slots and fills each with a QName. */
    public ElementStack() {
        fElements = new QName[20];
        for (int i = 0; i < fElements.length; i++) {
            fElements[i] = new QName();
        }
    } // <init>()

    //
    // Public methods
    //

    /**
     * Pushes an element on the stack, doubling the array first when the
     * depth has reached its length.
     * <p>
     * <strong>Note:</strong> The QName values are copied into the
     * stack. In other words, the caller does <em>not</em> orphan
     * the element to the stack. Also, the QName object returned
     * is <em>not</em> orphaned to the caller. It should be
     * considered read-only.
     *
     * @param element The element to push onto the stack.
     *
     * @return Returns the actual QName object that stores the
     *         copied values of the pushed element.
     */
    //XXX: THIS FUNCTION IS NOT USED
    public QName pushElement(QName element) {
        if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            // only the newly added slots need objects
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }
        fElements[fDepth].setValues(element);
        return fElements[fDepth++];
    } // pushElement(QName):QName


    /**
     * Note that this function is considerably different from nextElement().
     * This function just returns the previously stored element at the current
     * position; the position itself is advanced by push().
     *
     * @return the stored QName at the current position
     */
    public QName getNext(){
        // when position reaches the number of elements in the list,
        // set the position back to mark, making it a circular list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        //store the position of last opened tag at particular depth
        //fInt[++fDepth] = fPosition;
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
        }
        //return fElements[fPosition++];
        return fElements[fPosition];
    }

    /**
     * This function should be called only when the element was skipped successfully.
     * 1. Increases the depth - because the element was successfully skipped.
     * 2. Stores the position of the element token in the "last opened tag" array (fInt) at that depth.
     * 3. Increases the position counter so that it points to the next element in the array.
     */
    public void push(){

        fInt[++fDepth] = fPosition++;
    }

    /**
     * Checks if the element scanned during the start element
     * matches the stored element. The comparison is only attempted when the
     * last recorded depth is greater than the current depth and the current
     * depth is at most 3. In every case the element-array index for the
     * current depth is recorded in fInt. When the count has reached the array
     * length, skipping is switched off, the stack is repositioned and false
     * is returned.
     *
     * @param element the element that was just scanned
     * @return true if the match succeeds.
     */
    public boolean matchElement(QName element) {
        // last depth is the depth when the last element was pushed;
        // a comparison is only made if last depth is greater than current depth
        //if(DEBUG_SKIP_ALGORITHM){
        //   System.out.println("Check if the element " + element.rawname + " matches");
        //  System.out.println("fLastDepth = " + fLastDepth);
        // System.out.println("fDepth = " + fDepth);
        //}
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 3){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
            }
            // NOTE(review): reference comparison of the raw names -- presumably relies on
            // the names being interned symbols; confirm before changing to equals()
            if(element.rawname == fElements[fDepth - 1].rawname){
                fAdd = false;
                // mark this position;
                // decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                // we found the match
                fPosition = fMark;
                match = true;
                // once we get a match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                    System.out.println("---------MATCH SUCEEDED-----------------");
                    System.out.println("");
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        // store the position for the current depth;
        // this has to be tracked while elements are being added and also
        // once skipping starts, i.e. when calling getNext()
        if(match){
            // from the next element skipping will start, add 1
            fInt[fDepth] = fPosition++;
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
            }
            // since fInt[fDepth] contains an index into the element array, which is 0 based
            fInt[fDepth] = fCount - 1;
        }

        // if number of elements becomes equal to the length of array -- stop the skipping
        // xxx: should we do "fCount == fInt.length"
        if (fCount == fElements.length) {
            fSkip = false;
            fAdd = false;
            // reposition the stack -- the document seems to be too complex and there is no symmetry in its structure
            reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                System.out.println("REPOSITIONING THE STACK");
                System.out.println("-----------SKIPPING STOPPED----------");
                System.out.println("");
            }
            return false;
        }
        if(DEBUG_SKIP_ALGORITHM){
            if(match){
                System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }else{
                System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }
        }
        // store the last depth
        fLastDepth = fDepth;
        return match;
    } // matchElement(QName):boolean


    /**
     * Returns the next element on the stack. While skipping (fSkip) the
     * depth is increased and the slot at the running count is handed out;
     * otherwise the array is doubled if the depth has reached its length
     * and the slot at the current depth is handed out.
     *
     * @return Returns the actual QName object. The caller should
     * use this object to store the details of the next element encountered.
     */
    public QName nextElement() {
        if(fSkip){
            fDepth++;
            // boundary checks are done in matchElement()
            return fElements[fCount++];
        } else if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            // only the newly added slots need objects
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }

        return fElements[fDepth++];

    } // nextElement():QName


    /**
     * Pops an element off of the stack and returns the stored QName object
     * itself; no values are copied. While skipping or adding (fSkip or fAdd)
     * the element is looked up through the index recorded in fInt for the
     * current depth; otherwise it is the slot just below the current depth.
     * <p>
     * <strong>Note:</strong> The object returned is <em>not</em>
     * orphaned to the caller. Therefore, the caller should consider
     * the object to be read-only.
     *
     * @return the QName object of the popped element
     */
    public QName popElement() {
        // return the same object that was pushed -- this avoids
        // setting the values for every end element.
        // STRONG: this object is read only -- this object reference shouldn't be stored.
        if(fSkip || fAdd ){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
                System.out.println("");
            }
            return fElements[fInt[fDepth--]];
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                // NOTE(review): this debug line reads fElements[fDepth] before the decrement,
                // i.e. the slot above the element that is actually returned below
                System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
            }
            return fElements[--fDepth] ;
        }
        //element.setValues(fElements[--fDepth]);
    } // popElement():QName

    /**
     * Repositions the stack. fInt[] contains the element-array index of the tag opened at each depth.
     * Transfers all the opened tags starting from depth '2' up to the current depth and repositions them
     * as per the depth, i.e. the tag opened at depth i is placed at index i-1.
     * Only references are moved, so a QName object can end up referenced from two slots.
     */
    public void reposition(){
        for( int i = 2 ; i <= fDepth ; i++){
            fElements[i-1] = fElements[fInt[i]];
        }
        if(DEBUG_SKIP_ALGORITHM){
            for( int i = 0 ; i < fDepth ; i++){
                System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
            }
        }
    }

    /** Clears the stack without throwing away existing QName objects. */
    public void clear() {
        fDepth = 0;
        fLastDepth = 0;
        fCount = 0 ;
        fPosition = fMark = 1;

    } // clear()

    /**
     * This function is a result of the optimization done for endElement --
     * we don't need to set the values for every end element encountered.
     * For well-formedness checks we can use the same QName object that was pushed;
     * the values will be set only if the application needs to know about the endElement.
     *
     * @return the QName stored at the current depth index
     */

    public QName getLastPoppedElement(){
        return fElements[fDepth];
    }
} // class ElementStack

/**
 * Drives the parser to the next state/event on the input. Parser is guaranteed
 * to stop at the next state/event.
 *
 * Internally the XML document is divided into several states. Each state represents
 * a section of the XML document. When this function returns normally, it has read
 * the section of the XML document and returns the state corresponding to the section of
 * the document which has been read. For optimizations, a particular driver
 * can read ahead of the section of document (state returned) just read and
 * can maintain a different internal state.
 *
 *
 * @author Neeraj Bajaj, Sun Microsystems
 */
protected interface Driver {


    /**
     * Drives the parser to the next state/event on the input. Parser is guaranteed
     * to stop at the next state/event.
     *
     * Internally the XML document is divided into several states. Each state represents
     * a section of the XML document. When this function returns normally, it has read
     * the section of the XML document and returns the state corresponding to the section of
     * the document which has been read. For optimizations, a particular driver
     * can read ahead of the section of document (state returned) just read and
     * can maintain a different internal state.
     *
     * @return state representing the section of document just read.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */

    public int next() throws IOException, XNIException;

} // interface Driver

/**
 * Driver to handle content scanning. This driver is capable of reading
 * the fragment of XML document.
When it has finished reading fragment 2514 * of XML documents, it can pass the job of reading to another driver. 2515 * 2516 * This class has been modified as per the new design which is more suited to 2517 * efficiently build pull parser. Lot of performance improvements have been done and 2518 * the code has been added to support stax functionality/features. 2519 * 2520 * @author Neeraj Bajaj, Sun Microsystems 2521 * 2522 * 2523 * @author Andy Clark, IBM 2524 * @author Eric Ye, IBM 2525 */ 2526 protected class FragmentContentDriver 2527 implements Driver { 2528 2529 // 2530 // Driver methods 2531 // 2532 2533 /** 2534 * decides the appropriate state of the parser 2535 */ 2536 private void startOfMarkup() throws IOException { 2537 fMarkupDepth++; 2538 final int ch = fEntityScanner.peekChar(); 2539 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2540 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2541 } else { 2542 switch(ch){ 2543 case '?' :{ 2544 setScannerState(SCANNER_STATE_PI); 2545 fEntityScanner.skipChar(ch, null); 2546 break; 2547 } 2548 case '!' 
:{ 2549 fEntityScanner.skipChar(ch, null); 2550 if (fEntityScanner.skipChar('-', null)) { 2551 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2552 reportFatalError("InvalidCommentStart", 2553 null); 2554 } 2555 setScannerState(SCANNER_STATE_COMMENT); 2556 } else if (fEntityScanner.skipString(CDATA)) { 2557 fCDataStart = true; 2558 setScannerState(SCANNER_STATE_CDATA ); 2559 } else if (!scanForDoctypeHook()) { 2560 reportFatalError("MarkupNotRecognizedInContent", 2561 null); 2562 } 2563 break; 2564 } 2565 case '/' :{ 2566 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2567 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2568 break; 2569 } 2570 default :{ 2571 reportFatalError("MarkupNotRecognizedInContent", null); 2572 } 2573 } 2574 } 2575 2576 }//startOfMarkup 2577 2578 private void startOfContent() throws IOException { 2579 if (fEntityScanner.skipChar('<', null)) { 2580 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2581 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2582 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2583 } else { 2584 //element content is there.. 2585 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2586 } 2587 }//startOfContent 2588 2589 2590 /** 2591 * 2592 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2593 * At any point of time when in doubt over the current state of the parser, the state should be 2594 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2595 * the parser to one of its sub state. 2596 * sub states are defined in the parser on the basis of different XML component like 2597 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2598 * These sub states help the parser to have fine control over the parsing. These are the 2599 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2600 * decided if paresr needs to stop at next milepost ?? 2601 * 2602 */ 2603 public void decideSubState() throws IOException { 2604 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2605 2606 switch (fScannerState) { 2607 2608 case SCANNER_STATE_CONTENT: { 2609 startOfContent() ; 2610 break; 2611 } 2612 2613 case SCANNER_STATE_START_OF_MARKUP: { 2614 startOfMarkup() ; 2615 break; 2616 } 2617 } 2618 } 2619 }//decideSubState 2620 2621 /** 2622 * Drives the parser to the next state/event on the input. Parser is guaranteed 2623 * to stop at the next state/event. Internally XML document 2624 * is divided into several states. Each state represents a sections of XML 2625 * document. When this functions returns normally, it has read the section 2626 * of XML document and returns the state corresponding to section of 2627 * document which has been read. For optimizations, a particular driver 2628 * can read ahead of the section of document (state returned) just read and 2629 * can maintain a different internal state. 2630 * 2631 * State returned corresponds to Stax states. 2632 * 2633 * @return state representing the section of document just read. 2634 * 2635 * @throws IOException Thrown on i/o error. 2636 * @throws XNIException Thrown on parse error. 2637 */ 2638 2639 public int next() throws IOException, XNIException { 2640 while (true) { 2641 try { 2642 2643 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2644 //decideSubState. 2645 2646 if (fScannerState == SCANNER_STATE_CONTENT) { 2647 final int ch = fEntityScanner.peekChar(); 2648 if (ch == '<') { 2649 fEntityScanner.scanChar(null); 2650 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2651 } else if (ch == '&') { 2652 fEntityScanner.scanChar(NameType.REFERENCE); 2653 setScannerState(SCANNER_STATE_REFERENCE) ; 2654 } else { 2655 //element content is there.. 
2656 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2657 } 2658 } 2659 2660 if (fScannerState == SCANNER_STATE_START_OF_MARKUP) { 2661 startOfMarkup(); 2662 } 2663 2664 //decideSubState() ; 2665 2666 //do some special handling if isCoalesce is set to true. 2667 if (fIsCoalesce) { 2668 fUsebuffer = true ; 2669 //if the last section was character data 2670 if (fLastSectionWasCharacterData) { 2671 2672 //if we dont encounter any CDATA or ENTITY REFERENCE and 2673 //current state is also not SCANNER_STATE_CHARACTER_DATA 2674 //return the last scanned charactrer data. 2675 if ((fScannerState != SCANNER_STATE_CDATA) 2676 && (fScannerState != SCANNER_STATE_REFERENCE) 2677 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) { 2678 fLastSectionWasCharacterData = false; 2679 return XMLEvent.CHARACTERS; 2680 } 2681 }//if last section was CDATA or ENTITY REFERENCE 2682 //xxx: there might be another entity reference or CDATA after this 2683 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2684 else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) { 2685 //and current state is not SCANNER_STATE_CHARACTER_DATA 2686 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2687 //this means there is nothing more to be coalesced. 2688 //return the CHARACTERS event. 
2689 if ((fScannerState != SCANNER_STATE_CDATA) 2690 && (fScannerState != SCANNER_STATE_REFERENCE) 2691 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2692 2693 fLastSectionWasCData = false; 2694 fLastSectionWasEntityReference = false; 2695 return XMLEvent.CHARACTERS; 2696 } 2697 } 2698 } 2699 2700 switch(fScannerState){ 2701 2702 case XMLEvent.START_DOCUMENT : 2703 return XMLEvent.START_DOCUMENT; 2704 2705 case SCANNER_STATE_START_ELEMENT_TAG :{ 2706 2707 //returns true if the element is empty 2708 fEmptyElement = scanStartElement() ; 2709 //if the element is empty the next event is "end element" 2710 if(fEmptyElement){ 2711 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2712 }else{ 2713 //set the next possible state 2714 setScannerState(SCANNER_STATE_CONTENT); 2715 } 2716 return XMLEvent.START_ELEMENT ; 2717 } 2718 2719 case SCANNER_STATE_CHARACTER_DATA: { 2720 2721 //if last section was either entity reference or cdata or 2722 //character data we should be using buffer 2723 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData 2724 || fLastSectionWasCharacterData ; 2725 2726 //When coalesce is set to true and last state was REFERENCE or 2727 //CDATA or CHARACTER_DATA, buffer should not be cleared. 
2728 if( fIsCoalesce && (fLastSectionWasEntityReference || 2729 fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2730 fLastSectionWasEntityReference = false; 2731 fLastSectionWasCData = false; 2732 fLastSectionWasCharacterData = true ; 2733 fUsebuffer = true; 2734 }else{ 2735 //clear the buffer 2736 fContentBuffer.clear(); 2737 } 2738 2739 //set the fTempString length to 0 before passing it on to scanContent 2740 //scanContent sets the correct co-ordinates as per the content read 2741 fTempString.length = 0; 2742 int c = fEntityScanner.scanContent(fTempString); 2743 2744 if(fEntityScanner.skipChar('<', null)){ 2745 //check if we have reached end of element 2746 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2747 //increase the mark up depth 2748 fMarkupDepth++; 2749 fLastSectionWasCharacterData = false; 2750 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2751 //check if its start of new element 2752 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2753 fMarkupDepth++; 2754 fLastSectionWasCharacterData = false; 2755 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2756 }else{ 2757 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2758 //there can be cdata ahead if coalesce is true we should call again 2759 if(fIsCoalesce){ 2760 fLastSectionWasCharacterData = true; 2761 bufferContent(); 2762 continue; 2763 } 2764 } 2765 //in case last section was either entity reference or 2766 //cdata or character data -- we should be using buffer 2767 if(fUsebuffer){ 2768 bufferContent(); 2769 } 2770 2771 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2772 if(DEBUG)System.out.println("Return SPACE EVENT"); 2773 return XMLEvent.SPACE; 2774 }else 2775 return XMLEvent.CHARACTERS; 2776 2777 } else{ 2778 bufferContent(); 2779 } 2780 if (c == '\r') { 2781 if(DEBUG){ 2782 System.out.println("'\r' character found"); 2783 } 2784 // happens when there is the character reference 2785 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 2786 fEntityScanner.scanChar(null); 2787 fUsebuffer = true; 2788 fContentBuffer.append((char)c); 2789 c = -1 ; 2790 } else if (c == ']') { 2791 //fStringBuffer.clear(); 2792 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2793 fUsebuffer = true; 2794 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2795 // remember where we are in case we get an endEntity before we 2796 // could flush the buffer out - this happens when we're parsing an 2797 // entity which ends with a ] 2798 fInScanContent = true; 2799 2800 // We work on a single character basis to handle cases such as: 2801 // ']]]>' which we might otherwise miss. 2802 // 2803 if (fEntityScanner.skipChar(']', null)) { 2804 fContentBuffer.append(']'); 2805 while (fEntityScanner.skipChar(']', null)) { 2806 fContentBuffer.append(']'); 2807 } 2808 if (fEntityScanner.skipChar('>', null)) { 2809 reportFatalError("CDEndInContent", null); 2810 } 2811 } 2812 c = -1 ; 2813 fInScanContent = false; 2814 } 2815 2816 do{ 2817 //xxx: we should be using only one buffer.. 2818 // we need not to grow the buffer only when isCoalesce() is not true; 2819 2820 if (c == '<') { 2821 fEntityScanner.scanChar(null); 2822 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2823 break; 2824 }//xxx what should be the behavior if entity reference is present in the content ? 2825 else if (c == '&') { 2826 fEntityScanner.scanChar(NameType.REFERENCE); 2827 setScannerState(SCANNER_STATE_REFERENCE); 2828 break; 2829 }///xxx since this part is also characters, it should be merged... 
2830 else if (c != -1 && isInvalidLiteral(c)) { 2831 if (XMLChar.isHighSurrogate(c)) { 2832 // special case: surrogates 2833 scanSurrogates(fContentBuffer) ; 2834 setScannerState(SCANNER_STATE_CONTENT); 2835 } else { 2836 reportFatalError("InvalidCharInContent", 2837 new Object[] { 2838 Integer.toString(c, 16)}); 2839 fEntityScanner.scanChar(null); 2840 } 2841 break; 2842 } 2843 //xxx: scanContent also gives character callback. 2844 c = scanContent(fContentBuffer) ; 2845 //we should not be iterating again if fIsCoalesce is not set to true 2846 2847 if(!fIsCoalesce){ 2848 setScannerState(SCANNER_STATE_CONTENT); 2849 break; 2850 } 2851 2852 }while(true); 2853 2854 //if (fDocumentHandler != null) { 2855 // fDocumentHandler.characters(fContentBuffer, null); 2856 //} 2857 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2858 //if fIsCoalesce is true there might be more data so call fDriver.next() 2859 if(fIsCoalesce){ 2860 fLastSectionWasCharacterData = true ; 2861 continue; 2862 }else{ 2863 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2864 if(DEBUG)System.out.println("Return SPACE EVENT"); 2865 return XMLEvent.SPACE; 2866 } else 2867 return XMLEvent.CHARACTERS ; 2868 } 2869 } 2870 2871 case SCANNER_STATE_END_ELEMENT_TAG :{ 2872 if(fEmptyElement){ 2873 //set it back to false. 2874 fEmptyElement = false; 2875 setScannerState(SCANNER_STATE_CONTENT); 2876 //check the case when there is comment after single element document 2877 //<foo/> and some comment after this 2878 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
2879 XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2880 2881 } else if(scanEndElement() == 0) { 2882 //It is last element of the document 2883 if (elementDepthIsZeroHook()) { 2884 //if element depth is zero , it indicates the end of the document 2885 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2886 //xxx understand this point once again.. 2887 return XMLEvent.END_ELEMENT ; 2888 } 2889 2890 } 2891 setScannerState(SCANNER_STATE_CONTENT); 2892 return XMLEvent.END_ELEMENT ; 2893 } 2894 2895 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2896 scanComment(); 2897 setScannerState(SCANNER_STATE_CONTENT); 2898 return XMLEvent.COMMENT; 2899 //break; 2900 } 2901 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2902 //clear the buffer first 2903 fContentBuffer.clear() ; 2904 //xxx: which buffer should be passed. Ideally we shouldn't have 2905 //more than two buffers -- 2906 //xxx: where should we add the switch for buffering. 2907 scanPI(fContentBuffer); 2908 setScannerState(SCANNER_STATE_CONTENT); 2909 return XMLEvent.PROCESSING_INSTRUCTION; 2910 //break; 2911 } 2912 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2913 //xxx: What if CDATA is the first event 2914 //<foo><![CDATA[hello<><>]]>append</foo> 2915 2916 //we should not clear the buffer only when the last state was 2917 //either SCANNER_STATE_REFERENCE or 2918 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2919 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2920 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2921 fLastSectionWasCData = true ; 2922 fLastSectionWasEntityReference = false; 2923 fLastSectionWasCharacterData = false; 2924 }//if we dont need to coalesce clear the buffer 2925 else{ 2926 fContentBuffer.clear(); 2927 } 2928 fUsebuffer = true; 2929 //CDATA section is read up to the chunk size limit 2930 scanCDATASection(fContentBuffer , true); 2931 setScannerState(SCANNER_STATE_CONTENT); 2932 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2933 //and just call fDispatche.next(). Since we have set the scanner state to 2934 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2935 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2936 //2. Check if application has set for reporting CDATA event 2937 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2938 //return the cdata event as characters. 2939 if (fIsCoalesce) { 2940 fLastSectionWasCData = true ; 2941 //there might be more data to coalesce. 2942 continue; 2943 } else if(fReportCdataEvent) { 2944 if (!fCDataEnd) { 2945 setScannerState(SCANNER_STATE_CDATA); 2946 } 2947 return XMLEvent.CDATA; 2948 } else { 2949 return XMLEvent.CHARACTERS; 2950 } 2951 } 2952 2953 case SCANNER_STATE_REFERENCE :{ 2954 fMarkupDepth++; 2955 foundBuiltInRefs = false; 2956 2957 //we should not clear the buffer only when the last state was 2958 //either CDATA or 2959 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2960 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2961 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2962 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2963 //used when fIsCoalesce is set to true. 
2964 fLastSectionWasEntityReference = true ; 2965 fLastSectionWasCData = false; 2966 fLastSectionWasCharacterData = false; 2967 }//if we dont need to coalesce clear the buffer 2968 else{ 2969 fContentBuffer.clear(); 2970 } 2971 fUsebuffer = true ; 2972 //take care of character reference 2973 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 2974 scanCharReferenceValue(fContentBuffer, null); 2975 fMarkupDepth--; 2976 if(!fIsCoalesce){ 2977 setScannerState(SCANNER_STATE_CONTENT); 2978 return XMLEvent.CHARACTERS; 2979 } 2980 } else { 2981 // this function also starts new entity 2982 scanEntityReference(fContentBuffer); 2983 //if there was built-in entity reference & coalesce is not true 2984 //return CHARACTERS 2985 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 2986 setScannerState(SCANNER_STATE_CONTENT); 2987 if (builtInRefCharacterHandled) { 2988 builtInRefCharacterHandled = false; 2989 return XMLEvent.ENTITY_REFERENCE; 2990 } else { 2991 return XMLEvent.CHARACTERS; 2992 } 2993 } 2994 2995 //if there was a text declaration, call next() it will be taken care. 
2996 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 2997 fLastSectionWasEntityReference = true ; 2998 continue; 2999 } 3000 3001 if(fScannerState == SCANNER_STATE_REFERENCE){ 3002 setScannerState(SCANNER_STATE_CONTENT); 3003 if (fReplaceEntityReferences && 3004 fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3005 // Skip the entity reference, we don't care 3006 continue; 3007 } 3008 return XMLEvent.ENTITY_REFERENCE; 3009 } 3010 } 3011 //Wether it was character reference, entity reference or built-in entity 3012 //set the next possible state to SCANNER_STATE_CONTENT 3013 setScannerState(SCANNER_STATE_CONTENT); 3014 fLastSectionWasEntityReference = true ; 3015 continue; 3016 } 3017 3018 case SCANNER_STATE_TEXT_DECL: { 3019 // scan text decl 3020 if (fEntityScanner.skipString("<?xml")) { 3021 fMarkupDepth++; 3022 // NOTE: special case where entity starts with a PI 3023 // whose name starts with "xml" (e.g. "xmlfoo") 3024 if (isValidNameChar(fEntityScanner.peekChar())) { 3025 fStringBuffer.clear(); 3026 fStringBuffer.append("xml"); 3027 3028 if (fNamespaces) { 3029 while (isValidNCName(fEntityScanner.peekChar())) { 3030 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3031 } 3032 } else { 3033 while (isValidNameChar(fEntityScanner.peekChar())) { 3034 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3035 } 3036 } 3037 String target = fSymbolTable.addSymbol(fStringBuffer.ch, 3038 fStringBuffer.offset, fStringBuffer.length); 3039 fContentBuffer.clear(); 3040 scanPIData(target, fContentBuffer); 3041 } 3042 3043 // standard text declaration 3044 else { 3045 //xxx: this function gives callback 3046 scanXMLDeclOrTextDecl(true); 3047 } 3048 } 3049 // now that we've straightened out the readers, we can read in chunks: 3050 fEntityManager.fCurrentEntity.mayReadChunks = true; 3051 setScannerState(SCANNER_STATE_CONTENT); 3052 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 
3053 //it seems we have to careful when to allow function issue a callback 3054 //and when to allow adapter issue a callback. 3055 continue; 3056 } 3057 3058 3059 case SCANNER_STATE_ROOT_ELEMENT: { 3060 if (scanRootElementHook()) { 3061 fEmptyElement = true; 3062 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3063 return XMLEvent.START_ELEMENT; 3064 } 3065 setScannerState(SCANNER_STATE_CONTENT); 3066 return XMLEvent.START_ELEMENT ; 3067 } 3068 case SCANNER_STATE_CHAR_REFERENCE : { 3069 fContentBuffer.clear(); 3070 scanCharReferenceValue(fContentBuffer, null); 3071 fMarkupDepth--; 3072 setScannerState(SCANNER_STATE_CONTENT); 3073 return XMLEvent.CHARACTERS; 3074 } 3075 default: 3076 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3077 3078 }//switch 3079 } 3080 // encoding errors 3081 catch (MalformedByteSequenceException e) { 3082 fErrorReporter.reportError(e.getDomain(), e.getKey(), 3083 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3084 return -1; 3085 } 3086 catch (CharConversionException e) { 3087 fErrorReporter.reportError( 3088 XMLMessageFormatter.XML_DOMAIN, 3089 "CharConversionFailure", 3090 null, 3091 XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3092 return -1; 3093 } 3094 // premature end of file 3095 catch (EOFException e) { 3096 endOfFileHook(e); 3097 return -1; 3098 } 3099 } //while loop 3100 }//next 3101 3102 // 3103 // Protected methods 3104 // 3105 3106 // hooks 3107 3108 // NOTE: These hook methods are added so that the full document 3109 // scanner can share the majority of code with this class. 3110 3111 /** 3112 * Scan for DOCTYPE hook. This method is a hook for subclasses 3113 * to add code to handle scanning for a the "DOCTYPE" string 3114 * after the string "<!" has been scanned. 3115 * 3116 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3117 * was not scanned. 
3118 */ 3119 protected boolean scanForDoctypeHook() 3120 throws IOException, XNIException { 3121 return false; 3122 } // scanForDoctypeHook():boolean 3123 3124 /** 3125 * Element depth iz zero. This methos is a hook for subclasses 3126 * to add code to handle when the element depth hits zero. When 3127 * scanning a document fragment, an element depth of zero is 3128 * normal. However, when scanning a full XML document, the 3129 * scanner must handle the trailing miscellanous section of 3130 * the document after the end of the document's root element. 3131 * 3132 * @return True if the caller should stop and return true which 3133 * allows the scanner to switch to a new scanning 3134 * driver. A return value of false indicates that 3135 * the content driver should continue as normal. 3136 */ 3137 protected boolean elementDepthIsZeroHook() 3138 throws IOException, XNIException { 3139 return false; 3140 } // elementDepthIsZeroHook():boolean 3141 3142 /** 3143 * Scan for root element hook. This method is a hook for 3144 * subclasses to add code that handles scanning for the root 3145 * element. When scanning a document fragment, there is no 3146 * "root" element. However, when scanning a full XML document, 3147 * the scanner must handle the root element specially. 3148 * 3149 * @return True if the caller should stop and return true which 3150 * allows the scanner to switch to a new scanning 3151 * driver. A return value of false indicates that 3152 * the content driver should continue as normal. 3153 */ 3154 protected boolean scanRootElementHook() 3155 throws IOException, XNIException { 3156 return false; 3157 } // scanRootElementHook():boolean 3158 3159 /** 3160 * End of file hook. This method is a hook for subclasses to 3161 * add code that handles the end of file. The end of file in 3162 * a document fragment is OK if the markup depth is zero. 3163 * However, when scanning a full XML document, an end of file 3164 * is always premature. 
3165 */ 3166 protected void endOfFileHook(EOFException e) 3167 throws IOException, XNIException { 3168 3169 // NOTE: An end of file is only only an error if we were 3170 // in the middle of scanning some markup. -Ac 3171 if (fMarkupDepth != 0) { 3172 reportFatalError("PrematureEOF", null); 3173 } 3174 3175 } // endOfFileHook() 3176 3177 } // class FragmentContentDriver 3178 3179 static void pr(String str) { 3180 System.out.println(str) ; 3181 } 3182 3183 protected boolean fUsebuffer ; 3184 3185 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3186 * maintained for attributes. 3187 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3188 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3189 * XMLString. 3190 * 3191 * @return XMLString XMLString used to store an attribute value. 3192 */ 3193 3194 protected XMLString getString(){ 3195 if(fAttributeCacheUsedCount < initialCacheCount || 3196 fAttributeCacheUsedCount < attributeValueCache.size()){ 3197 return attributeValueCache.get(fAttributeCacheUsedCount++); 3198 } else{ 3199 XMLString str = new XMLString(); 3200 fAttributeCacheUsedCount++; 3201 attributeValueCache.add(str); 3202 return str; 3203 } 3204 } 3205 3206 /** 3207 * Implements XMLBufferListener interface. 3208 */ 3209 3210 public void refresh(){ 3211 refresh(0); 3212 } 3213 3214 /** 3215 * receives callbacks from {@link XMLEntityReader } when buffer 3216 * is being changed. 3217 * @param refreshPosition 3218 */ 3219 public void refresh(int refreshPosition){ 3220 //If you are reading attributes and you got a callback 3221 //cache available attributes. 
3222 if(fReadingAttributes){ 3223 fAttributes.refresh(); 3224 } 3225 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3226 bufferContent(); 3227 } 3228 } 3229 3230 /** 3231 * Since 'TempString' shares the buffer (a char array) with the CurrentEntity, 3232 * when the cursor position reaches the end, that is, before the buffer is 3233 * being loaded with new data, the content in the TempString needs to be 3234 * copied into the ContentBuffer. 3235 */ 3236 private void bufferContent() { 3237 fContentBuffer.append(fTempString); 3238 //clear the XMLString so that data can't be added again. 3239 fTempString.length = 0; 3240 fUsebuffer = true; 3241 } 3242 } // class XMLDocumentFragmentScannerImpl