1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.SecurityManager; 54 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 55 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 56 import javax.xml.stream.XMLStreamConstants; 57 import javax.xml.stream.events.XMLEvent; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 67 * 68 * @author Neeraj Bajaj SUN Microsystems 69 * @author K.Venugopal SUN Microsystems 70 * @author Glenn Marcy, IBM 71 * @author Andy Clark, IBM 72 * @author Arnaud Le Hors, IBM 73 * @author Eric Ye, IBM 74 * @author Sunitha Reddy, SUN Microsystems 75 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $ 76 * 77 */ 78 public class XMLDocumentFragmentScannerImpl 79 extends XMLScanner 80 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 81 82 // 83 // Constants 84 // 85 86 protected int fElementAttributeLimit; 87 88 /** External subset resolver. **/ 89 protected ExternalSubsetResolver fExternalSubsetResolver; 90 91 // scanner states 92 93 //XXX this should be divided into more states. 
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    // <book type="hard"> reading attribute name 'type'
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    // <book type="hard"> reading attribute value.
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    // Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL; the value 32
    // is therefore not declared in this class.
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: Text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    // <book type="hard">foo</book> reading the start tag
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    // <book type="hard">foo</book> reading </book>
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    // recognized features and properties

    /** Recognized features; FEATURE_DEFAULTS is indexed in parallel with this array. */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
                VALIDATION,
                NOTIFY_BUILTIN_REFS,
                NOTIFY_CHAR_REFS,
                Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Feature defaults, one entry per element of RECOGNIZED_FEATURES (null = no default reported). */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
                null,
                Boolean.FALSE,
                Boolean.FALSE,
                Boolean.TRUE
    };

    /** Recognized properties; PROPERTY_DEFAULTS is indexed in parallel with this array. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
                ERROR_REPORTER,
                ENTITY_MANAGER,
    };

    /** Property defaults, one entry per element of RECOGNIZED_PROPERTIES. */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
                null,
                null,
    };

    /** The characters "[CDATA[". */
    protected static final char [] cdata = {'[','C','D','A','T','A','['};
    /** The characters "<?xml". */
    protected static final char [] xmlDecl = {'<','?','x','m','l'};
    /** The characters "</". */
    protected static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    /** Debug flag (general). */
    protected static final boolean DEBUG = false;
    /** Debug flag for coalescing. */
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    /** Compared against XMLEvent.END_ELEMENT by getElementQName(). */
    protected int fScannerLastState ;

    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack: startEntity() stores fMarkupDepth at index fEntityDepth and
     * endEntity() compares against it; the array is doubled when full.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth. */
    protected int fMarkupDepth;

    //is the element empty
    protected boolean fEmptyElement ;

    //track if we are reading attributes, this is useful while
    //there is a callback
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /** True when the XML declaration carried a standalone pseudo-attribute. */
    protected boolean fStandaloneSet;
    /** True when the standalone pseudo-attribute was "yes". */
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    /** Target of the last processing instruction; set by scanPIData(). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    // NOTE(review): getPIData() returns fContentBuffer, not this field.
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    //STAX related properties
    //default values (both reset() overloads overwrite several of them).
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding pseudo-attribute of the last scanned XML/text declaration. */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /**
     * Content driver. Initialised by calling createContentDriver() while the
     * fields are being initialised, so the declaration order here matters.
     */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** String. */
    protected XMLString fTempString = new XMLString();

    /** String. */
    protected XMLString fTempString2 = new XMLString();

    /** Array of 3 strings: version, encoding and standalone (see scanXMLDeclOrTextDecl). */
    private String[] fStrings = new String[3];

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Stores character data; also filled by scanComment() and returned by
     * getPIData(). Made accessible to derived classes.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    protected SecurityManager fSecurityManager = null;

    //skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    //create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    //pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    //next free slot in fElementArray (see addElement)
    short fElementPointer = 0 ;
    //2D array to store pointer info, indexed [depth][column]; 0 means "slot free" (see canStore)
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
393 */ 394 public void setInputSource(XMLInputSource inputSource) throws IOException { 395 fEntityManager.setEntityHandler(this); 396 fEntityManager.startEntity("$fragment$", inputSource, false, true); 397 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 398 } // setInputSource(XMLInputSource) 399 400 /** 401 * Scans a document. 402 * 403 * @param complete True if the scanner should scan the document 404 * completely, pushing all events to the registered 405 * document handler. A value of false indicates that 406 * that the scanner should only scan the next portion 407 * of the document and return. A scanner instance is 408 * permitted to completely scan a document if it does 409 * not support this "pull" scanning model. 410 * 411 * @return True if there is more to scan, false otherwise. 412 */ 413 /* public boolean scanDocument(boolean complete) 414 throws IOException, XNIException { 415 416 // keep dispatching "events" 417 fEntityManager.setEntityHandler(this); 418 419 return true; 420 421 } // scanDocument(boolean):boolean 422 */ 423 424 public boolean scanDocument(boolean complete) 425 throws IOException, XNIException { 426 427 // keep dispatching "events" 428 fEntityManager.setEntityHandler(this); 429 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 430 431 int event = next(); 432 do { 433 switch (event) { 434 case XMLStreamConstants.START_DOCUMENT : 435 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 436 break; 437 case XMLStreamConstants.START_ELEMENT : 438 //System.out.println(" in scann element"); 439 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 440 break; 441 case XMLStreamConstants.CHARACTERS : 442 fDocumentHandler.characters(getCharacterData(),null); 443 break; 444 case XMLStreamConstants.SPACE: 445 //check if getCharacterData() is the right function 
to retrieve ignorableWhitespace information. 446 //System.out.println("in the space"); 447 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 448 break; 449 case XMLStreamConstants.ENTITY_REFERENCE : 450 //entity reference callback are given in startEntity 451 break; 452 case XMLStreamConstants.PROCESSING_INSTRUCTION : 453 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 454 break; 455 case XMLStreamConstants.COMMENT : 456 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 457 fDocumentHandler.comment(getCharacterData(),null); 458 break; 459 case XMLStreamConstants.DTD : 460 //all DTD related callbacks are handled in DTDScanner. 461 //1. Stax doesn't define DTD states as it does for XML Document. 462 //therefore we don't need to take care of anything here. So Just break; 463 break; 464 case XMLStreamConstants.CDATA: 465 fDocumentHandler.startCDATA(null); 466 //xxx: check if CDATA values comes from getCharacterData() function 467 fDocumentHandler.characters(getCharacterData(),null); 468 fDocumentHandler.endCDATA(null); 469 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 470 break; 471 case XMLStreamConstants.NOTATION_DECLARATION : 472 break; 473 case XMLStreamConstants.ENTITY_DECLARATION : 474 break; 475 case XMLStreamConstants.NAMESPACE : 476 break; 477 case XMLStreamConstants.ATTRIBUTE : 478 break; 479 case XMLStreamConstants.END_ELEMENT : 480 //do not give callback here. 481 //this callback is given in scanEndElement function. 
482 //fDocumentHandler.endElement(getElementQName(),null); 483 break; 484 default : 485 throw new InternalError("processing event: " + event); 486 487 } 488 //System.out.println("here in before calling next"); 489 event = next(); 490 //System.out.println("here in after calling next"); 491 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 492 493 if(event == XMLStreamConstants.END_DOCUMENT) { 494 fDocumentHandler.endDocument(null); 495 return false; 496 } 497 498 return true; 499 500 } // scanDocument(boolean):boolean 501 502 503 504 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 505 if(fScannerLastState == XMLEvent.END_ELEMENT){ 506 fElementQName.setValues(fElementStack.getLastPoppedElement()); 507 } 508 return fElementQName ; 509 } 510 511 /** return the next state on the input 512 * @return int 513 */ 514 515 public int next() throws IOException, XNIException { 516 return fDriver.next(); 517 } 518 519 // 520 // XMLComponent methods 521 // 522 523 /** 524 * Resets the component. The component can query the component manager 525 * about any features and properties that affect the operation of the 526 * component. 527 * 528 * @param componentManager The component manager. 529 * 530 * @throws SAXException Thrown by component on initialization error. 531 * For example, if a feature or property is 532 * required for the operation of the component, the 533 * component manager may throw a 534 * SAXNotRecognizedException or a 535 * SAXNotSupportedException. 
536 */ 537 538 public void reset(XMLComponentManager componentManager) 539 throws XMLConfigurationException { 540 541 super.reset(componentManager); 542 543 // other settings 544 // fDocumentSystemId = null; 545 546 // sax features 547 //fAttributes.setNamespaces(fNamespaces); 548 549 // xerces features 550 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 551 552 fSecurityManager = (SecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 553 fElementAttributeLimit = (fSecurityManager != null)?fSecurityManager.getElementAttrLimit():0; 554 555 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 556 557 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 558 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 559 (ExternalSubsetResolver) resolver : null; 560 561 // initialize vars 562 fMarkupDepth = 0; 563 fCurrentElement = null; 564 fElementStack.clear(); 565 fHasExternalDTD = false; 566 fStandaloneSet = false; 567 fStandalone = false; 568 fInScanContent = false; 569 //skipping algorithm 570 fShouldSkip = false; 571 fAdd = false; 572 fSkip = false; 573 574 //attribute 575 fReadingAttributes = false; 576 //xxx: external entities are supported in Xerces 577 // it would be good to define feature for this case 578 fSupportExternalEntities = true; 579 fReplaceEntityReferences = true; 580 fIsCoalesce = false; 581 582 // setup Driver 583 setScannerState(SCANNER_STATE_CONTENT); 584 setDriver(fContentDriver); 585 fEntityStore = fEntityManager.getEntityStore(); 586 587 dtdGrammarUtil = null; 588 589 590 //fEntityManager.test(); 591 } // reset(XMLComponentManager) 592 593 594 public void reset(PropertyManager propertyManager){ 595 596 super.reset(propertyManager); 597 598 // other settings 599 // fDocumentSystemId = null; 600 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 601 
fNotifyBuiltInRefs = false ; 602 603 // initialize vars 604 fMarkupDepth = 0; 605 fCurrentElement = null; 606 fShouldSkip = false; 607 fAdd = false; 608 fSkip = false; 609 fElementStack.clear(); 610 //fElementStack2.clear(); 611 fHasExternalDTD = false; 612 fStandaloneSet = false; 613 fStandalone = false; 614 //fReplaceEntityReferences = true; 615 //fSupportExternalEntities = true; 616 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_REPLACING_ENTITY_REFERENCES); 617 fReplaceEntityReferences = bo.booleanValue(); 618 bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_SUPPORTING_EXTERNAL_ENTITIES); 619 fSupportExternalEntities = bo.booleanValue(); 620 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 621 if(cdata != null) 622 fReportCdataEvent = cdata.booleanValue() ; 623 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 624 if(coalesce != null) 625 fIsCoalesce = coalesce.booleanValue(); 626 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 627 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 628 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 629 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 630 // setup Driver 631 //we dont need to do this -- nb. 632 //setScannerState(SCANNER_STATE_CONTENT); 633 //setDriver(fContentDriver); 634 fEntityStore = fEntityManager.getEntityStore(); 635 //fEntityManager.test(); 636 637 dtdGrammarUtil = null; 638 639 } // reset(XMLComponentManager) 640 641 /** 642 * Returns a list of feature identifiers that are recognized by 643 * this component. This method may return null if no features 644 * are recognized by this component. 
645 */ 646 public String[] getRecognizedFeatures() { 647 return (String[])(RECOGNIZED_FEATURES.clone()); 648 } // getRecognizedFeatures():String[] 649 650 /** 651 * Sets the state of a feature. This method is called by the component 652 * manager any time after reset when a feature changes state. 653 * <p> 654 * <strong>Note:</strong> Components should silently ignore features 655 * that do not affect the operation of the component. 656 * 657 * @param featureId The feature identifier. 658 * @param state The state of the feature. 659 * 660 * @throws SAXNotRecognizedException The component should not throw 661 * this exception. 662 * @throws SAXNotSupportedException The component should not throw 663 * this exception. 664 */ 665 public void setFeature(String featureId, boolean state) 666 throws XMLConfigurationException { 667 668 super.setFeature(featureId, state); 669 670 // Xerces properties 671 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 672 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 673 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 674 fNotifyBuiltInRefs = state; 675 } 676 } 677 678 } // setFeature(String,boolean) 679 680 /** 681 * Returns a list of property identifiers that are recognized by 682 * this component. This method may return null if no properties 683 * are recognized by this component. 684 */ 685 public String[] getRecognizedProperties() { 686 return (String[])(RECOGNIZED_PROPERTIES.clone()); 687 } // getRecognizedProperties():String[] 688 689 /** 690 * Sets the value of a property. This method is called by the component 691 * manager any time after reset when a property changes value. 692 * <p> 693 * <strong>Note:</strong> Components should silently ignore properties 694 * that do not affect the operation of the component. 695 * 696 * @param propertyId The property identifier. 697 * @param value The value of the property. 
698 * 699 * @throws SAXNotRecognizedException The component should not throw 700 * this exception. 701 * @throws SAXNotSupportedException The component should not throw 702 * this exception. 703 */ 704 public void setProperty(String propertyId, Object value) 705 throws XMLConfigurationException { 706 707 super.setProperty(propertyId, value); 708 709 // Xerces properties 710 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 711 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 712 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 713 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 714 fEntityManager = (XMLEntityManager)value; 715 return; 716 } 717 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 718 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 719 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 720 (ExternalSubsetResolver) value : null; 721 return; 722 } 723 } 724 725 726 // Xerces properties 727 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 728 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 729 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 730 fEntityManager = (XMLEntityManager)value; 731 } 732 return; 733 } 734 735 } // setProperty(String,Object) 736 737 /** 738 * Returns the default state for a feature, or null if this 739 * component does not want to report a default value for this 740 * feature. 741 * 742 * @param featureId The feature identifier. 
743 * 744 * @since Xerces 2.2.0 745 */ 746 public Boolean getFeatureDefault(String featureId) { 747 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 748 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 749 return FEATURE_DEFAULTS[i]; 750 } 751 } 752 return null; 753 } // getFeatureDefault(String):Boolean 754 755 /** 756 * Returns the default state for a property, or null if this 757 * component does not want to report a default value for this 758 * property. 759 * 760 * @param propertyId The property identifier. 761 * 762 * @since Xerces 2.2.0 763 */ 764 public Object getPropertyDefault(String propertyId) { 765 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 766 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 767 return PROPERTY_DEFAULTS[i]; 768 } 769 } 770 return null; 771 } // getPropertyDefault(String):Object 772 773 // 774 // XMLDocumentSource methods 775 // 776 777 /** 778 * setDocumentHandler 779 * 780 * @param documentHandler 781 */ 782 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 783 fDocumentHandler = documentHandler; 784 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 785 } // setDocumentHandler(XMLDocumentHandler) 786 787 788 /** Returns the document handler */ 789 public XMLDocumentHandler getDocumentHandler(){ 790 return fDocumentHandler; 791 } 792 793 // 794 // XMLEntityHandler methods 795 // 796 797 /** 798 * This method notifies of the start of an entity. The DTD has the 799 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 800 * general entities are just specified by their name. 801 * 802 * @param name The name of the entity. 803 * @param identifier The resource identifier. 804 * @param encoding The auto-detected IANA encoding name of the entity 805 * stream. This value will be null in those situations 806 * where the entity encoding is not auto-detected (e.g. 807 * internal entities or a document entity that is 808 * parsed from a java.io.Reader). 
809 * @param augs Additional information that may include infoset augmentations 810 * 811 * @throws XNIException Thrown by handler to signal an error. 812 */ 813 public void startEntity(String name, 814 XMLResourceIdentifier identifier, 815 String encoding, Augmentations augs) throws XNIException { 816 817 // keep track of this entity before fEntityDepth is increased 818 if (fEntityDepth == fEntityStack.length) { 819 int[] entityarray = new int[fEntityStack.length * 2]; 820 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 821 fEntityStack = entityarray; 822 } 823 fEntityStack[fEntityDepth] = fMarkupDepth; 824 825 super.startEntity(name, identifier, encoding, augs); 826 827 // WFC: entity declared in external subset in standalone doc 828 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 829 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 830 new Object[]{name}); 831 } 832 833 /** we are not calling the handlers yet.. */ 834 // call handler 835 if (fDocumentHandler != null && !fScanningAttribute) { 836 if (!name.equals("[xml]")) { 837 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 838 } 839 } 840 841 } // startEntity(String,XMLResourceIdentifier,String) 842 843 /** 844 * This method notifies the end of an entity. The DTD has the pseudo-name 845 * of "[dtd]" parameter entity names start with '%'; and general entities 846 * are just specified by their name. 847 * 848 * @param name The name of the entity. 849 * @param augs Additional information that may include infoset augmentations 850 * 851 * @throws XNIException Thrown by handler to signal an error. 
852 */ 853 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 854 855 /** 856 * // flush possible pending output buffer - see scanContent 857 * if (fInScanContent && fStringBuffer.length != 0 858 * && fDocumentHandler != null) { 859 * fDocumentHandler.characters(fStringBuffer, null); 860 * fStringBuffer.length = 0; // make sure we know it's been flushed 861 * } 862 */ 863 super.endEntity(name, augs); 864 865 // make sure markup is properly balanced 866 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 867 reportFatalError("MarkupEntityMismatch", null); 868 } 869 870 /**/ 871 // call handler 872 if (fDocumentHandler != null && !fScanningAttribute) { 873 if (!name.equals("[xml]")) { 874 fDocumentHandler.endGeneralEntity(name, augs); 875 } 876 } 877 878 879 } // endEntity(String) 880 881 // 882 // Protected methods 883 // 884 885 // Driver factory methods 886 887 /** Creates a content Driver. */ 888 protected Driver createContentDriver() { 889 return new FragmentContentDriver(); 890 } // createContentDriver():Driver 891 892 // scanning methods 893 894 /** 895 * Scans an XML or text declaration. 896 * <p> 897 * <pre> 898 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 899 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 900 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 901 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 902 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 903 * | ('"' ('yes' | 'no') '"')) 904 * 905 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 906 * </pre> 907 * 908 * @param scanningTextDecl True if a text declaration is to 909 * be scanned instead of an XML 910 * declaration. 
911 */ 912 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 913 throws IOException, XNIException { 914 915 // scan decl 916 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 917 fMarkupDepth--; 918 919 // pseudo-attribute values 920 String version = fStrings[0]; 921 String encoding = fStrings[1]; 922 String standalone = fStrings[2]; 923 fDeclaredEncoding = encoding; 924 // set standalone 925 fStandaloneSet = standalone != null; 926 fStandalone = fStandaloneSet && standalone.equals("yes"); 927 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 928 //but this information is only related with Document Entity. 929 fEntityManager.setStandalone(fStandalone); 930 931 932 // call handler 933 if (fDocumentHandler != null) { 934 if (scanningTextDecl) { 935 fDocumentHandler.textDecl(version, encoding, null); 936 } else { 937 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 938 } 939 } 940 941 if(version != null){ 942 fEntityScanner.setVersion(version); 943 fEntityScanner.setXMLVersion(version); 944 } 945 // set encoding on reader, only if encoding was not specified by the application explicitly 946 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 947 fEntityScanner.setEncoding(encoding); 948 } 949 950 } // scanXMLDeclOrTextDecl(boolean) 951 952 public String getPITarget(){ 953 return fPITarget ; 954 } 955 956 public XMLStringBuffer getPIData(){ 957 return fContentBuffer ; 958 } 959 960 //XXX: why not this function behave as per the state of the parser? 961 public XMLString getCharacterData(){ 962 if(fUsebuffer){ 963 return fContentBuffer ; 964 }else{ 965 return fTempString; 966 } 967 968 } 969 970 971 /** 972 * Scans a processing data. This is needed to handle the situation 973 * where a document starts with a processing instruction whose 974 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 975 * 976 * @param target The PI target 977 * @param data The XMLStringBuffer to fill in with the data 978 */ 979 protected void scanPIData(String target, XMLStringBuffer data) 980 throws IOException, XNIException { 981 982 super.scanPIData(target, data); 983 984 //set the PI target and values 985 fPITarget = target ; 986 987 fMarkupDepth--; 988 989 } // scanPIData(String) 990 991 /** 992 * Scans a comment. 993 * <p> 994 * <pre> 995 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 996 * </pre> 997 * <p> 998 * <strong>Note:</strong> Called after scanning past '<!--' 999 */ 1000 protected void scanComment() throws IOException, XNIException { 1001 fContentBuffer.clear(); 1002 scanComment(fContentBuffer); 1003 //getTextCharacters can also be called for reading comments 1004 fUsebuffer = true; 1005 fMarkupDepth--; 1006 1007 } // scanComment() 1008 1009 //xxx value returned by this function may not remain valid if another event is scanned. 1010 public String getComment(){ 1011 return fContentBuffer.toString(); 1012 } 1013 1014 void addElement(String rawname){ 1015 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1016 //storing element raw name in a linear list of array 1017 fElementArray[fElementPointer] = rawname ; 1018 //storing elemnetPointer for particular element depth 1019 1020 if(DEBUG_SKIP_ALGORITHM){ 1021 StringBuffer sb = new StringBuffer() ; 1022 sb.append(" Storing element information ") ; 1023 sb.append(" fElementPointer = " + fElementPointer) ; 1024 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1025 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1026 System.out.println(sb.toString()) ; 1027 } 1028 1029 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1030 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1031 short column = storePointerForADepth(fElementPointer); 1032 if(column > 0){ 1033 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1034 
//identity comparison shouldn't take much time and we can rely on this 1035 //since its guaranteed to have same object id for same string. 1036 if(rawname == fElementArray[pointer]){ 1037 fShouldSkip = true ; 1038 fLastPointerLocation = pointer ; 1039 //reset the things and return. 1040 resetPointer((short)fElementStack.fDepth , column) ; 1041 fElementArray[fElementPointer] = null ; 1042 return ; 1043 }else{ 1044 fShouldSkip = false ; 1045 } 1046 } 1047 } 1048 fElementPointer++ ; 1049 } 1050 } 1051 1052 1053 void resetPointer(short depth, short column){ 1054 fPointerInfo[depth] [column] = (short)0; 1055 } 1056 1057 //returns column information at which pointer was stored. 1058 short storePointerForADepth(short elementPointer){ 1059 short depth = (short) fElementStack.fDepth ; 1060 1061 //Stores element pointer locations at particular depth , only 4 pointer locations 1062 //are stored at particular depth for now. 1063 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1064 1065 if(canStore(depth, i)){ 1066 fPointerInfo[depth][i] = elementPointer ; 1067 if(DEBUG_SKIP_ALGORITHM){ 1068 StringBuffer sb = new StringBuffer() ; 1069 sb.append(" Pointer information ") ; 1070 sb.append(" fElementPointer = " + fElementPointer) ; 1071 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1072 sb.append(" column = " + i ) ; 1073 System.out.println(sb.toString()) ; 1074 } 1075 return i; 1076 } 1077 //else 1078 //pointer was not stored because we reached the limit 1079 } 1080 return -1 ; 1081 } 1082 1083 boolean canStore(short depth, short column){ 1084 //colum = 0 , means first element at particular depth 1085 //column = 1, means second element at particular depth 1086 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1087 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1088 } 1089 1090 1091 short getElementPointer(short depth, short column){ 1092 //colum = 0 , means first element at particular depth 1093 //column = 1, means second element at particular depth 1094 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1095 return fPointerInfo[depth][column] ; 1096 } 1097 1098 //this function assumes that string passed is not null and skips 1099 //the following string from the buffer this makes sure 1100 boolean skipFromTheBuffer(String rawname) throws IOException{ 1101 if(fEntityScanner.skipString(rawname)){ 1102 char c = (char)fEntityScanner.peekChar() ; 1103 //If the start element was completely skipped we should encounter either ' '(space), 1104 //or '/' (in case of empty element) or '>' 1105 if( c == ' ' || c == '/' || c == '>'){ 1106 fElementRawname = rawname ; 1107 return true ; 1108 } else{ 1109 return false; 1110 } 1111 } else 1112 return false ; 1113 } 1114 1115 boolean skipQElement(String rawname) throws IOException{ 1116 1117 final int c = fEntityScanner.getChar(rawname.length()); 1118 //if this character is still valid element name -- this means string can't match 1119 if(XMLChar.isName(c)){ 1120 return false; 1121 }else{ 1122 return fEntityScanner.skipString(rawname); 1123 } 1124 } 1125 1126 protected boolean skipElement() throws IOException { 1127 1128 if(!fShouldSkip) return false ; 1129 1130 if(fLastPointerLocation != 0){ 1131 //Look at the next element stored in the array list.. we might just get a match. 1132 String rawname = fElementArray[fLastPointerLocation + 1] ; 1133 if(rawname != null && skipFromTheBuffer(rawname)){ 1134 fLastPointerLocation++ ; 1135 if(DEBUG_SKIP_ALGORITHM){ 1136 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1137 } 1138 return true ; 1139 } else{ 1140 //reset it back to zero... we haven't got the correct subset yet. 
1141 fLastPointerLocation = 0 ; 1142 1143 } 1144 } 1145 //xxx: we can put some logic here as from what column it should start looking 1146 //for now we always start at 0 1147 //fallback to tolerant algorithm, it would look for differnt element stored at different 1148 //depth and get us the pointer location. 1149 return fShouldSkip && skipElement((short)0); 1150 1151 } 1152 1153 //start of the column at which it should try searching 1154 boolean skipElement(short column) throws IOException { 1155 short depth = (short)fElementStack.fDepth ; 1156 1157 if(depth > MAX_DEPTH_LIMIT){ 1158 return fShouldSkip = false ; 1159 } 1160 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1161 short pointer = getElementPointer(depth , i ) ; 1162 1163 if(pointer == 0){ 1164 return fShouldSkip = false ; 1165 } 1166 1167 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1168 if(DEBUG_SKIP_ALGORITHM){ 1169 System.out.println(); 1170 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1171 System.out.println(); 1172 } 1173 fLastPointerLocation = pointer ; 1174 return fShouldSkip = true ; 1175 } 1176 } 1177 return fShouldSkip = false ; 1178 } 1179 1180 /** 1181 * Scans a start element. This method will handle the binding of 1182 * namespace information and notifying the handler of the start 1183 * of the element. 1184 * <p> 1185 * <pre> 1186 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1187 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1188 * </pre> 1189 * <p> 1190 * <strong>Note:</strong> This method assumes that the leading 1191 * '<' character has been consumed. 1192 * <p> 1193 * <strong>Note:</strong> This method uses the fElementQName and 1194 * fAttributes variables. The contents of these variables will be 1195 * destroyed. The caller should copy important information out of 1196 * these variables before calling this method. 
1197 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1198 * 1199 * @return True if element is empty. (i.e. It matches 1200 * production [44]. 1201 */ 1202 // fElementQName will have the details of element just read.. 1203 // fAttributes will have the details of all the attributes. 1204 protected boolean scanStartElement() 1205 throws IOException, XNIException { 1206 1207 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1208 //when skipping is true and no more elements should be added 1209 if(fSkip && !fAdd){ 1210 //get the stored element -- if everything goes right this should match the 1211 //token in the buffer 1212 1213 QName name = fElementStack.getNext(); 1214 1215 if(DEBUG_SKIP_ALGORITHM){ 1216 System.out.println("Trying to skip String = " + name.rawname); 1217 } 1218 1219 //Be conservative -- if skipping fails -- stop. 1220 fSkip = fEntityScanner.skipString(name.rawname); 1221 1222 if(fSkip){ 1223 if(DEBUG_SKIP_ALGORITHM){ 1224 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1225 } 1226 fElementStack.push(); 1227 fElementQName = name; 1228 }else{ 1229 //if skipping fails reposition the stack or fallback to normal way of processing 1230 fElementStack.reposition(); 1231 if(DEBUG_SKIP_ALGORITHM){ 1232 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1233 } 1234 } 1235 } 1236 1237 //we are still at the stage of adding elements 1238 //the elements were not matched or 1239 //fSkip is not set to true 1240 if(!fSkip || fAdd){ 1241 //get the next element from the stack 1242 fElementQName = fElementStack.nextElement(); 1243 // name 1244 if (fNamespaces) { 1245 fEntityScanner.scanQName(fElementQName); 1246 } else { 1247 String name = fEntityScanner.scanName(); 1248 fElementQName.setValues(null, name, name, null); 1249 } 1250 1251 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1252 if(DEBUG_SKIP_ALGORITHM){ 1253 if(fAdd){ 1254 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1255 } 1256 } 1257 1258 } 1259 1260 //when the elements are being added , we need to check if we are set for skipping the elements 1261 if(fAdd){ 1262 //this sets the value of fAdd variable 1263 fElementStack.matchElement(fElementQName); 1264 } 1265 1266 1267 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1268 fCurrentElement = fElementQName; 1269 1270 String rawname = fElementQName.rawname; 1271 1272 fEmptyElement = false; 1273 1274 fAttributes.removeAllAttributes(); 1275 1276 if(!seekCloseOfStartTag()){ 1277 fReadingAttributes = true; 1278 fAttributeCacheUsedCount =0; 1279 fStringBufferIndex =0; 1280 fAddDefaultAttr = true; 1281 do { 1282 scanAttribute(fAttributes); 1283 if (fSecurityManager != null && fAttributes.getLength() > fElementAttributeLimit){ 1284 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1285 "ElementAttributeLimit", 1286 new Object[]{rawname, new Integer(fAttributes.getLength()) }, 1287 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1288 } 1289 1290 } while (!seekCloseOfStartTag()); 1291 fReadingAttributes=false; 1292 } 1293 1294 if (fEmptyElement) { 1295 //decrease the markup depth.. 1296 fMarkupDepth--; 1297 1298 // check that this element was opened in the same entity 1299 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1300 reportFatalError("ElementEntityMismatch", 1301 new Object[]{fCurrentElement.rawname}); 1302 } 1303 // call handler 1304 if (fDocumentHandler != null) { 1305 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1306 } 1307 1308 //We should not be popping out the context here in endELement becaause the namespace context is still 1309 //valid when parser is at the endElement state. 
1310 //if (fNamespaces) { 1311 // fNamespaceContext.popContext(); 1312 //} 1313 1314 //pop the element off the stack.. 1315 fElementStack.popElement(); 1316 1317 } else { 1318 1319 if(dtdGrammarUtil != null) 1320 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1321 if(fDocumentHandler != null){ 1322 //complete element and attributes are traversed in this function so we can send a callback 1323 //here. 1324 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1325 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1326 } 1327 } 1328 1329 1330 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1331 return fEmptyElement; 1332 1333 } // scanStartElement():boolean 1334 1335 /** 1336 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1337 * Characters are consumed. 1338 */ 1339 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1340 // spaces 1341 boolean sawSpace = fEntityScanner.skipSpaces(); 1342 1343 // end tag? 1344 final int c = fEntityScanner.peekChar(); 1345 if (c == '>') { 1346 fEntityScanner.scanChar(); 1347 return true; 1348 } else if (c == '/') { 1349 fEntityScanner.scanChar(); 1350 if (!fEntityScanner.skipChar('>')) { 1351 reportFatalError("ElementUnterminated", 1352 new Object[]{fElementQName.rawname}); 1353 } 1354 fEmptyElement = true; 1355 return true; 1356 } else if (!isValidNameStartChar(c) || !sawSpace) { 1357 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1358 } 1359 1360 return false; 1361 } 1362 1363 public boolean hasAttributes(){ 1364 return fAttributes.getLength() > 0 ? true : false ; 1365 } 1366 1367 1368 /** 1369 * Scans an attribute. 1370 * <p> 1371 * <pre> 1372 * [41] Attribute ::= Name Eq AttValue 1373 * </pre> 1374 * <p> 1375 * <strong>Note:</strong> This method assumes that the next 1376 * character on the stream is the first character of the attribute 1377 * name. 
1378 * <p> 1379 * <strong>Note:</strong> This method uses the fAttributeQName and 1380 * fQName variables. The contents of these variables will be 1381 * destroyed. 1382 * 1383 * @param attributes The attributes list for the scanned attribute. 1384 */ 1385 1386 /** 1387 * protected void scanAttribute(AttributeIteratorImpl attributes) 1388 * throws IOException, XNIException { 1389 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1390 * 1391 * 1392 * // name 1393 * if (fNamespaces) { 1394 * fEntityScanner.scanQName(fAttributeQName); 1395 * } 1396 * else { 1397 * String name = fEntityScanner.scanName(); 1398 * fAttributeQName.setValues(null, name, name, null); 1399 * } 1400 * 1401 * // equals 1402 * fEntityScanner.skipSpaces(); 1403 * if (!fEntityScanner.skipChar('=')) { 1404 * reportFatalError("EqRequiredInAttribute", 1405 * new Object[]{fAttributeQName.rawname}); 1406 * } 1407 * fEntityScanner.skipSpaces(); 1408 * 1409 * 1410 * // content 1411 * int oldLen = attributes.getLength(); 1412 */ 1413 /**xxx there is one check of duplicate attribute that has been removed. 
1414 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1415 * 1416 * // WFC: Unique Att Spec 1417 * if (oldLen == attributes.getLength()) { 1418 * reportFatalError("AttributeNotUnique", 1419 * new Object[]{fCurrentElement.rawname, 1420 * fAttributeQName.rawname}); 1421 * } 1422 */ 1423 1424 /* 1425 //REVISIT: one more case needs to be included: external PE and standalone is no 1426 boolean isVC = fHasExternalDTD && !fStandalone; 1427 scanAttributeValue(fTempString, fTempString2, 1428 fAttributeQName.rawname, attributes, 1429 oldLen, isVC); 1430 1431 //attributes.setValue(oldLen, fTempString.toString()); 1432 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1433 //attributes.setSpecified(oldLen, true); 1434 1435 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1436 fAttributes.addAttribute(attribute); 1437 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1438 } // scanAttribute(XMLAttributes) 1439 1440 */ 1441 1442 /** return the attribute iterator implementation */ 1443 public XMLAttributesIteratorImpl getAttributeIterator(){ 1444 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1445 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1446 fAddDefaultAttr = false; 1447 } 1448 return fAttributes; 1449 } 1450 1451 /** return if standalone is set */ 1452 public boolean standaloneSet(){ 1453 return fStandaloneSet; 1454 } 1455 /** return if the doucment is standalone */ 1456 public boolean isStandAlone(){ 1457 return fStandalone ; 1458 } 1459 /** 1460 * Scans an attribute name value pair. 1461 * <p> 1462 * <pre> 1463 * [41] Attribute ::= Name Eq AttValue 1464 * </pre> 1465 * <p> 1466 * <strong>Note:</strong> This method assumes that the next 1467 * character on the stream is the first character of the attribute 1468 * name. 
1469 * <p> 1470 * <strong>Note:</strong> This method uses the fAttributeQName and 1471 * fQName variables. The contents of these variables will be 1472 * destroyed. 1473 * 1474 * @param attributes The attributes list for the scanned attribute. 1475 */ 1476 1477 protected void scanAttribute(XMLAttributes attributes) 1478 throws IOException, XNIException { 1479 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1480 1481 // name 1482 if (fNamespaces) { 1483 fEntityScanner.scanQName(fAttributeQName); 1484 } else { 1485 String name = fEntityScanner.scanName(); 1486 fAttributeQName.setValues(null, name, name, null); 1487 } 1488 1489 // equals 1490 fEntityScanner.skipSpaces(); 1491 if (!fEntityScanner.skipChar('=')) { 1492 reportFatalError("EqRequiredInAttribute", 1493 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1494 } 1495 fEntityScanner.skipSpaces(); 1496 1497 int attIndex = 0 ; 1498 //REVISIT: one more case needs to be included: external PE and standalone is no 1499 boolean isVC = fHasExternalDTD && !fStandalone; 1500 //fTempString would store attribute value 1501 ///fTempString2 would store attribute non-normalized value 1502 1503 //this function doesn't use 'attIndex'. We are adding the attribute later 1504 //after we have figured out that current attribute is not namespace declaration 1505 //since scanAttributeValue doesn't use attIndex parameter therefore we 1506 //can safely add the attribute later.. 1507 XMLString tmpStr = getString(); 1508 1509 scanAttributeValue(tmpStr, fTempString2, 1510 fAttributeQName.rawname, attributes, 1511 attIndex, isVC); 1512 1513 // content 1514 int oldLen = attributes.getLength(); 1515 //if the attribute name already exists.. 
new value is replaced with old value 1516 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1517 1518 // WFC: Unique Att Spec 1519 //attributes count will be same if the current attribute name already exists for this element name. 1520 //this means there are two duplicate attributes. 1521 if (oldLen == attributes.getLength()) { 1522 reportFatalError("AttributeNotUnique", 1523 new Object[]{fCurrentElement.rawname, 1524 fAttributeQName.rawname}); 1525 } 1526 1527 //tmpString contains attribute value 1528 //we are passing null as the attribute value 1529 attributes.setValue(attIndex, null, tmpStr); 1530 1531 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1532 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1533 attributes.setSpecified(attIndex, true); 1534 1535 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1536 1537 } // scanAttribute(XMLAttributes) 1538 1539 /** 1540 * Scans element content. 1541 * 1542 * @return Returns the next character on the stream. 1543 */ 1544 //CHANGED: 1545 //EARLIER: scanContent() 1546 //NOW: scanContent(XMLStringBuffer) 1547 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1548 //this function appends the data to the buffer. 1549 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1550 //set the fTempString length to 0 before passing it on to scanContent 1551 //scanContent sets the correct co-ordinates as per the content read 1552 fTempString.length = 0; 1553 int c = fEntityScanner.scanContent(fTempString); 1554 content.append(fTempString); 1555 fTempString.length = 0; 1556 if (c == '\r') { 1557 // happens when there is the character reference 1558 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1559 fEntityScanner.scanChar(); 1560 content.append((char)c); 1561 c = -1; 1562 } else if (c == ']') { 1563 //fStringBuffer.clear(); 1564 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1565 content.append((char)fEntityScanner.scanChar()); 1566 // remember where we are in case we get an endEntity before we 1567 // could flush the buffer out - this happens when we're parsing an 1568 // entity which ends with a ] 1569 fInScanContent = true; 1570 // 1571 // We work on a single character basis to handle cases such as: 1572 // ']]]>' which we might otherwise miss. 1573 // 1574 if (fEntityScanner.skipChar(']')) { 1575 content.append(']'); 1576 while (fEntityScanner.skipChar(']')) { 1577 content.append(']'); 1578 } 1579 if (fEntityScanner.skipChar('>')) { 1580 reportFatalError("CDEndInContent", null); 1581 } 1582 } 1583 fInScanContent = false; 1584 c = -1; 1585 } 1586 if (fDocumentHandler != null && content.length > 0) { 1587 //fDocumentHandler.characters(content, null); 1588 } 1589 return c; 1590 1591 } // scanContent():int 1592 1593 1594 /** 1595 * Scans a CDATA section. 1596 * <p> 1597 * <strong>Note:</strong> This method uses the fTempString and 1598 * fStringBuffer variables. 1599 * 1600 * @param complete True if the CDATA section is to be scanned 1601 * completely. 1602 * 1603 * @return True if CDATA is completely scanned. 1604 */ 1605 //CHANGED: 1606 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1607 throws IOException, XNIException { 1608 1609 // call handler 1610 if (fDocumentHandler != null) { 1611 //fDocumentHandler.startCDATA(null); 1612 } 1613 1614 while (true) { 1615 //scanData will fill the contentBuffer 1616 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1617 break ; 1618 /** We dont need all this code if we pass ']]>' as delimeter.. 
1619 * int brackets = 2; 1620 * while (fEntityScanner.skipChar(']')) { 1621 * brackets++; 1622 * } 1623 * 1624 * //When we find more than 2 square brackets 1625 * if (fDocumentHandler != null && brackets > 2) { 1626 * //we dont need to clear the buffer.. 1627 * //contentBuffer.clear(); 1628 * for (int i = 2; i < brackets; i++) { 1629 * contentBuffer.append(']'); 1630 * } 1631 * fDocumentHandler.characters(contentBuffer, null); 1632 * } 1633 * 1634 * if (fEntityScanner.skipChar('>')) { 1635 * break; 1636 * } 1637 * if (fDocumentHandler != null) { 1638 * //we dont need to clear the buffer now.. 1639 * //contentBuffer.clear(); 1640 * contentBuffer.append("]]"); 1641 * fDocumentHandler.characters(contentBuffer, null); 1642 * } 1643 **/ 1644 } else { 1645 int c = fEntityScanner.peekChar(); 1646 if (c != -1 && isInvalidLiteral(c)) { 1647 if (XMLChar.isHighSurrogate(c)) { 1648 //contentBuffer.clear(); 1649 //scan surrogates if any.... 1650 scanSurrogates(contentBuffer); 1651 } else { 1652 reportFatalError("InvalidCharInCDSect", 1653 new Object[]{Integer.toString(c,16)}); 1654 fEntityScanner.scanChar(); 1655 } 1656 } 1657 //by this time we have also read surrogate contents if any... 1658 if (fDocumentHandler != null) { 1659 //fDocumentHandler.characters(contentBuffer, null); 1660 } 1661 } 1662 } 1663 fMarkupDepth--; 1664 1665 if (fDocumentHandler != null && contentBuffer.length > 0) { 1666 //fDocumentHandler.characters(contentBuffer, null); 1667 } 1668 1669 // call handler 1670 if (fDocumentHandler != null) { 1671 //fDocumentHandler.endCDATA(null); 1672 } 1673 1674 return true; 1675 1676 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1677 1678 /** 1679 * Scans an end element. 1680 * <p> 1681 * <pre> 1682 * [42] ETag ::= '</' Name S? '>' 1683 * </pre> 1684 * <p> 1685 * <strong>Note:</strong> This method uses the fElementQName variable. 1686 * The contents of this variable will be destroyed. 
The caller should 1687 * copy the needed information out of this variable before calling 1688 * this method. 1689 * 1690 * @return The element depth. 1691 */ 1692 protected int scanEndElement() throws IOException, XNIException { 1693 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1694 1695 // pop context 1696 QName endElementName = fElementStack.popElement(); 1697 1698 String rawname = endElementName.rawname; 1699 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1700 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1701 //In scanners most of the time is consumed on checks done for XML characters, we can 1702 // optimize on it and avoid the checks done for endElement, 1703 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1704 1705 // this should work both for namespace processing true or false... 1706 1707 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1708 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1709 1710 if (!fEntityScanner.skipString(endElementName.rawname)) { 1711 reportFatalError("ETagRequired", new Object[]{rawname}); 1712 } 1713 1714 // end 1715 fEntityScanner.skipSpaces(); 1716 if (!fEntityScanner.skipChar('>')) { 1717 reportFatalError("ETagUnterminated", 1718 new Object[]{rawname}); 1719 } 1720 fMarkupDepth--; 1721 1722 //we have increased the depth for two markup "<" characters 1723 fMarkupDepth--; 1724 1725 // check that this element was opened in the same entity 1726 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1727 reportFatalError("ElementEntityMismatch", 1728 new Object[]{rawname}); 1729 } 1730 1731 //We should not be popping out the context here in endELement becaause the namespace context is still 1732 //valid when parser is at the endElement state. 
1733 1734 //if (fNamespaces) { 1735 // fNamespaceContext.popContext(); 1736 //} 1737 1738 // call handler 1739 if (fDocumentHandler != null ) { 1740 //end element is scanned in this function so we can send a callback 1741 //here. 1742 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1743 1744 fDocumentHandler.endElement(endElementName, null); 1745 } 1746 if(dtdGrammarUtil != null) 1747 dtdGrammarUtil.endElement(endElementName); 1748 1749 return fMarkupDepth; 1750 1751 } // scanEndElement():int 1752 1753 /** 1754 * Scans a character reference. 1755 * <p> 1756 * <pre> 1757 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1758 * </pre> 1759 */ 1760 protected void scanCharReference() 1761 throws IOException, XNIException { 1762 1763 fStringBuffer2.clear(); 1764 int ch = scanCharReferenceValue(fStringBuffer2, null); 1765 fMarkupDepth--; 1766 if (ch != -1) { 1767 // call handler 1768 1769 if (fDocumentHandler != null) { 1770 if (fNotifyCharRefs) { 1771 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1772 } 1773 Augmentations augs = null; 1774 if (fValidation && ch <= 0x20) { 1775 if (fTempAugmentations != null) { 1776 fTempAugmentations.removeAllItems(); 1777 } 1778 else { 1779 fTempAugmentations = new AugmentationsImpl(); 1780 } 1781 augs = fTempAugmentations; 1782 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1783 } 1784 //xxx: How do we deal with this - how to return charReferenceValues 1785 //now this is being commented because this is taken care in scanDocument() 1786 //fDocumentHandler.characters(fStringBuffer2, null); 1787 if (fNotifyCharRefs) { 1788 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1789 } 1790 } 1791 } 1792 1793 } // scanCharReference() 1794 1795 1796 /** 1797 * Scans an entity reference. 1798 * 1799 * @return returns true if the new entity is started. If it was built-in entity 1800 * 'false' is returned. 1801 * @throws IOException Thrown if i/o error occurs. 
1802 * @throws XNIException Thrown if handler throws exception upon 1803 * notification. 1804 */ 1805 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1806 String name = fEntityScanner.scanName(); 1807 if (name == null) { 1808 reportFatalError("NameRequiredInReference", null); 1809 return; 1810 } 1811 if (!fEntityScanner.skipChar(';')) { 1812 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1813 } 1814 if (fEntityStore.isUnparsedEntity(name)) { 1815 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1816 } 1817 fMarkupDepth--; 1818 fCurrentEntityName = name; 1819 1820 // handle built-in entities 1821 if (name == fAmpSymbol) { 1822 handleCharacter('&', fAmpSymbol, content); 1823 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1824 return ; 1825 } else if (name == fLtSymbol) { 1826 handleCharacter('<', fLtSymbol, content); 1827 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1828 return ; 1829 } else if (name == fGtSymbol) { 1830 handleCharacter('>', fGtSymbol, content); 1831 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1832 return ; 1833 } else if (name == fQuotSymbol) { 1834 handleCharacter('"', fQuotSymbol, content); 1835 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1836 return ; 1837 } else if (name == fAposSymbol) { 1838 handleCharacter('\'', fAposSymbol, content); 1839 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1840 return ; 1841 } 1842 1843 //1. if the entity is external and support to external entities is not required 1844 // 2. or entities should not be replaced 1845 //3. or if it is built in entity reference. 
1846 if((fEntityStore.isExternalEntity(name) && !fSupportExternalEntities) || (!fEntityStore.isExternalEntity(name) && !fReplaceEntityReferences) || foundBuiltInRefs){ 1847 fScannerState = SCANNER_STATE_REFERENCE; 1848 return ; 1849 } 1850 // start general entity 1851 if (!fEntityStore.isDeclaredEntity(name)) { 1852 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1853 if (!fSupportDTD && fReplaceEntityReferences) { 1854 reportFatalError("EntityNotDeclared", new Object[]{name}); 1855 return; 1856 } 1857 //REVISIT: one more case needs to be included: external PE and standalone is no 1858 if ( fHasExternalDTD && !fStandalone) { 1859 if (fValidation) 1860 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1861 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1862 } else 1863 reportFatalError("EntityNotDeclared", new Object[]{name}); 1864 } 1865 //we are starting the entity even if the entity was not declared 1866 //if that was the case it its taken care in XMLEntityManager.startEntity() 1867 //we immediately call the endEntity. Application gets to know if there was 1868 //any entity that was not declared. 1869 fEntityManager.startEntity(name, false); 1870 //set the scaner state to content.. parser will automatically revive itself at any point of time. 1871 //setScannerState(SCANNER_STATE_CONTENT); 1872 //return true ; 1873 } // scanEntityReference() 1874 1875 // utility methods 1876 1877 /** 1878 * Calls document handler with a single character resulting from 1879 * built-in entity resolution. 1880 * 1881 * @param c 1882 * @param entity built-in name 1883 * @param XMLStringBuffer append the character to buffer 1884 * 1885 * we really dont need to call this function -- this function is only required when 1886 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1887 * calling this function to hanlde built-in entity reference. 
1888 * 1889 */ 1890 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1891 foundBuiltInRefs = true; 1892 content.append(c); 1893 if (fDocumentHandler != null) { 1894 fSingleChar[0] = c; 1895 if (fNotifyBuiltInRefs) { 1896 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1897 } 1898 fTempString.setValues(fSingleChar, 0, 1); 1899 //fDocumentHandler.characters(fTempString, null); 1900 1901 if (fNotifyBuiltInRefs) { 1902 fDocumentHandler.endGeneralEntity(entity, null); 1903 } 1904 } 1905 } // handleCharacter(char) 1906 1907 // helper methods 1908 1909 /** 1910 * Sets the scanner state. 1911 * 1912 * @param state The new scanner state. 1913 */ 1914 protected final void setScannerState(int state) { 1915 1916 fScannerState = state; 1917 if (DEBUG_SCANNER_STATE) { 1918 System.out.print("### setScannerState: "); 1919 //System.out.print(fScannerState); 1920 System.out.print(getScannerStateName(state)); 1921 System.out.println(); 1922 } 1923 1924 } // setScannerState(int) 1925 1926 1927 /** 1928 * Sets the Driver. 1929 * 1930 * @param Driver The new Driver. 1931 */ 1932 protected final void setDriver(Driver driver) { 1933 fDriver = driver; 1934 if (DEBUG_DISPATCHER) { 1935 System.out.print("%%% setDriver: "); 1936 System.out.print(getDriverName(driver)); 1937 System.out.println(); 1938 } 1939 } 1940 1941 // 1942 // Private methods 1943 // 1944 1945 /** Returns the scanner state name. 
*/ 1946 protected String getScannerStateName(int state) { 1947 1948 switch (state) { 1949 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1950 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1951 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1952 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1953 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1954 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1955 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1956 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1957 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1958 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1959 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1960 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1961 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1962 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1963 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1964 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1965 } 1966 1967 return "??? ("+state+')'; 1968 1969 } // getScannerStateName(int):String 1970 public String getEntityName(){ 1971 //return the cached name 1972 return fCurrentEntityName; 1973 } 1974 1975 /** Returns the driver name. 
*/ 1976 public String getDriverName(Driver driver) { 1977 1978 if (DEBUG_DISPATCHER) { 1979 if (driver != null) { 1980 String name = driver.getClass().getName(); 1981 int index = name.lastIndexOf('.'); 1982 if (index != -1) { 1983 name = name.substring(index + 1); 1984 index = name.lastIndexOf('$'); 1985 if (index != -1) { 1986 name = name.substring(index + 1); 1987 } 1988 } 1989 return name; 1990 } 1991 } 1992 return "null"; 1993 1994 } // getDriverName():String 1995 1996 // 1997 // Classes 1998 // 1999 2000 /** 2001 * @author Neeraj Bajaj, Sun Microsystems. 2002 */ 2003 protected static final class Element { 2004 2005 // 2006 // Data 2007 // 2008 2009 /** Symbol. */ 2010 public QName qname; 2011 2012 //raw name stored as characters 2013 public char[] fRawname; 2014 2015 /** The next Element entry. */ 2016 public Element next; 2017 2018 // 2019 // Constructors 2020 // 2021 2022 /** 2023 * Constructs a new Element from the given QName and next Element 2024 * reference. 2025 */ 2026 public Element(QName qname, Element next) { 2027 this.qname.setValues(qname); 2028 this.fRawname = qname.rawname.toCharArray(); 2029 this.next = next; 2030 } 2031 2032 } // class Element 2033 2034 /** 2035 * Element stack. 2036 * 2037 * @author Neeraj Bajaj, Sun Microsystems. 2038 */ 2039 protected class ElementStack2 { 2040 2041 // 2042 // Data 2043 // 2044 2045 /** The stack data. */ 2046 protected QName [] fQName = new QName[20]; 2047 2048 //Element depth 2049 protected int fDepth; 2050 //total number of elements 2051 protected int fCount; 2052 //current position 2053 protected int fPosition; 2054 //Mark refers to the position 2055 protected int fMark; 2056 2057 protected int fLastDepth ; 2058 2059 // 2060 // Constructors 2061 // 2062 2063 /** Default constructor. 
*/ 2064 public ElementStack2() { 2065 for (int i = 0; i < fQName.length; i++) { 2066 fQName[i] = new QName(); 2067 } 2068 fMark = fPosition = 1; 2069 } // <init>() 2070 2071 public void resize(){ 2072 /** 2073 * int length = fElements.length; 2074 * Element [] temp = new Element[length * 2]; 2075 * System.arraycopy(fElements, 0, temp, 0, length); 2076 * fElements = temp; 2077 */ 2078 //resize QNames 2079 int oldLength = fQName.length; 2080 QName [] tmp = new QName[oldLength * 2]; 2081 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2082 fQName = tmp; 2083 2084 for (int i = oldLength; i < fQName.length; i++) { 2085 fQName[i] = new QName(); 2086 } 2087 2088 } 2089 2090 2091 // 2092 // Public methods 2093 // 2094 2095 /** Check if the element scanned during the start element 2096 *matches the stored element. 2097 * 2098 *@return true if the match suceeds. 2099 */ 2100 public boolean matchElement(QName element) { 2101 //last depth is the depth when last elemnt was pushed 2102 //if last depth is greater than current depth 2103 if(DEBUG_SKIP_ALGORITHM){ 2104 System.out.println("fLastDepth = " + fLastDepth); 2105 System.out.println("fDepth = " + fDepth); 2106 } 2107 boolean match = false; 2108 if(fLastDepth > fDepth && fDepth <= 2){ 2109 if(DEBUG_SKIP_ALGORITHM){ 2110 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2111 } 2112 if(element.rawname == fQName[fDepth].rawname){ 2113 fAdd = false; 2114 //mark this position 2115 //decrease the depth by 1 as arrays are 0 based 2116 fMark = fDepth - 1; 2117 //we found the match and from next element skipping will start, add 1 2118 fPosition = fMark + 1 ; 2119 match = true; 2120 //Once we get match decrease the count -- this was increased by nextElement() 2121 --fCount; 2122 if(DEBUG_SKIP_ALGORITHM){ 2123 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2124 System.out.println("fMark = " + fMark); 2125 System.out.println("fPosition = " + fPosition); 2126 
System.out.println("fDepth = " + fDepth); 2127 System.out.println("fCount = " + fCount); 2128 } 2129 }else{ 2130 fAdd = true; 2131 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2132 } 2133 } 2134 //store the last depth 2135 fLastDepth = fDepth++; 2136 return match; 2137 } // pushElement(QName):QName 2138 2139 /** 2140 * This function doesn't increase depth. The function in this function is 2141 *broken down into two functions for efficiency. <@see>matchElement</see>. 2142 * This function just returns the pointer to the object and its values are set. 2143 * 2144 *@return QName reference to the next element in the list 2145 */ 2146 public QName nextElement() { 2147 2148 //if number of elements becomes equal to the length of array -- stop the skipping 2149 if (fCount == fQName.length) { 2150 fShouldSkip = false; 2151 fAdd = false; 2152 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2153 //xxx: this is not correct, we are returning the last element 2154 //this wont make any difference since flag has been set to 'false' 2155 return fQName[--fCount]; 2156 } 2157 if(DEBUG_SKIP_ALGORITHM){ 2158 System.out.println("fCount = " + fCount); 2159 } 2160 return fQName[fCount++]; 2161 2162 } 2163 2164 /** Note that this function is considerably different than nextElement() 2165 * This function just returns the previously stored elements 2166 */ 2167 public QName getNext(){ 2168 //when position reaches number of elements in the list.. 2169 //set the position back to mark, making it a circular linked list. 2170 if(fPosition == fCount){ 2171 fPosition = fMark; 2172 } 2173 return fQName[fPosition++]; 2174 } 2175 2176 /** returns the current depth 2177 */ 2178 public int popElement(){ 2179 return fDepth--; 2180 } 2181 2182 2183 /** Clears the stack without throwing away existing QName objects. 
*/ 2184 public void clear() { 2185 fLastDepth = 0; 2186 fDepth = 0; 2187 fCount = 0 ; 2188 fPosition = fMark = 1; 2189 } // clear() 2190 2191 } // class ElementStack 2192 2193 /** 2194 * Element stack. This stack operates without synchronization, error 2195 * checking, and it re-uses objects instead of throwing popped items 2196 * away. 2197 * 2198 * @author Andy Clark, IBM 2199 */ 2200 protected class ElementStack { 2201 2202 // 2203 // Data 2204 // 2205 2206 /** The stack data. */ 2207 protected QName[] fElements; 2208 protected int [] fInt = new int[20]; 2209 2210 2211 //Element depth 2212 protected int fDepth; 2213 //total number of elements 2214 protected int fCount; 2215 //current position 2216 protected int fPosition; 2217 //Mark refers to the position 2218 protected int fMark; 2219 2220 protected int fLastDepth ; 2221 2222 // 2223 // Constructors 2224 // 2225 2226 /** Default constructor. */ 2227 public ElementStack() { 2228 fElements = new QName[20]; 2229 for (int i = 0; i < fElements.length; i++) { 2230 fElements[i] = new QName(); 2231 } 2232 } // <init>() 2233 2234 // 2235 // Public methods 2236 // 2237 2238 /** 2239 * Pushes an element on the stack. 2240 * <p> 2241 * <strong>Note:</strong> The QName values are copied into the 2242 * stack. In other words, the caller does <em>not</em> orphan 2243 * the element to the stack. Also, the QName object returned 2244 * is <em>not</em> orphaned to the caller. It should be 2245 * considered read-only. 2246 * 2247 * @param element The element to push onto the stack. 
2248 * 2249 * @return Returns the actual QName object that stores the 2250 */ 2251 //XXX: THIS FUNCTION IS NOT USED 2252 public QName pushElement(QName element) { 2253 if (fDepth == fElements.length) { 2254 QName[] array = new QName[fElements.length * 2]; 2255 System.arraycopy(fElements, 0, array, 0, fDepth); 2256 fElements = array; 2257 for (int i = fDepth; i < fElements.length; i++) { 2258 fElements[i] = new QName(); 2259 } 2260 } 2261 fElements[fDepth].setValues(element); 2262 return fElements[fDepth++]; 2263 } // pushElement(QName):QName 2264 2265 2266 /** Note that this function is considerably different than nextElement() 2267 * This function just returns the previously stored elements 2268 */ 2269 public QName getNext(){ 2270 //when position reaches number of elements in the list.. 2271 //set the position back to mark, making it a circular linked list. 2272 if(fPosition == fCount){ 2273 fPosition = fMark; 2274 } 2275 //store the position of last opened tag at particular depth 2276 //fInt[++fDepth] = fPosition; 2277 if(DEBUG_SKIP_ALGORITHM){ 2278 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2279 } 2280 //return fElements[fPosition++]; 2281 return fElements[fPosition]; 2282 } 2283 2284 /** This function should be called only when element was skipped sucessfully. 2285 * 1. Increase the depth - because element was sucessfully skipped. 2286 *2. Store the position of the element token in array "last opened tag" at depth. 2287 *3. increase the position counter so as to point to the next element in the array 2288 */ 2289 public void push(){ 2290 2291 fInt[++fDepth] = fPosition++; 2292 } 2293 2294 /** Check if the element scanned during the start element 2295 *matches the stored element. 2296 * 2297 *@return true if the match suceeds. 
*
         * <p>The comparison is only attempted when the previously recorded
         * depth is greater than the current depth and the current depth is at
         * most 3. Independently of the outcome, the array position associated
         * with the current depth is recorded in {@code fInt}. If the element
         * array turns out to be completely filled, skipping is switched off,
         * the stack is repositioned and {@code false} is returned.
         *
         * @param element the element that was just scanned.
         */
        public boolean matchElement(QName element) {
            // fLastDepth is the depth stored by the previous call; a match is
            // only considered when that depth is greater than the current one.
            boolean match = false;
            if(fLastDepth > fDepth && fDepth <= 3){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                    System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
                }
                // Reference comparison of the raw names.
                // NOTE(review): presumably relies on names being interned
                // symbols -- confirm against the symbol table usage.
                if(element.rawname == fElements[fDepth - 1].rawname){
                    fAdd = false;
                    // mark this position;
                    // decrease the depth by 1 as arrays are 0 based
                    fMark = fDepth - 1;
                    // we found the match
                    fPosition = fMark;
                    match = true;
                    // Once we get a match decrease the count -- it was increased by nextElement()
                    --fCount;
                    if(DEBUG_SKIP_ALGORITHM){
                        System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                        System.out.println("fMark = " + fMark);
                        System.out.println("fPosition = " + fPosition);
                        System.out.println("fDepth = " + fDepth);
                        System.out.println("fCount = " + fCount);
                        System.out.println("---------MATCH SUCEEDED-----------------");
                        System.out.println("");
                    }
                }else{
                    fAdd = true;
                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
                }
            }
            // Store the position for the current depth. This has to be tracked
            // both while elements are being added and once skipping has
            // started, i.e. when getNext() is being called.
            if(match){
                // skipping starts from the next element, hence the post-increment
                fInt[fDepth] = fPosition++;
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
                }
                // fInt[fDepth] holds an index into the element array, which is 0 based.
                fInt[fDepth] = fCount - 1;
            }

            // If the number of elements becomes equal to the length of the array -- stop the skipping.
            // xxx: should we do "fCount == fInt.length"
            // NOTE(review): fInt is not resized anywhere in the visible code -- confirm
            // that fDepth cannot exceed its length here.
            if (fCount == fElements.length) {
                fSkip = false;
                fAdd = false;
                // Reposition the stack -- the document seems to be too complex
                // and there is no symmetry in its structure.
                reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                    System.out.println("REPOSITIONING THE STACK");
                    System.out.println("-----------SKIPPING STOPPED----------");
                    System.out.println("");
                }
                return false;
            }
            if(DEBUG_SKIP_ALGORITHM){
                if(match){
                    System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }else{
                    System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }
            }
            // store the last depth
            fLastDepth = fDepth;
            return match;
        } // matchElement(QName):boolean


        /**
         * Returns the next element on the stack.
         *
         * @return Returns the actual QName object. Callee should
         * use this object to store the details of next element encountered.
2385 */ 2386 public QName nextElement() { 2387 if(fSkip){ 2388 fDepth++; 2389 //boundary checks are done in matchElement() 2390 return fElements[fCount++]; 2391 } else if (fDepth == fElements.length) { 2392 QName[] array = new QName[fElements.length * 2]; 2393 System.arraycopy(fElements, 0, array, 0, fDepth); 2394 fElements = array; 2395 for (int i = fDepth; i < fElements.length; i++) { 2396 fElements[i] = new QName(); 2397 } 2398 } 2399 2400 return fElements[fDepth++]; 2401 2402 } // pushElement(QName):QName 2403 2404 2405 /** 2406 * Pops an element off of the stack by setting the values of 2407 * the specified QName. 2408 * <p> 2409 * <strong>Note:</strong> The object returned is <em>not</em> 2410 * orphaned to the caller. Therefore, the caller should consider 2411 * the object to be read-only. 2412 */ 2413 public QName popElement() { 2414 //return the same object that was pushed -- this would avoid 2415 //setting the values for every end element. 2416 //STRONG: this object is read only -- this object reference shouldn't be stored. 2417 if(fSkip || fAdd ){ 2418 if(DEBUG_SKIP_ALGORITHM){ 2419 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2420 System.out.println(""); 2421 } 2422 return fElements[fInt[fDepth--]]; 2423 } else{ 2424 if(DEBUG_SKIP_ALGORITHM){ 2425 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2426 } 2427 return fElements[--fDepth] ; 2428 } 2429 //element.setValues(fElements[--fDepth]); 2430 } // popElement(QName) 2431 2432 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2433 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2434 *as per the depth. 
2435 */ 2436 public void reposition(){ 2437 for( int i = 2 ; i <= fDepth ; i++){ 2438 fElements[i-1] = fElements[fInt[i]]; 2439 } 2440 if(DEBUG_SKIP_ALGORITHM){ 2441 for( int i = 0 ; i < fDepth ; i++){ 2442 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2443 } 2444 } 2445 } 2446 2447 /** Clears the stack without throwing away existing QName objects. */ 2448 public void clear() { 2449 fDepth = 0; 2450 fLastDepth = 0; 2451 fCount = 0 ; 2452 fPosition = fMark = 1; 2453 2454 } // clear() 2455 2456 /** 2457 * This function is as a result of optimization done for endElement -- 2458 * we dont need to set the value for every end element encouterd. 2459 * For Well formedness checks we can have the same QName object that was pushed. 2460 * the values will be set only if application need to know about the endElement 2461 * -- neeraj.bajaj@sun.com 2462 */ 2463 2464 public QName getLastPoppedElement(){ 2465 return fElements[fDepth]; 2466 } 2467 } // class ElementStack 2468 2469 /** 2470 * Drives the parser to the next state/event on the input. Parser is guaranteed 2471 * to stop at the next state/event. 2472 * 2473 * Internally XML document is divided into several states. Each state represents 2474 * a sections of XML document. When this functions returns normally, it has read 2475 * the section of XML document and returns the state corresponding to section of 2476 * document which has been read. For optimizations, a particular driver 2477 * can read ahead of the section of document (state returned) just read and 2478 * can maintain a different internal state. 2479 * 2480 * 2481 * @author Neeraj Bajaj, Sun Microsystems 2482 */ 2483 protected interface Driver { 2484 2485 2486 /** 2487 * Drives the parser to the next state/event on the input. Parser is guaranteed 2488 * to stop at the next state/event. 2489 * 2490 * Internally XML document is divided into several states. Each state represents 2491 * a sections of XML document. 
When this functions returns normally, it has read 2492 * the section of XML document and returns the state corresponding to section of 2493 * document which has been read. For optimizations, a particular driver 2494 * can read ahead of the section of document (state returned) just read and 2495 * can maintain a different internal state. 2496 * 2497 * @return state representing the section of document just read. 2498 * 2499 * @throws IOException Thrown on i/o error. 2500 * @throws XNIException Thrown on parse error. 2501 */ 2502 2503 public int next() throws IOException, XNIException; 2504 2505 } // interface Driver 2506 2507 /** 2508 * Driver to handle content scanning. This driver is capable of reading 2509 * the fragment of XML document. When it has finished reading fragment 2510 * of XML documents, it can pass the job of reading to another driver. 2511 * 2512 * This class has been modified as per the new design which is more suited to 2513 * efficiently build pull parser. Lot of performance improvements have been done and 2514 * the code has been added to support stax functionality/features. 2515 * 2516 * @author Neeraj Bajaj, Sun Microsystems 2517 * 2518 * 2519 * @author Andy Clark, IBM 2520 * @author Eric Ye, IBM 2521 */ 2522 protected class FragmentContentDriver 2523 implements Driver { 2524 2525 // 2526 // Driver methods 2527 // 2528 private boolean fContinueDispatching = true; 2529 private boolean fScanningForMarkup = true; 2530 2531 /** 2532 * decides the appropriate state of the parser 2533 */ 2534 private void startOfMarkup() throws IOException { 2535 fMarkupDepth++; 2536 final int ch = fEntityScanner.peekChar(); 2537 2538 switch(ch){ 2539 case '?' :{ 2540 setScannerState(SCANNER_STATE_PI); 2541 fEntityScanner.skipChar(ch); 2542 break; 2543 } 2544 case '!' 
:{ 2545 fEntityScanner.skipChar(ch); 2546 if (fEntityScanner.skipChar('-')) { 2547 if (!fEntityScanner.skipChar('-')) { 2548 reportFatalError("InvalidCommentStart", 2549 null); 2550 } 2551 setScannerState(SCANNER_STATE_COMMENT); 2552 } else if (fEntityScanner.skipString(cdata)) { 2553 setScannerState(SCANNER_STATE_CDATA ); 2554 } else if (!scanForDoctypeHook()) { 2555 reportFatalError("MarkupNotRecognizedInContent", 2556 null); 2557 } 2558 break; 2559 } 2560 case '/' :{ 2561 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2562 fEntityScanner.skipChar(ch); 2563 break; 2564 } 2565 default :{ 2566 if (isValidNameStartChar(ch)) { 2567 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2568 } else { 2569 reportFatalError("MarkupNotRecognizedInContent", 2570 null); 2571 } 2572 } 2573 } 2574 2575 }//startOfMarkup 2576 2577 private void startOfContent() throws IOException { 2578 if (fEntityScanner.skipChar('<')) { 2579 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2580 } else if (fEntityScanner.skipChar('&')) { 2581 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2582 } else { 2583 //element content is there.. 2584 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2585 } 2586 }//startOfContent 2587 2588 2589 /** 2590 * 2591 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2592 * At any point of time when in doubt over the current state of the parser, the state should be 2593 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2594 * the parser to one of its sub state. 2595 * sub states are defined in the parser on the basis of different XML component like 2596 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2597 * These sub states help the parser to have fine control over the parsing. These are the 2598 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2599 * decided if paresr needs to stop at next milepost ?? 2600 * 2601 */ 2602 public void decideSubState() throws IOException { 2603 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2604 2605 switch (fScannerState) { 2606 2607 case SCANNER_STATE_CONTENT: { 2608 startOfContent() ; 2609 break; 2610 } 2611 2612 case SCANNER_STATE_START_OF_MARKUP: { 2613 startOfMarkup() ; 2614 break; 2615 } 2616 } 2617 } 2618 }//decideSubState 2619 2620 /** 2621 * Drives the parser to the next state/event on the input. Parser is guaranteed 2622 * to stop at the next state/event. Internally XML document 2623 * is divided into several states. Each state represents a sections of XML 2624 * document. When this functions returns normally, it has read the section 2625 * of XML document and returns the state corresponding to section of 2626 * document which has been read. For optimizations, a particular driver 2627 * can read ahead of the section of document (state returned) just read and 2628 * can maintain a different internal state. 2629 * 2630 * State returned corresponds to Stax states. 2631 * 2632 * @return state representing the section of document just read. 2633 * 2634 * @throws IOException Thrown on i/o error. 2635 * @throws XNIException Thrown on parse error. 2636 */ 2637 2638 public int next() throws IOException, XNIException { 2639 while (true) { 2640 try { 2641 if(DEBUG_NEXT){ 2642 System.out.println("NOW IN FragmentContentDriver"); 2643 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2644 } 2645 2646 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2647 //decideSubState. 
2648 2649 switch (fScannerState) { 2650 case SCANNER_STATE_CONTENT: { 2651 final int ch = fEntityScanner.peekChar(); 2652 if (ch == '<') { 2653 fEntityScanner.scanChar(); 2654 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2655 } else if (ch == '&') { 2656 fEntityScanner.scanChar(); 2657 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2658 break; 2659 } else { 2660 //element content is there.. 2661 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2662 break; 2663 } 2664 } 2665 2666 case SCANNER_STATE_START_OF_MARKUP: { 2667 startOfMarkup(); 2668 break; 2669 }//case: SCANNER_STATE_START_OF_MARKUP 2670 2671 }//end of switch 2672 //decideSubState() ; 2673 2674 //do some special handling if isCoalesce is set to true. 2675 if(fIsCoalesce){ 2676 fUsebuffer = true ; 2677 //if the last section was character data 2678 if(fLastSectionWasCharacterData){ 2679 2680 //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2681 //return the last scanned charactrer data. 2682 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2683 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2684 fLastSectionWasCharacterData = false; 2685 return XMLEvent.CHARACTERS; 2686 } 2687 }//if last section was CDATA or ENTITY REFERENCE 2688 //xxx: there might be another entity reference or CDATA after this 2689 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2690 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2691 //and current state is not SCANNER_STATE_CHARACTER_DATA 2692 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2693 //this means there is nothing more to be coalesced. 2694 //return the CHARACTERS event. 
2695 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2696 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2697 2698 fLastSectionWasCData = false; 2699 fLastSectionWasEntityReference = false; 2700 return XMLEvent.CHARACTERS; 2701 } 2702 } 2703 } 2704 2705 2706 if(DEBUG_NEXT){ 2707 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2708 } 2709 2710 switch(fScannerState){ 2711 2712 case XMLEvent.START_DOCUMENT : 2713 return XMLEvent.START_DOCUMENT; 2714 2715 case SCANNER_STATE_START_ELEMENT_TAG :{ 2716 2717 //xxx this function returns true when element is empty.. can be linked to end element event. 2718 //returns true if the element is empty 2719 fEmptyElement = scanStartElement() ; 2720 //if the element is empty the next event is "end element" 2721 if(fEmptyElement){ 2722 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2723 }else{ 2724 //set the next possible state 2725 setScannerState(SCANNER_STATE_CONTENT); 2726 } 2727 return XMLEvent.START_ELEMENT ; 2728 } 2729 2730 case SCANNER_STATE_CHARACTER_DATA: { 2731 if(DEBUG_COALESCE){ 2732 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2733 System.out.println("fIsCoalesce = " + fIsCoalesce); 2734 } 2735 //if last section was either entity reference or cdata or character data we should be using buffer 2736 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2737 2738 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2739 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2740 fLastSectionWasEntityReference = false; 2741 fLastSectionWasCData = false; 2742 fLastSectionWasCharacterData = true ; 2743 fUsebuffer = true; 2744 }else{ 2745 //clear the buffer 2746 fContentBuffer.clear(); 2747 } 2748 2749 //set the fTempString length to 0 before passing it on to scanContent 2750 //scanContent sets the correct co-ordinates as per the content read 2751 fTempString.length = 0; 2752 int c = fEntityScanner.scanContent(fTempString); 2753 if(DEBUG){ 2754 System.out.println("fTempString = " + fTempString); 2755 } 2756 if(fEntityScanner.skipChar('<')){ 2757 //check if we have reached end of element 2758 if(fEntityScanner.skipChar('/')){ 2759 //increase the mark up depth 2760 fMarkupDepth++; 2761 fLastSectionWasCharacterData = false; 2762 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2763 //check if its start of new element 2764 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2765 fMarkupDepth++; 2766 fLastSectionWasCharacterData = false; 2767 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2768 }else{ 2769 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2770 //there can be cdata ahead if coalesce is true we should call again 2771 if(fIsCoalesce){ 2772 fUsebuffer = true; 2773 fLastSectionWasCharacterData = true; 2774 fContentBuffer.append(fTempString); 2775 fTempString.length = 0; 2776 continue; 2777 } 2778 } 2779 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2780 if(fUsebuffer){ 2781 fContentBuffer.append(fTempString); 2782 fTempString.length = 0; 2783 } 2784 if(DEBUG){ 2785 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2786 } 2787 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2788 if(DEBUG)System.out.println("Return SPACE EVENT"); 2789 return XMLEvent.SPACE; 2790 }else 2791 return 
XMLEvent.CHARACTERS; 2792 2793 } else{ 2794 fUsebuffer = true ; 2795 if(DEBUG){ 2796 System.out.println("fContentBuffer = " + fContentBuffer); 2797 System.out.println("fTempString = " + fTempString); 2798 } 2799 fContentBuffer.append(fTempString); 2800 fTempString.length = 0; 2801 } 2802 if (c == '\r') { 2803 if(DEBUG){ 2804 System.out.println("'\r' character found"); 2805 } 2806 // happens when there is the character reference 2807 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2808 fEntityScanner.scanChar(); 2809 fUsebuffer = true; 2810 fContentBuffer.append((char)c); 2811 c = -1 ; 2812 } else if (c == ']') { 2813 //fStringBuffer.clear(); 2814 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2815 fUsebuffer = true; 2816 fContentBuffer.append((char)fEntityScanner.scanChar()); 2817 // remember where we are in case we get an endEntity before we 2818 // could flush the buffer out - this happens when we're parsing an 2819 // entity which ends with a ] 2820 fInScanContent = true; 2821 2822 // We work on a single character basis to handle cases such as: 2823 // ']]]>' which we might otherwise miss. 2824 // 2825 if (fEntityScanner.skipChar(']')) { 2826 fContentBuffer.append(']'); 2827 while (fEntityScanner.skipChar(']')) { 2828 fContentBuffer.append(']'); 2829 } 2830 if (fEntityScanner.skipChar('>')) { 2831 reportFatalError("CDEndInContent", null); 2832 } 2833 } 2834 c = -1 ; 2835 fInScanContent = false; 2836 } 2837 2838 do{ 2839 //xxx: we should be using only one buffer.. 2840 // we need not to grow the buffer only when isCoalesce() is not true; 2841 2842 if (c == '<') { 2843 fEntityScanner.scanChar(); 2844 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2845 break; 2846 }//xxx what should be the behavior if entity reference is present in the content ? 
2847 else if (c == '&') { 2848 fEntityScanner.scanChar(); 2849 setScannerState(SCANNER_STATE_REFERENCE); 2850 break; 2851 }///xxx since this part is also characters, it should be merged... 2852 else if (c != -1 && isInvalidLiteral(c)) { 2853 if (XMLChar.isHighSurrogate(c)) { 2854 // special case: surrogates 2855 scanSurrogates(fContentBuffer) ; 2856 setScannerState(SCANNER_STATE_CONTENT); 2857 } else { 2858 reportFatalError("InvalidCharInContent", 2859 new Object[] { 2860 Integer.toString(c, 16)}); 2861 fEntityScanner.scanChar(); 2862 } 2863 break; 2864 } 2865 //xxx: scanContent also gives character callback. 2866 c = scanContent(fContentBuffer) ; 2867 //we should not be iterating again if fIsCoalesce is not set to true 2868 2869 if(!fIsCoalesce){ 2870 setScannerState(SCANNER_STATE_CONTENT); 2871 break; 2872 } 2873 2874 }while(true); 2875 2876 //if (fDocumentHandler != null) { 2877 // fDocumentHandler.characters(fContentBuffer, null); 2878 //} 2879 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2880 //if fIsCoalesce is true there might be more data so call fDriver.next() 2881 if(fIsCoalesce){ 2882 fLastSectionWasCharacterData = true ; 2883 continue; 2884 }else{ 2885 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2886 if(DEBUG)System.out.println("Return SPACE EVENT"); 2887 return XMLEvent.SPACE; 2888 } else 2889 return XMLEvent.CHARACTERS ; 2890 } 2891 } 2892 2893 case SCANNER_STATE_END_ELEMENT_TAG :{ 2894 if(fEmptyElement){ 2895 //set it back to false. 2896 fEmptyElement = false; 2897 setScannerState(SCANNER_STATE_CONTENT); 2898 //check the case when there is comment after single element document 2899 //<foo/> and some comment after this 2900 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2901 2902 } else if(scanEndElement() == 0) { 2903 //It is last element of the document 2904 if (elementDepthIsZeroHook()) { 2905 //if element depth is zero , it indicates the end of the document 2906 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2907 //xxx understand this point once again.. 2908 return XMLEvent.END_ELEMENT ; 2909 } 2910 2911 } 2912 setScannerState(SCANNER_STATE_CONTENT); 2913 return XMLEvent.END_ELEMENT ; 2914 } 2915 2916 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2917 scanComment(); 2918 setScannerState(SCANNER_STATE_CONTENT); 2919 return XMLEvent.COMMENT; 2920 //break; 2921 } 2922 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2923 //clear the buffer first 2924 fContentBuffer.clear() ; 2925 //xxx: which buffer should be passed. Ideally we shouldn't have 2926 //more than two buffers -- 2927 //xxx: where should we add the switch for buffering. 2928 scanPI(fContentBuffer); 2929 setScannerState(SCANNER_STATE_CONTENT); 2930 return XMLEvent.PROCESSING_INSTRUCTION; 2931 //break; 2932 } 2933 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2934 //xxx: What if CDATA is the first event 2935 //<foo><![CDATA[hello<><>]]>append</foo> 2936 2937 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 2938 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2939 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2940 fLastSectionWasCData = true ; 2941 fLastSectionWasEntityReference = false; 2942 fLastSectionWasCharacterData = false; 2943 }//if we dont need to coalesce clear the buffer 2944 else{ 2945 fContentBuffer.clear(); 2946 } 2947 fUsebuffer = true; 2948 //CDATA section is completely read in all the case. 2949 scanCDATASection(fContentBuffer , true); 2950 setScannerState(SCANNER_STATE_CONTENT); 2951 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2952 //and just call fDispatche.next(). Since we have set the scanner state to 2953 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2954 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2955 //2. Check if application has set for reporting CDATA event 2956 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2957 //return the cdata event as characters. 2958 if(fIsCoalesce){ 2959 fLastSectionWasCData = true ; 2960 //there might be more data to coalesce. 2961 continue; 2962 }else if(fReportCdataEvent){ 2963 return XMLEvent.CDATA; 2964 } else{ 2965 return XMLEvent.CHARACTERS; 2966 } 2967 } 2968 2969 case SCANNER_STATE_REFERENCE :{ 2970 fMarkupDepth++; 2971 foundBuiltInRefs = false; 2972 2973 //we should not clear the buffer only when the last state was either CDATA or 2974 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2975 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2976 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2977 //used when fIsCoalesce is set to true. 
2978 fLastSectionWasEntityReference = true ; 2979 fLastSectionWasCData = false; 2980 fLastSectionWasCharacterData = false; 2981 }//if we dont need to coalesce clear the buffer 2982 else{ 2983 fContentBuffer.clear(); 2984 } 2985 fUsebuffer = true ; 2986 //take care of character reference 2987 if (fEntityScanner.skipChar('#')) { 2988 scanCharReferenceValue(fContentBuffer, null); 2989 fMarkupDepth--; 2990 if(!fIsCoalesce){ 2991 setScannerState(SCANNER_STATE_CONTENT); 2992 return XMLEvent.CHARACTERS; 2993 } 2994 } else { 2995 // this function also starts new entity 2996 scanEntityReference(fContentBuffer); 2997 //if there was built-in entity reference & coalesce is not true 2998 //return CHARACTERS 2999 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3000 setScannerState(SCANNER_STATE_CONTENT); 3001 return XMLEvent.CHARACTERS; 3002 } 3003 3004 //if there was a text declaration, call next() it will be taken care. 3005 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3006 fLastSectionWasEntityReference = true ; 3007 continue; 3008 } 3009 3010 if(fScannerState == SCANNER_STATE_REFERENCE){ 3011 setScannerState(SCANNER_STATE_CONTENT); 3012 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3013 // Skip the entity reference, we don't care 3014 continue; 3015 } 3016 return XMLEvent.ENTITY_REFERENCE; 3017 } 3018 } 3019 //Wether it was character reference, entity reference or built-in entity 3020 //set the next possible state to SCANNER_STATE_CONTENT 3021 setScannerState(SCANNER_STATE_CONTENT); 3022 fLastSectionWasEntityReference = true ; 3023 continue; 3024 } 3025 3026 case SCANNER_STATE_TEXT_DECL: { 3027 // scan text decl 3028 if (fEntityScanner.skipString("<?xml")) { 3029 fMarkupDepth++; 3030 // NOTE: special case where entity starts with a PI 3031 // whose name starts with "xml" (e.g. 
"xmlfoo") 3032 if (isValidNameChar(fEntityScanner.peekChar())) { 3033 fStringBuffer.clear(); 3034 fStringBuffer.append("xml"); 3035 3036 if (fNamespaces) { 3037 while (isValidNCName(fEntityScanner.peekChar())) { 3038 fStringBuffer.append((char)fEntityScanner.scanChar()); 3039 } 3040 } else { 3041 while (isValidNameChar(fEntityScanner.peekChar())) { 3042 fStringBuffer.append((char)fEntityScanner.scanChar()); 3043 } 3044 } 3045 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3046 fContentBuffer.clear(); 3047 scanPIData(target, fContentBuffer); 3048 } 3049 3050 // standard text declaration 3051 else { 3052 //xxx: this function gives callback 3053 scanXMLDeclOrTextDecl(true); 3054 } 3055 } 3056 // now that we've straightened out the readers, we can read in chunks: 3057 fEntityManager.fCurrentEntity.mayReadChunks = true; 3058 setScannerState(SCANNER_STATE_CONTENT); 3059 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3060 //it seems we have to careful when to allow function issue a callback 3061 //and when to allow adapter issue a callback. 
3062 continue; 3063 } 3064 3065 3066 case SCANNER_STATE_ROOT_ELEMENT: { 3067 if (scanRootElementHook()) { 3068 fEmptyElement = true; 3069 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3070 return XMLEvent.START_ELEMENT; 3071 } 3072 setScannerState(SCANNER_STATE_CONTENT); 3073 return XMLEvent.START_ELEMENT ; 3074 } 3075 case SCANNER_STATE_CHAR_REFERENCE : { 3076 fContentBuffer.clear(); 3077 scanCharReferenceValue(fContentBuffer, null); 3078 fMarkupDepth--; 3079 setScannerState(SCANNER_STATE_CONTENT); 3080 return XMLEvent.CHARACTERS; 3081 } 3082 default: 3083 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3084 3085 }//switch 3086 } 3087 // premature end of file 3088 catch (EOFException e) { 3089 endOfFileHook(e); 3090 return -1; 3091 } 3092 } //while loop 3093 }//next 3094 3095 3096 // 3097 // Protected methods 3098 // 3099 3100 // hooks 3101 3102 // NOTE: These hook methods are added so that the full document 3103 // scanner can share the majority of code with this class. 3104 3105 /** 3106 * Scan for DOCTYPE hook. This method is a hook for subclasses 3107 * to add code to handle scanning for a the "DOCTYPE" string 3108 * after the string "<!" has been scanned. 3109 * 3110 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3111 * was not scanned. 3112 */ 3113 protected boolean scanForDoctypeHook() 3114 throws IOException, XNIException { 3115 return false; 3116 } // scanForDoctypeHook():boolean 3117 3118 /** 3119 * Element depth iz zero. This methos is a hook for subclasses 3120 * to add code to handle when the element depth hits zero. When 3121 * scanning a document fragment, an element depth of zero is 3122 * normal. However, when scanning a full XML document, the 3123 * scanner must handle the trailing miscellanous section of 3124 * the document after the end of the document's root element. 
3125 * 3126 * @return True if the caller should stop and return true which 3127 * allows the scanner to switch to a new scanning 3128 * driver. A return value of false indicates that 3129 * the content driver should continue as normal. 3130 */ 3131 protected boolean elementDepthIsZeroHook() 3132 throws IOException, XNIException { 3133 return false; 3134 } // elementDepthIsZeroHook():boolean 3135 3136 /** 3137 * Scan for root element hook. This method is a hook for 3138 * subclasses to add code that handles scanning for the root 3139 * element. When scanning a document fragment, there is no 3140 * "root" element. However, when scanning a full XML document, 3141 * the scanner must handle the root element specially. 3142 * 3143 * @return True if the caller should stop and return true which 3144 * allows the scanner to switch to a new scanning 3145 * driver. A return value of false indicates that 3146 * the content driver should continue as normal. 3147 */ 3148 protected boolean scanRootElementHook() 3149 throws IOException, XNIException { 3150 return false; 3151 } // scanRootElementHook():boolean 3152 3153 /** 3154 * End of file hook. This method is a hook for subclasses to 3155 * add code that handles the end of file. The end of file in 3156 * a document fragment is OK if the markup depth is zero. 3157 * However, when scanning a full XML document, an end of file 3158 * is always premature. 3159 */ 3160 protected void endOfFileHook(EOFException e) 3161 throws IOException, XNIException { 3162 3163 // NOTE: An end of file is only only an error if we were 3164 // in the middle of scanning some markup. 
-Ac 3165 if (fMarkupDepth != 0) { 3166 reportFatalError("PrematureEOF", null); 3167 } 3168 3169 } // endOfFileHook() 3170 3171 } // class FragmentContentDriver 3172 3173 static void pr(String str) { 3174 System.out.println(str) ; 3175 } 3176 3177 protected boolean fUsebuffer ; 3178 3179 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3180 * maintained for attributes. 3181 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3182 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3183 * XMLString. 3184 * 3185 * @return XMLString XMLString used to store an attribute value. 3186 */ 3187 3188 protected XMLString getString(){ 3189 if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ 3190 return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++); 3191 } else{ 3192 XMLString str = new XMLString(); 3193 fAttributeCacheUsedCount++; 3194 attributeValueCache.add(str); 3195 return str; 3196 } 3197 } 3198 3199 /** 3200 * Implements XMLBufferListener interface. 3201 */ 3202 3203 public void refresh(){ 3204 refresh(0); 3205 } 3206 3207 /** 3208 * receives callbacks from {@link XMLEntityReader } when buffer 3209 * is being changed. 3210 * @param refreshPosition 3211 */ 3212 public void refresh(int refreshPosition){ 3213 //If you are reading attributes and you got a callback 3214 //cache available attributes. 3215 if(fReadingAttributes){ 3216 fAttributes.refresh(); 3217 } 3218 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3219 //since fTempString directly matches to the underlying main buffer 3220 //store the data into buffer 3221 fContentBuffer.append(fTempString); 3222 //clear the XMLString so that data can't be added again. 3223 fTempString.length = 0; 3224 fUsebuffer = true; 3225 } 3226 } 3227 3228 } // class XMLDocumentFragmentScannerImpl