1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.SecurityManager; 54 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 55 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 56 import javax.xml.stream.XMLStreamConstants; 57 import javax.xml.stream.events.XMLEvent; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 67 * 68 * @author Neeraj Bajaj SUN Microsystems 69 * @author K.Venugopal SUN Microsystems 70 * @author Glenn Marcy, IBM 71 * @author Andy Clark, IBM 72 * @author Arnaud Le Hors, IBM 73 * @author Eric Ye, IBM 74 * @author Sunitha Reddy, SUN Microsystems 75 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $ 76 * 77 */ 78 public class XMLDocumentFragmentScannerImpl 79 extends XMLScanner 80 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 81 82 // 83 // Constants 84 // 85 86 protected int fElementAttributeLimit; 87 88 /** External subset resolver. **/ 89 protected ExternalSubsetResolver fExternalSubsetResolver; 90 91 // scanner states 92 93 //XXX this should be divided into more states. 
/** Scanner state: start of markup. */
protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

/** Scanner state: content. */
protected static final int SCANNER_STATE_CONTENT = 22;

/** Scanner state: processing instruction. */
protected static final int SCANNER_STATE_PI = 23;

/** Scanner state: DOCTYPE. */
protected static final int SCANNER_STATE_DOCTYPE = 24;

/** Scanner state: XML declaration. */
protected static final int SCANNER_STATE_XML_DECL = 25;

/** Scanner state: root element. */
protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

/** Scanner state: comment. */
protected static final int SCANNER_STATE_COMMENT = 27;

/** Scanner state: reference. */
protected static final int SCANNER_STATE_REFERENCE = 28;

/** Scanner state: reading an attribute name, e.g. 'type' in &lt;book type="hard"&gt;. */
protected static final int SCANNER_STATE_ATTRIBUTE = 29;

/** Scanner state: reading an attribute value, e.g. "hard" in &lt;book type="hard"&gt;. */
protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

/** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL (value 32 is left unused here). */
//protected static final int SCANNER_STATE_TRAILING_MISC = 32;

/** Scanner state: end of input. */
protected static final int SCANNER_STATE_END_OF_INPUT = 33;

/** Scanner state: terminated. */
protected static final int SCANNER_STATE_TERMINATED = 34;

/** Scanner state: CDATA section. */
protected static final int SCANNER_STATE_CDATA = 35;

/** Scanner state: text declaration. */
protected static final int SCANNER_STATE_TEXT_DECL = 36;

/** Scanner state: character data. */
protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

/** Scanner state: start-element tag, e.g. &lt;book type="hard"&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

/** Scanner state: end-element tag, e.g. &lt;/book&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

/** Scanner state: character reference. */
protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
/** Scanner state: built-in entity references. */
protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

// feature identifiers


/** Feature identifier: notify built-in references. */
protected static final String NOTIFY_BUILTIN_REFS =
        Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

/** Property identifier: entity resolver. */
protected static final String ENTITY_RESOLVER =
        Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

// recognized features and properties

/** Recognized features; positions correspond to {@link #FEATURE_DEFAULTS}. */
private static final String[] RECOGNIZED_FEATURES = {
    NAMESPACES,
    VALIDATION,
    NOTIFY_BUILTIN_REFS,
    NOTIFY_CHAR_REFS,
    Constants.STAX_REPORT_CDATA_EVENT
};

/** Feature defaults, index-aligned with {@link #RECOGNIZED_FEATURES}; null means "no default reported". */
private static final Boolean[] FEATURE_DEFAULTS = {
    Boolean.TRUE,
    null,
    Boolean.FALSE,
    Boolean.FALSE,
    Boolean.TRUE
};

/** Recognized properties; positions correspond to {@link #PROPERTY_DEFAULTS}. */
private static final String[] RECOGNIZED_PROPERTIES = {
    SYMBOL_TABLE,
    ERROR_REPORTER,
    ENTITY_MANAGER,
};

/** Property defaults, index-aligned with {@link #RECOGNIZED_PROPERTIES}. */
private static final Object[] PROPERTY_DEFAULTS = {
    null,
    null,
    null,
};

/** The characters "[CDATA[". */
protected static final char [] cdata = {'[','C','D','A','T','A','['};
/** The characters "&lt;?xml". */
protected static final char [] xmlDecl = {'<','?','x','m','l'};
/** The characters "&lt;/". */
protected static final char [] endTag = {'<','/'};
// debugging

/** Debug scanner state. */
private static final boolean DEBUG_SCANNER_STATE = false;

/** Debug driver. */
private static final boolean DEBUG_DISPATCHER = false;

/** Debug content driver scanning. */
protected static final boolean DEBUG_START_END_ELEMENT = false;


/** Debug driver next */
protected static final boolean DEBUG_NEXT = false ;

/** Debug flag (its use is not visible in this part of the file). */
protected static final boolean DEBUG = false;
/** Debug flag; presumably for coalescing -- confirm against its usages. */
protected static final boolean DEBUG_COALESCE = false;
//
// Data
//

// protected data

/** Document handler. */
protected XMLDocumentHandler fDocumentHandler;
/** Previous scanner state; compared against XMLEvent.END_ELEMENT by getElementQName(). */
protected int fScannerLastState ;

/** Entity Storage */
protected XMLEntityStorage fEntityStore;

/**
 * Entity stack: the markup depth recorded by startEntity for each entity
 * depth, checked again by endEntity to verify that markup is balanced.
 */
protected int[] fEntityStack = new int[4];

/** Markup depth. */
protected int fMarkupDepth;

//is the element empty
protected boolean fEmptyElement ;

//track if we are reading attributes, this is useful while
//there is a callback
protected boolean fReadingAttributes = false;

/** Scanner state. */
protected int fScannerState;

/** SubScanner state: inside scanContent method. */
protected boolean fInScanContent = false;
protected boolean fLastSectionWasCData = false;
protected boolean fLastSectionWasEntityReference = false;
protected boolean fLastSectionWasCharacterData = false;

/** has external dtd */
protected boolean fHasExternalDTD;

/** True when the declaration carried a standalone pseudo-attribute. */
protected boolean fStandaloneSet;
/** True when the standalone pseudo-attribute was present with the value "yes". */
protected boolean fStandalone;
protected String fVersion;

// element information

/** Current element. */
protected QName fCurrentElement;

/** Element stack. */
protected ElementStack fElementStack = new ElementStack();
protected ElementStack2 fElementStack2 = new ElementStack2();

// other info

/** Document system identifier.
 * REVISIT: So what's this used for? - NG
 * protected String fDocumentSystemId;
 ******/

/** Target of the most recently scanned processing instruction (set by scanPIData). */
protected String fPITarget ;

//xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
protected XMLString fPIData = new XMLString();

// features


/** Notify built-in references. */
protected boolean fNotifyBuiltInRefs = false;

//STAX related properties
//defaultValues.
protected boolean fReplaceEntityReferences = true;
protected boolean fSupportExternalEntities = false;
protected boolean fReportCdataEvent = false ;
protected boolean fIsCoalesce = false ;
/** Encoding pseudo-attribute value taken from the XML or text declaration, if any. */
protected String fDeclaredEncoding = null;
/** Disallow doctype declaration. */
protected boolean fDisallowDoctype = false;

// drivers

/** Active driver; next() delegates to it. */
protected Driver fDriver;

/** Content driver. */
protected Driver fContentDriver = createContentDriver();

// temporary variables

/** Element QName. */
protected QName fElementQName = new QName();

/** Attribute QName. */
protected QName fAttributeQName = new QName();

/**
 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
 * implements Iterator interface so we can directly give Attributes in the form of
 * iterator.
 */
protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


/** String. */
protected XMLString fTempString = new XMLString();

/** String. */
protected XMLString fTempString2 = new XMLString();

/**
 * Array of 3 strings; receives the version, encoding and standalone
 * pseudo-attribute values (in that order) in scanXMLDeclOrTextDecl.
 */
private String[] fStrings = new String[3];

/** Making the buffer accessible to derived class -- String buffer. */
protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/** Making the buffer accessible to derived class -- String buffer. */
protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

/**
 * Making the buffer accessible to derived class -- stores character data,
 * comment text and PI data (returned by getCharacterData, getComment and
 * getPIData respectively).
 */
protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

/** Single character array. */
private final char[] fSingleChar = new char[1];
private String fCurrentEntityName = null;

// New members
protected boolean fScanToEnd = false;

protected DTDGrammarUtil dtdGrammarUtil= null;

protected boolean fAddDefaultAttr = false;

protected boolean foundBuiltInRefs = false;

/** Security manager; supplies the element attribute limit during reset. */
protected SecurityManager fSecurityManager = null;

//skip element algorithm
static final short MAX_DEPTH_LIMIT = 5 ;
static final short ELEMENT_ARRAY_LENGTH = 200 ;
static final short MAX_POINTER_AT_A_DEPTH = 4 ;
static final boolean DEBUG_SKIP_ALGORITHM = false;
//create an element array of length equal to ELEMENT_ARRAY_LENGTH
String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
//pointer location where last element was skipped
short fLastPointerLocation = 0 ;
short fElementPointer = 0 ;
//2D array to store pointer info
short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
protected String fElementRawname ;
protected boolean fShouldSkip = false;
protected boolean fAdd = false ;
protected boolean fSkip = false;

/** Reusable Augmentations. */
private Augmentations fTempAugmentations = null;
//
// Constructors
//

/** Default constructor. */
public XMLDocumentFragmentScannerImpl() {
} // <init>()

//
// XMLDocumentScanner methods
//

/**
 * Sets the input source.
 *
 * @param inputSource The input source.
 *
 * @throws IOException Thrown on i/o error.
392 */ 393 public void setInputSource(XMLInputSource inputSource) throws IOException { 394 fEntityManager.setEntityHandler(this); 395 fEntityManager.startEntity("$fragment$", inputSource, false, true); 396 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 397 } // setInputSource(XMLInputSource) 398 399 /** 400 * Scans a document. 401 * 402 * @param complete True if the scanner should scan the document 403 * completely, pushing all events to the registered 404 * document handler. A value of false indicates that 405 * that the scanner should only scan the next portion 406 * of the document and return. A scanner instance is 407 * permitted to completely scan a document if it does 408 * not support this "pull" scanning model. 409 * 410 * @return True if there is more to scan, false otherwise. 411 */ 412 /* public boolean scanDocument(boolean complete) 413 throws IOException, XNIException { 414 415 // keep dispatching "events" 416 fEntityManager.setEntityHandler(this); 417 418 return true; 419 420 } // scanDocument(boolean):boolean 421 */ 422 423 public boolean scanDocument(boolean complete) 424 throws IOException, XNIException { 425 426 // keep dispatching "events" 427 fEntityManager.setEntityHandler(this); 428 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 429 430 int event = next(); 431 do { 432 switch (event) { 433 case XMLStreamConstants.START_DOCUMENT : 434 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 435 break; 436 case XMLStreamConstants.START_ELEMENT : 437 //System.out.println(" in scann element"); 438 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 439 break; 440 case XMLStreamConstants.CHARACTERS : 441 fDocumentHandler.characters(getCharacterData(),null); 442 break; 443 case XMLStreamConstants.SPACE: 444 //check if getCharacterData() is the right function 
to retrieve ignorableWhitespace information. 445 //System.out.println("in the space"); 446 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 447 break; 448 case XMLStreamConstants.ENTITY_REFERENCE : 449 //entity reference callback are given in startEntity 450 break; 451 case XMLStreamConstants.PROCESSING_INSTRUCTION : 452 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 453 break; 454 case XMLStreamConstants.COMMENT : 455 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 456 fDocumentHandler.comment(getCharacterData(),null); 457 break; 458 case XMLStreamConstants.DTD : 459 //all DTD related callbacks are handled in DTDScanner. 460 //1. Stax doesn't define DTD states as it does for XML Document. 461 //therefore we don't need to take care of anything here. So Just break; 462 break; 463 case XMLStreamConstants.CDATA: 464 fDocumentHandler.startCDATA(null); 465 //xxx: check if CDATA values comes from getCharacterData() function 466 fDocumentHandler.characters(getCharacterData(),null); 467 fDocumentHandler.endCDATA(null); 468 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 469 break; 470 case XMLStreamConstants.NOTATION_DECLARATION : 471 break; 472 case XMLStreamConstants.ENTITY_DECLARATION : 473 break; 474 case XMLStreamConstants.NAMESPACE : 475 break; 476 case XMLStreamConstants.ATTRIBUTE : 477 break; 478 case XMLStreamConstants.END_ELEMENT : 479 //do not give callback here. 480 //this callback is given in scanEndElement function. 
481 //fDocumentHandler.endElement(getElementQName(),null); 482 break; 483 default : 484 throw new InternalError("processing event: " + event); 485 486 } 487 //System.out.println("here in before calling next"); 488 event = next(); 489 //System.out.println("here in after calling next"); 490 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 491 492 if(event == XMLStreamConstants.END_DOCUMENT) { 493 fDocumentHandler.endDocument(null); 494 return false; 495 } 496 497 return true; 498 499 } // scanDocument(boolean):boolean 500 501 502 503 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 504 if(fScannerLastState == XMLEvent.END_ELEMENT){ 505 fElementQName.setValues(fElementStack.getLastPoppedElement()); 506 } 507 return fElementQName ; 508 } 509 510 /** return the next state on the input 511 * @return int 512 */ 513 514 public int next() throws IOException, XNIException { 515 return fDriver.next(); 516 } 517 518 // 519 // XMLComponent methods 520 // 521 522 /** 523 * Resets the component. The component can query the component manager 524 * about any features and properties that affect the operation of the 525 * component. 526 * 527 * @param componentManager The component manager. 528 * 529 * @throws SAXException Thrown by component on initialization error. 530 * For example, if a feature or property is 531 * required for the operation of the component, the 532 * component manager may throw a 533 * SAXNotRecognizedException or a 534 * SAXNotSupportedException. 
535 */ 536 537 public void reset(XMLComponentManager componentManager) 538 throws XMLConfigurationException { 539 540 super.reset(componentManager); 541 542 // other settings 543 // fDocumentSystemId = null; 544 545 // sax features 546 //fAttributes.setNamespaces(fNamespaces); 547 548 // xerces features 549 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 550 551 fSecurityManager = (SecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 552 fElementAttributeLimit = (fSecurityManager != null)?fSecurityManager.getElementAttrLimit():0; 553 554 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 555 556 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 557 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 558 (ExternalSubsetResolver) resolver : null; 559 560 // initialize vars 561 fMarkupDepth = 0; 562 fCurrentElement = null; 563 fElementStack.clear(); 564 fHasExternalDTD = false; 565 fStandaloneSet = false; 566 fStandalone = false; 567 fInScanContent = false; 568 //skipping algorithm 569 fShouldSkip = false; 570 fAdd = false; 571 fSkip = false; 572 573 //attribute 574 fReadingAttributes = false; 575 //xxx: external entities are supported in Xerces 576 // it would be good to define feature for this case 577 fSupportExternalEntities = true; 578 fReplaceEntityReferences = true; 579 fIsCoalesce = false; 580 581 // setup Driver 582 setScannerState(SCANNER_STATE_CONTENT); 583 setDriver(fContentDriver); 584 fEntityStore = fEntityManager.getEntityStore(); 585 586 dtdGrammarUtil = null; 587 588 589 //fEntityManager.test(); 590 } // reset(XMLComponentManager) 591 592 593 public void reset(PropertyManager propertyManager){ 594 595 super.reset(propertyManager); 596 597 // other settings 598 // fDocumentSystemId = null; 599 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 600 
fNotifyBuiltInRefs = false ; 601 602 // initialize vars 603 fMarkupDepth = 0; 604 fCurrentElement = null; 605 fShouldSkip = false; 606 fAdd = false; 607 fSkip = false; 608 fElementStack.clear(); 609 //fElementStack2.clear(); 610 fHasExternalDTD = false; 611 fStandaloneSet = false; 612 fStandalone = false; 613 //fReplaceEntityReferences = true; 614 //fSupportExternalEntities = true; 615 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_REPLACING_ENTITY_REFERENCES); 616 fReplaceEntityReferences = bo.booleanValue(); 617 bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_SUPPORTING_EXTERNAL_ENTITIES); 618 fSupportExternalEntities = bo.booleanValue(); 619 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 620 if(cdata != null) 621 fReportCdataEvent = cdata.booleanValue() ; 622 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 623 if(coalesce != null) 624 fIsCoalesce = coalesce.booleanValue(); 625 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 626 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 627 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 628 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 629 // setup Driver 630 //we dont need to do this -- nb. 631 //setScannerState(SCANNER_STATE_CONTENT); 632 //setDriver(fContentDriver); 633 fEntityStore = fEntityManager.getEntityStore(); 634 //fEntityManager.test(); 635 636 dtdGrammarUtil = null; 637 638 } // reset(XMLComponentManager) 639 640 /** 641 * Returns a list of feature identifiers that are recognized by 642 * this component. This method may return null if no features 643 * are recognized by this component. 
644 */ 645 public String[] getRecognizedFeatures() { 646 return (String[])(RECOGNIZED_FEATURES.clone()); 647 } // getRecognizedFeatures():String[] 648 649 /** 650 * Sets the state of a feature. This method is called by the component 651 * manager any time after reset when a feature changes state. 652 * <p> 653 * <strong>Note:</strong> Components should silently ignore features 654 * that do not affect the operation of the component. 655 * 656 * @param featureId The feature identifier. 657 * @param state The state of the feature. 658 * 659 * @throws SAXNotRecognizedException The component should not throw 660 * this exception. 661 * @throws SAXNotSupportedException The component should not throw 662 * this exception. 663 */ 664 public void setFeature(String featureId, boolean state) 665 throws XMLConfigurationException { 666 667 super.setFeature(featureId, state); 668 669 // Xerces properties 670 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 671 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 672 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 673 fNotifyBuiltInRefs = state; 674 } 675 } 676 677 } // setFeature(String,boolean) 678 679 /** 680 * Returns a list of property identifiers that are recognized by 681 * this component. This method may return null if no properties 682 * are recognized by this component. 683 */ 684 public String[] getRecognizedProperties() { 685 return (String[])(RECOGNIZED_PROPERTIES.clone()); 686 } // getRecognizedProperties():String[] 687 688 /** 689 * Sets the value of a property. This method is called by the component 690 * manager any time after reset when a property changes value. 691 * <p> 692 * <strong>Note:</strong> Components should silently ignore properties 693 * that do not affect the operation of the component. 694 * 695 * @param propertyId The property identifier. 696 * @param value The value of the property. 
697 * 698 * @throws SAXNotRecognizedException The component should not throw 699 * this exception. 700 * @throws SAXNotSupportedException The component should not throw 701 * this exception. 702 */ 703 public void setProperty(String propertyId, Object value) 704 throws XMLConfigurationException { 705 706 super.setProperty(propertyId, value); 707 708 // Xerces properties 709 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 710 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 711 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 712 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 713 fEntityManager = (XMLEntityManager)value; 714 return; 715 } 716 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 717 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 718 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 719 (ExternalSubsetResolver) value : null; 720 return; 721 } 722 } 723 724 725 // Xerces properties 726 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 727 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 728 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 729 fEntityManager = (XMLEntityManager)value; 730 } 731 return; 732 } 733 734 } // setProperty(String,Object) 735 736 /** 737 * Returns the default state for a feature, or null if this 738 * component does not want to report a default value for this 739 * feature. 740 * 741 * @param featureId The feature identifier. 
742 * 743 * @since Xerces 2.2.0 744 */ 745 public Boolean getFeatureDefault(String featureId) { 746 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 747 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 748 return FEATURE_DEFAULTS[i]; 749 } 750 } 751 return null; 752 } // getFeatureDefault(String):Boolean 753 754 /** 755 * Returns the default state for a property, or null if this 756 * component does not want to report a default value for this 757 * property. 758 * 759 * @param propertyId The property identifier. 760 * 761 * @since Xerces 2.2.0 762 */ 763 public Object getPropertyDefault(String propertyId) { 764 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 765 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 766 return PROPERTY_DEFAULTS[i]; 767 } 768 } 769 return null; 770 } // getPropertyDefault(String):Object 771 772 // 773 // XMLDocumentSource methods 774 // 775 776 /** 777 * setDocumentHandler 778 * 779 * @param documentHandler 780 */ 781 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 782 fDocumentHandler = documentHandler; 783 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 784 } // setDocumentHandler(XMLDocumentHandler) 785 786 787 /** Returns the document handler */ 788 public XMLDocumentHandler getDocumentHandler(){ 789 return fDocumentHandler; 790 } 791 792 // 793 // XMLEntityHandler methods 794 // 795 796 /** 797 * This method notifies of the start of an entity. The DTD has the 798 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 799 * general entities are just specified by their name. 800 * 801 * @param name The name of the entity. 802 * @param identifier The resource identifier. 803 * @param encoding The auto-detected IANA encoding name of the entity 804 * stream. This value will be null in those situations 805 * where the entity encoding is not auto-detected (e.g. 806 * internal entities or a document entity that is 807 * parsed from a java.io.Reader). 
808 * 809 * @throws XNIException Thrown by handler to signal an error. 810 */ 811 public void startEntity(String name, 812 XMLResourceIdentifier identifier, 813 String encoding, Augmentations augs) throws XNIException { 814 815 // keep track of this entity before fEntityDepth is increased 816 if (fEntityDepth == fEntityStack.length) { 817 int[] entityarray = new int[fEntityStack.length * 2]; 818 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 819 fEntityStack = entityarray; 820 } 821 fEntityStack[fEntityDepth] = fMarkupDepth; 822 823 super.startEntity(name, identifier, encoding, augs); 824 825 // WFC: entity declared in external subset in standalone doc 826 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 827 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 828 new Object[]{name}); 829 } 830 831 /** we are not calling the handlers yet.. */ 832 // call handler 833 if (fDocumentHandler != null && !fScanningAttribute) { 834 if (!name.equals("[xml]")) { 835 fDocumentHandler.startGeneralEntity(name, identifier, encoding, null); 836 } 837 } 838 839 } // startEntity(String,XMLResourceIdentifier,String) 840 841 /** 842 * This method notifies the end of an entity. The DTD has the pseudo-name 843 * of "[dtd]" parameter entity names start with '%'; and general entities 844 * are just specified by their name. 845 * 846 * @param name The name of the entity. 847 * 848 * @throws XNIException Thrown by handler to signal an error. 
849 */ 850 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 851 852 /** 853 * // flush possible pending output buffer - see scanContent 854 * if (fInScanContent && fStringBuffer.length != 0 855 * && fDocumentHandler != null) { 856 * fDocumentHandler.characters(fStringBuffer, null); 857 * fStringBuffer.length = 0; // make sure we know it's been flushed 858 * } 859 */ 860 super.endEntity(name, augs); 861 862 // make sure markup is properly balanced 863 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 864 reportFatalError("MarkupEntityMismatch", null); 865 } 866 867 /**/ 868 // call handler 869 if (fDocumentHandler != null && !fScanningAttribute) { 870 if (!name.equals("[xml]")) { 871 fDocumentHandler.endGeneralEntity(name, null); 872 } 873 } 874 875 876 } // endEntity(String) 877 878 // 879 // Protected methods 880 // 881 882 // Driver factory methods 883 884 /** Creates a content Driver. */ 885 protected Driver createContentDriver() { 886 return new FragmentContentDriver(); 887 } // createContentDriver():Driver 888 889 // scanning methods 890 891 /** 892 * Scans an XML or text declaration. 893 * <p> 894 * <pre> 895 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 896 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 897 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 898 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 899 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 900 * | ('"' ('yes' | 'no') '"')) 901 * 902 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 903 * </pre> 904 * 905 * @param scanningTextDecl True if a text declaration is to 906 * be scanned instead of an XML 907 * declaration. 
908 */ 909 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 910 throws IOException, XNIException { 911 912 // scan decl 913 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 914 fMarkupDepth--; 915 916 // pseudo-attribute values 917 String version = fStrings[0]; 918 String encoding = fStrings[1]; 919 String standalone = fStrings[2]; 920 fDeclaredEncoding = encoding; 921 // set standalone 922 fStandaloneSet = standalone != null; 923 fStandalone = fStandaloneSet && standalone.equals("yes"); 924 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 925 //but this information is only related with Document Entity. 926 fEntityManager.setStandalone(fStandalone); 927 928 929 // call handler 930 if (fDocumentHandler != null) { 931 if (scanningTextDecl) { 932 fDocumentHandler.textDecl(version, encoding, null); 933 } else { 934 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 935 } 936 } 937 938 if(version != null){ 939 fEntityScanner.setVersion(version); 940 fEntityScanner.setXMLVersion(version); 941 } 942 // set encoding on reader, only if encoding was not specified by the application explicitly 943 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 944 fEntityScanner.setEncoding(encoding); 945 } 946 947 } // scanXMLDeclOrTextDecl(boolean) 948 949 public String getPITarget(){ 950 return fPITarget ; 951 } 952 953 public XMLStringBuffer getPIData(){ 954 return fContentBuffer ; 955 } 956 957 //XXX: why not this function behave as per the state of the parser? 958 public XMLString getCharacterData(){ 959 if(fUsebuffer){ 960 return fContentBuffer ; 961 }else{ 962 return fTempString; 963 } 964 965 } 966 967 968 /** 969 * Scans a processing data. This is needed to handle the situation 970 * where a document starts with a processing instruction whose 971 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 972 * 973 * @param target The PI target 974 * @param data The XMLStringBuffer to fill in with the data 975 */ 976 protected void scanPIData(String target, XMLStringBuffer data) 977 throws IOException, XNIException { 978 979 super.scanPIData(target, data); 980 981 //set the PI target and values 982 fPITarget = target ; 983 984 fMarkupDepth--; 985 986 } // scanPIData(String) 987 988 /** 989 * Scans a comment. 990 * <p> 991 * <pre> 992 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 993 * </pre> 994 * <p> 995 * <strong>Note:</strong> Called after scanning past '<!--' 996 */ 997 protected void scanComment() throws IOException, XNIException { 998 fContentBuffer.clear(); 999 scanComment(fContentBuffer); 1000 //getTextCharacters can also be called for reading comments 1001 fUsebuffer = true; 1002 fMarkupDepth--; 1003 1004 } // scanComment() 1005 1006 //xxx value returned by this function may not remain valid if another event is scanned. 1007 public String getComment(){ 1008 return fContentBuffer.toString(); 1009 } 1010 1011 void addElement(String rawname){ 1012 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1013 //storing element raw name in a linear list of array 1014 fElementArray[fElementPointer] = rawname ; 1015 //storing elemnetPointer for particular element depth 1016 1017 if(DEBUG_SKIP_ALGORITHM){ 1018 StringBuffer sb = new StringBuffer() ; 1019 sb.append(" Storing element information ") ; 1020 sb.append(" fElementPointer = " + fElementPointer) ; 1021 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1022 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1023 System.out.println(sb.toString()) ; 1024 } 1025 1026 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1027 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1028 short column = storePointerForADepth(fElementPointer); 1029 if(column > 0){ 1030 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1031 //identity 
comparison shouldn't take much time and we can rely on this 1032 //since its guaranteed to have same object id for same string. 1033 if(rawname == fElementArray[pointer]){ 1034 fShouldSkip = true ; 1035 fLastPointerLocation = pointer ; 1036 //reset the things and return. 1037 resetPointer((short)fElementStack.fDepth , column) ; 1038 fElementArray[fElementPointer] = null ; 1039 return ; 1040 }else{ 1041 fShouldSkip = false ; 1042 } 1043 } 1044 } 1045 fElementPointer++ ; 1046 } 1047 } 1048 1049 1050 void resetPointer(short depth, short column){ 1051 fPointerInfo[depth] [column] = (short)0; 1052 } 1053 1054 //returns column information at which pointer was stored. 1055 short storePointerForADepth(short elementPointer){ 1056 short depth = (short) fElementStack.fDepth ; 1057 1058 //Stores element pointer locations at particular depth , only 4 pointer locations 1059 //are stored at particular depth for now. 1060 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1061 1062 if(canStore(depth, i)){ 1063 fPointerInfo[depth][i] = elementPointer ; 1064 if(DEBUG_SKIP_ALGORITHM){ 1065 StringBuffer sb = new StringBuffer() ; 1066 sb.append(" Pointer information ") ; 1067 sb.append(" fElementPointer = " + fElementPointer) ; 1068 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1069 sb.append(" column = " + i ) ; 1070 System.out.println(sb.toString()) ; 1071 } 1072 return i; 1073 } 1074 //else 1075 //pointer was not stored because we reached the limit 1076 } 1077 return -1 ; 1078 } 1079 1080 boolean canStore(short depth, short column){ 1081 //colum = 0 , means first element at particular depth 1082 //column = 1, means second element at particular depth 1083 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1084 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1085 } 1086 1087 1088 short getElementPointer(short depth, short column){ 1089 //colum = 0 , means first element at particular depth 1090 //column = 1, means second element at particular depth 1091 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1092 return fPointerInfo[depth][column] ; 1093 } 1094 1095 //this function assumes that string passed is not null and skips 1096 //the following string from the buffer this makes sure 1097 boolean skipFromTheBuffer(String rawname) throws IOException{ 1098 if(fEntityScanner.skipString(rawname)){ 1099 char c = (char)fEntityScanner.peekChar() ; 1100 //If the start element was completely skipped we should encounter either ' '(space), 1101 //or '/' (in case of empty element) or '>' 1102 if( c == ' ' || c == '/' || c == '>'){ 1103 fElementRawname = rawname ; 1104 return true ; 1105 } else{ 1106 return false; 1107 } 1108 } else 1109 return false ; 1110 } 1111 1112 boolean skipQElement(String rawname) throws IOException{ 1113 1114 final int c = fEntityScanner.getChar(rawname.length()); 1115 //if this character is still valid element name -- this means string can't match 1116 if(XMLChar.isName(c)){ 1117 return false; 1118 }else{ 1119 return fEntityScanner.skipString(rawname); 1120 } 1121 } 1122 1123 protected boolean skipElement() throws IOException { 1124 1125 if(!fShouldSkip) return false ; 1126 1127 if(fLastPointerLocation != 0){ 1128 //Look at the next element stored in the array list.. we might just get a match. 1129 String rawname = fElementArray[fLastPointerLocation + 1] ; 1130 if(rawname != null && skipFromTheBuffer(rawname)){ 1131 fLastPointerLocation++ ; 1132 if(DEBUG_SKIP_ALGORITHM){ 1133 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1134 } 1135 return true ; 1136 } else{ 1137 //reset it back to zero... we haven't got the correct subset yet. 
1138 fLastPointerLocation = 0 ; 1139 1140 } 1141 } 1142 //xxx: we can put some logic here as from what column it should start looking 1143 //for now we always start at 0 1144 //fallback to tolerant algorithm, it would look for differnt element stored at different 1145 //depth and get us the pointer location. 1146 return fShouldSkip && skipElement((short)0); 1147 1148 } 1149 1150 //start of the column at which it should try searching 1151 boolean skipElement(short column) throws IOException { 1152 short depth = (short)fElementStack.fDepth ; 1153 1154 if(depth > MAX_DEPTH_LIMIT){ 1155 return fShouldSkip = false ; 1156 } 1157 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1158 short pointer = getElementPointer(depth , i ) ; 1159 1160 if(pointer == 0){ 1161 return fShouldSkip = false ; 1162 } 1163 1164 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1165 if(DEBUG_SKIP_ALGORITHM){ 1166 System.out.println(); 1167 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1168 System.out.println(); 1169 } 1170 fLastPointerLocation = pointer ; 1171 return fShouldSkip = true ; 1172 } 1173 } 1174 return fShouldSkip = false ; 1175 } 1176 1177 /** 1178 * Scans a start element. This method will handle the binding of 1179 * namespace information and notifying the handler of the start 1180 * of the element. 1181 * <p> 1182 * <pre> 1183 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1184 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1185 * </pre> 1186 * <p> 1187 * <strong>Note:</strong> This method assumes that the leading 1188 * '<' character has been consumed. 1189 * <p> 1190 * <strong>Note:</strong> This method uses the fElementQName and 1191 * fAttributes variables. The contents of these variables will be 1192 * destroyed. The caller should copy important information out of 1193 * these variables before calling this method. 
1194 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1195 * 1196 * @return True if element is empty. (i.e. It matches 1197 * production [44]. 1198 */ 1199 // fElementQName will have the details of element just read.. 1200 // fAttributes will have the details of all the attributes. 1201 protected boolean scanStartElement() 1202 throws IOException, XNIException { 1203 1204 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1205 //when skipping is true and no more elements should be added 1206 if(fSkip && !fAdd){ 1207 //get the stored element -- if everything goes right this should match the 1208 //token in the buffer 1209 1210 QName name = fElementStack.getNext(); 1211 1212 if(DEBUG_SKIP_ALGORITHM){ 1213 System.out.println("Trying to skip String = " + name.rawname); 1214 } 1215 1216 //Be conservative -- if skipping fails -- stop. 1217 fSkip = fEntityScanner.skipString(name.rawname); 1218 1219 if(fSkip){ 1220 if(DEBUG_SKIP_ALGORITHM){ 1221 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1222 } 1223 fElementStack.push(); 1224 fElementQName = name; 1225 }else{ 1226 //if skipping fails reposition the stack or fallback to normal way of processing 1227 fElementStack.reposition(); 1228 if(DEBUG_SKIP_ALGORITHM){ 1229 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1230 } 1231 } 1232 } 1233 1234 //we are still at the stage of adding elements 1235 //the elements were not matched or 1236 //fSkip is not set to true 1237 if(!fSkip || fAdd){ 1238 //get the next element from the stack 1239 fElementQName = fElementStack.nextElement(); 1240 // name 1241 if (fNamespaces) { 1242 fEntityScanner.scanQName(fElementQName); 1243 } else { 1244 String name = fEntityScanner.scanName(); 1245 fElementQName.setValues(null, name, name, null); 1246 } 1247 1248 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1249 if(DEBUG_SKIP_ALGORITHM){ 1250 if(fAdd){ 1251 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1252 } 1253 } 1254 1255 } 1256 1257 //when the elements are being added , we need to check if we are set for skipping the elements 1258 if(fAdd){ 1259 //this sets the value of fAdd variable 1260 fElementStack.matchElement(fElementQName); 1261 } 1262 1263 1264 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1265 fCurrentElement = fElementQName; 1266 1267 String rawname = fElementQName.rawname; 1268 1269 fEmptyElement = false; 1270 1271 fAttributes.removeAllAttributes(); 1272 1273 if(!seekCloseOfStartTag()){ 1274 fReadingAttributes = true; 1275 fAttributeCacheUsedCount =0; 1276 fStringBufferIndex =0; 1277 fAddDefaultAttr = true; 1278 do { 1279 scanAttribute(fAttributes); 1280 if (fSecurityManager != null && fAttributes.getLength() > fElementAttributeLimit){ 1281 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1282 "ElementAttributeLimit", 1283 new Object[]{rawname, new Integer(fAttributes.getLength()) }, 1284 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1285 } 1286 1287 } while (!seekCloseOfStartTag()); 1288 fReadingAttributes=false; 1289 } 1290 1291 if (fEmptyElement) { 1292 //decrease the markup depth.. 1293 fMarkupDepth--; 1294 1295 // check that this element was opened in the same entity 1296 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1297 reportFatalError("ElementEntityMismatch", 1298 new Object[]{fCurrentElement.rawname}); 1299 } 1300 // call handler 1301 if (fDocumentHandler != null) { 1302 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1303 } 1304 1305 //We should not be popping out the context here in endELement becaause the namespace context is still 1306 //valid when parser is at the endElement state. 
1307 //if (fNamespaces) { 1308 // fNamespaceContext.popContext(); 1309 //} 1310 1311 //pop the element off the stack.. 1312 fElementStack.popElement(); 1313 1314 } else { 1315 1316 if(dtdGrammarUtil != null) 1317 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1318 if(fDocumentHandler != null){ 1319 //complete element and attributes are traversed in this function so we can send a callback 1320 //here. 1321 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1322 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1323 } 1324 } 1325 1326 1327 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1328 return fEmptyElement; 1329 1330 } // scanStartElement():boolean 1331 1332 /** 1333 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1334 * Characters are consumed. 1335 */ 1336 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1337 // spaces 1338 boolean sawSpace = fEntityScanner.skipSpaces(); 1339 1340 // end tag? 1341 final int c = fEntityScanner.peekChar(); 1342 if (c == '>') { 1343 fEntityScanner.scanChar(); 1344 return true; 1345 } else if (c == '/') { 1346 fEntityScanner.scanChar(); 1347 if (!fEntityScanner.skipChar('>')) { 1348 reportFatalError("ElementUnterminated", 1349 new Object[]{fElementQName.rawname}); 1350 } 1351 fEmptyElement = true; 1352 return true; 1353 } else if (!isValidNameStartChar(c) || !sawSpace) { 1354 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1355 } 1356 1357 return false; 1358 } 1359 1360 public boolean hasAttributes(){ 1361 return fAttributes.getLength() > 0 ? true : false ; 1362 } 1363 1364 1365 /** 1366 * Scans an attribute. 1367 * <p> 1368 * <pre> 1369 * [41] Attribute ::= Name Eq AttValue 1370 * </pre> 1371 * <p> 1372 * <strong>Note:</strong> This method assumes that the next 1373 * character on the stream is the first character of the attribute 1374 * name. 
1375 * <p> 1376 * <strong>Note:</strong> This method uses the fAttributeQName and 1377 * fQName variables. The contents of these variables will be 1378 * destroyed. 1379 * 1380 * @param attributes The attributes list for the scanned attribute. 1381 */ 1382 1383 /** 1384 * protected void scanAttribute(AttributeIteratorImpl attributes) 1385 * throws IOException, XNIException { 1386 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1387 * 1388 * 1389 * // name 1390 * if (fNamespaces) { 1391 * fEntityScanner.scanQName(fAttributeQName); 1392 * } 1393 * else { 1394 * String name = fEntityScanner.scanName(); 1395 * fAttributeQName.setValues(null, name, name, null); 1396 * } 1397 * 1398 * // equals 1399 * fEntityScanner.skipSpaces(); 1400 * if (!fEntityScanner.skipChar('=')) { 1401 * reportFatalError("EqRequiredInAttribute", 1402 * new Object[]{fAttributeQName.rawname}); 1403 * } 1404 * fEntityScanner.skipSpaces(); 1405 * 1406 * 1407 * // content 1408 * int oldLen = attributes.getLength(); 1409 */ 1410 /**xxx there is one check of duplicate attribute that has been removed. 
1411 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1412 * 1413 * // WFC: Unique Att Spec 1414 * if (oldLen == attributes.getLength()) { 1415 * reportFatalError("AttributeNotUnique", 1416 * new Object[]{fCurrentElement.rawname, 1417 * fAttributeQName.rawname}); 1418 * } 1419 */ 1420 1421 /* 1422 //REVISIT: one more case needs to be included: external PE and standalone is no 1423 boolean isVC = fHasExternalDTD && !fStandalone; 1424 scanAttributeValue(fTempString, fTempString2, 1425 fAttributeQName.rawname, attributes, 1426 oldLen, isVC); 1427 1428 //attributes.setValue(oldLen, fTempString.toString()); 1429 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1430 //attributes.setSpecified(oldLen, true); 1431 1432 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1433 fAttributes.addAttribute(attribute); 1434 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1435 } // scanAttribute(XMLAttributes) 1436 1437 */ 1438 1439 /** return the attribute iterator implementation */ 1440 public XMLAttributesIteratorImpl getAttributeIterator(){ 1441 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1442 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1443 fAddDefaultAttr = false; 1444 } 1445 return fAttributes; 1446 } 1447 1448 /** return if standalone is set */ 1449 public boolean standaloneSet(){ 1450 return fStandaloneSet; 1451 } 1452 /** return if the doucment is standalone */ 1453 public boolean isStandAlone(){ 1454 return fStandalone ; 1455 } 1456 /** 1457 * Scans an attribute name value pair. 1458 * <p> 1459 * <pre> 1460 * [41] Attribute ::= Name Eq AttValue 1461 * </pre> 1462 * <p> 1463 * <strong>Note:</strong> This method assumes that the next 1464 * character on the stream is the first character of the attribute 1465 * name. 
1466 * <p> 1467 * <strong>Note:</strong> This method uses the fAttributeQName and 1468 * fQName variables. The contents of these variables will be 1469 * destroyed. 1470 * 1471 * @param attributes The attributes list for the scanned attribute. 1472 */ 1473 1474 protected void scanAttribute(XMLAttributes attributes) 1475 throws IOException, XNIException { 1476 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1477 1478 // name 1479 if (fNamespaces) { 1480 fEntityScanner.scanQName(fAttributeQName); 1481 } else { 1482 String name = fEntityScanner.scanName(); 1483 fAttributeQName.setValues(null, name, name, null); 1484 } 1485 1486 // equals 1487 fEntityScanner.skipSpaces(); 1488 if (!fEntityScanner.skipChar('=')) { 1489 reportFatalError("EqRequiredInAttribute", 1490 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1491 } 1492 fEntityScanner.skipSpaces(); 1493 1494 int attIndex = 0 ; 1495 //REVISIT: one more case needs to be included: external PE and standalone is no 1496 boolean isVC = fHasExternalDTD && !fStandalone; 1497 //fTempString would store attribute value 1498 ///fTempString2 would store attribute non-normalized value 1499 1500 //this function doesn't use 'attIndex'. We are adding the attribute later 1501 //after we have figured out that current attribute is not namespace declaration 1502 //since scanAttributeValue doesn't use attIndex parameter therefore we 1503 //can safely add the attribute later.. 1504 XMLString tmpStr = getString(); 1505 1506 scanAttributeValue(tmpStr, fTempString2, 1507 fAttributeQName.rawname, attributes, 1508 attIndex, isVC); 1509 1510 // content 1511 int oldLen = attributes.getLength(); 1512 //if the attribute name already exists.. 
new value is replaced with old value 1513 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1514 1515 // WFC: Unique Att Spec 1516 //attributes count will be same if the current attribute name already exists for this element name. 1517 //this means there are two duplicate attributes. 1518 if (oldLen == attributes.getLength()) { 1519 reportFatalError("AttributeNotUnique", 1520 new Object[]{fCurrentElement.rawname, 1521 fAttributeQName.rawname}); 1522 } 1523 1524 //tmpString contains attribute value 1525 //we are passing null as the attribute value 1526 attributes.setValue(attIndex, null, tmpStr); 1527 1528 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1529 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1530 attributes.setSpecified(attIndex, true); 1531 1532 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1533 1534 } // scanAttribute(XMLAttributes) 1535 1536 /** 1537 * Scans element content. 1538 * 1539 * @return Returns the next character on the stream. 1540 */ 1541 //CHANGED: 1542 //EARLIER: scanContent() 1543 //NOW: scanContent(XMLStringBuffer) 1544 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1545 //this function appends the data to the buffer. 1546 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1547 //set the fTempString length to 0 before passing it on to scanContent 1548 //scanContent sets the correct co-ordinates as per the content read 1549 fTempString.length = 0; 1550 int c = fEntityScanner.scanContent(fTempString); 1551 content.append(fTempString); 1552 fTempString.length = 0; 1553 if (c == '\r') { 1554 // happens when there is the character reference 1555 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1556 fEntityScanner.scanChar(); 1557 content.append((char)c); 1558 c = -1; 1559 } else if (c == ']') { 1560 //fStringBuffer.clear(); 1561 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1562 content.append((char)fEntityScanner.scanChar()); 1563 // remember where we are in case we get an endEntity before we 1564 // could flush the buffer out - this happens when we're parsing an 1565 // entity which ends with a ] 1566 fInScanContent = true; 1567 // 1568 // We work on a single character basis to handle cases such as: 1569 // ']]]>' which we might otherwise miss. 1570 // 1571 if (fEntityScanner.skipChar(']')) { 1572 content.append(']'); 1573 while (fEntityScanner.skipChar(']')) { 1574 content.append(']'); 1575 } 1576 if (fEntityScanner.skipChar('>')) { 1577 reportFatalError("CDEndInContent", null); 1578 } 1579 } 1580 fInScanContent = false; 1581 c = -1; 1582 } 1583 if (fDocumentHandler != null && content.length > 0) { 1584 //fDocumentHandler.characters(content, null); 1585 } 1586 return c; 1587 1588 } // scanContent():int 1589 1590 1591 /** 1592 * Scans a CDATA section. 1593 * <p> 1594 * <strong>Note:</strong> This method uses the fTempString and 1595 * fStringBuffer variables. 1596 * 1597 * @param complete True if the CDATA section is to be scanned 1598 * completely. 1599 * 1600 * @return True if CDATA is completely scanned. 1601 */ 1602 //CHANGED: 1603 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1604 throws IOException, XNIException { 1605 1606 // call handler 1607 if (fDocumentHandler != null) { 1608 //fDocumentHandler.startCDATA(null); 1609 } 1610 1611 while (true) { 1612 //scanData will fill the contentBuffer 1613 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1614 break ; 1615 /** We dont need all this code if we pass ']]>' as delimeter.. 
1616 * int brackets = 2; 1617 * while (fEntityScanner.skipChar(']')) { 1618 * brackets++; 1619 * } 1620 * 1621 * //When we find more than 2 square brackets 1622 * if (fDocumentHandler != null && brackets > 2) { 1623 * //we dont need to clear the buffer.. 1624 * //contentBuffer.clear(); 1625 * for (int i = 2; i < brackets; i++) { 1626 * contentBuffer.append(']'); 1627 * } 1628 * fDocumentHandler.characters(contentBuffer, null); 1629 * } 1630 * 1631 * if (fEntityScanner.skipChar('>')) { 1632 * break; 1633 * } 1634 * if (fDocumentHandler != null) { 1635 * //we dont need to clear the buffer now.. 1636 * //contentBuffer.clear(); 1637 * contentBuffer.append("]]"); 1638 * fDocumentHandler.characters(contentBuffer, null); 1639 * } 1640 **/ 1641 } else { 1642 int c = fEntityScanner.peekChar(); 1643 if (c != -1 && isInvalidLiteral(c)) { 1644 if (XMLChar.isHighSurrogate(c)) { 1645 //contentBuffer.clear(); 1646 //scan surrogates if any.... 1647 scanSurrogates(contentBuffer); 1648 } else { 1649 reportFatalError("InvalidCharInCDSect", 1650 new Object[]{Integer.toString(c,16)}); 1651 fEntityScanner.scanChar(); 1652 } 1653 } 1654 //by this time we have also read surrogate contents if any... 1655 if (fDocumentHandler != null) { 1656 //fDocumentHandler.characters(contentBuffer, null); 1657 } 1658 } 1659 } 1660 fMarkupDepth--; 1661 1662 if (fDocumentHandler != null && contentBuffer.length > 0) { 1663 //fDocumentHandler.characters(contentBuffer, null); 1664 } 1665 1666 // call handler 1667 if (fDocumentHandler != null) { 1668 //fDocumentHandler.endCDATA(null); 1669 } 1670 1671 return true; 1672 1673 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1674 1675 /** 1676 * Scans an end element. 1677 * <p> 1678 * <pre> 1679 * [42] ETag ::= '</' Name S? '>' 1680 * </pre> 1681 * <p> 1682 * <strong>Note:</strong> This method uses the fElementQName variable. 1683 * The contents of this variable will be destroyed. 
The caller should 1684 * copy the needed information out of this variable before calling 1685 * this method. 1686 * 1687 * @return The element depth. 1688 */ 1689 protected int scanEndElement() throws IOException, XNIException { 1690 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1691 1692 // pop context 1693 QName endElementName = fElementStack.popElement(); 1694 1695 String rawname = endElementName.rawname; 1696 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1697 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1698 //In scanners most of the time is consumed on checks done for XML characters, we can 1699 // optimize on it and avoid the checks done for endElement, 1700 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1701 1702 // this should work both for namespace processing true or false... 1703 1704 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1705 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1706 1707 if (!fEntityScanner.skipString(endElementName.rawname)) { 1708 reportFatalError("ETagRequired", new Object[]{rawname}); 1709 } 1710 1711 // end 1712 fEntityScanner.skipSpaces(); 1713 if (!fEntityScanner.skipChar('>')) { 1714 reportFatalError("ETagUnterminated", 1715 new Object[]{rawname}); 1716 } 1717 fMarkupDepth--; 1718 1719 //we have increased the depth for two markup "<" characters 1720 fMarkupDepth--; 1721 1722 // check that this element was opened in the same entity 1723 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1724 reportFatalError("ElementEntityMismatch", 1725 new Object[]{rawname}); 1726 } 1727 1728 //We should not be popping out the context here in endELement becaause the namespace context is still 1729 //valid when parser is at the endElement state. 
1730 1731 //if (fNamespaces) { 1732 // fNamespaceContext.popContext(); 1733 //} 1734 1735 // call handler 1736 if (fDocumentHandler != null ) { 1737 //end element is scanned in this function so we can send a callback 1738 //here. 1739 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1740 1741 fDocumentHandler.endElement(endElementName, null); 1742 } 1743 if(dtdGrammarUtil != null) 1744 dtdGrammarUtil.endElement(endElementName); 1745 1746 return fMarkupDepth; 1747 1748 } // scanEndElement():int 1749 1750 /** 1751 * Scans a character reference. 1752 * <p> 1753 * <pre> 1754 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1755 * </pre> 1756 */ 1757 protected void scanCharReference() 1758 throws IOException, XNIException { 1759 1760 fStringBuffer2.clear(); 1761 int ch = scanCharReferenceValue(fStringBuffer2, null); 1762 fMarkupDepth--; 1763 if (ch != -1) { 1764 // call handler 1765 1766 if (fDocumentHandler != null) { 1767 if (fNotifyCharRefs) { 1768 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1769 } 1770 Augmentations augs = null; 1771 if (fValidation && ch <= 0x20) { 1772 if (fTempAugmentations != null) { 1773 fTempAugmentations.removeAllItems(); 1774 } 1775 else { 1776 fTempAugmentations = new AugmentationsImpl(); 1777 } 1778 augs = fTempAugmentations; 1779 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1780 } 1781 //xxx: How do we deal with this - how to return charReferenceValues 1782 //now this is being commented because this is taken care in scanDocument() 1783 //fDocumentHandler.characters(fStringBuffer2, null); 1784 if (fNotifyCharRefs) { 1785 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1786 } 1787 } 1788 } 1789 1790 } // scanCharReference() 1791 1792 1793 /** 1794 * Scans an entity reference. 1795 * 1796 * @return returns true if the new entity is started. If it was built-in entity 1797 * 'false' is returned. 1798 * @throws IOException Thrown if i/o error occurs. 
1799 * @throws XNIException Thrown if handler throws exception upon 1800 * notification. 1801 */ 1802 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1803 String name = fEntityScanner.scanName(); 1804 if (name == null) { 1805 reportFatalError("NameRequiredInReference", null); 1806 return; 1807 } 1808 if (!fEntityScanner.skipChar(';')) { 1809 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1810 } 1811 if (fEntityStore.isUnparsedEntity(name)) { 1812 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1813 } 1814 fMarkupDepth--; 1815 fCurrentEntityName = name; 1816 1817 // handle built-in entities 1818 if (name == fAmpSymbol) { 1819 handleCharacter('&', fAmpSymbol, content); 1820 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1821 return ; 1822 } else if (name == fLtSymbol) { 1823 handleCharacter('<', fLtSymbol, content); 1824 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1825 return ; 1826 } else if (name == fGtSymbol) { 1827 handleCharacter('>', fGtSymbol, content); 1828 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1829 return ; 1830 } else if (name == fQuotSymbol) { 1831 handleCharacter('"', fQuotSymbol, content); 1832 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1833 return ; 1834 } else if (name == fAposSymbol) { 1835 handleCharacter('\'', fAposSymbol, content); 1836 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1837 return ; 1838 } 1839 1840 //1. if the entity is external and support to external entities is not required 1841 // 2. or entities should not be replaced 1842 //3. or if it is built in entity reference. 
1843 if((fEntityStore.isExternalEntity(name) && !fSupportExternalEntities) || (!fEntityStore.isExternalEntity(name) && !fReplaceEntityReferences) || foundBuiltInRefs){ 1844 fScannerState = SCANNER_STATE_REFERENCE; 1845 return ; 1846 } 1847 // start general entity 1848 if (!fEntityStore.isDeclaredEntity(name)) { 1849 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1850 if (fDisallowDoctype && fReplaceEntityReferences) { 1851 reportFatalError("EntityNotDeclared", new Object[]{name}); 1852 return; 1853 } 1854 //REVISIT: one more case needs to be included: external PE and standalone is no 1855 if ( fHasExternalDTD && !fStandalone) { 1856 if (fValidation) 1857 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1858 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1859 } else 1860 reportFatalError("EntityNotDeclared", new Object[]{name}); 1861 } 1862 //we are starting the entity even if the entity was not declared 1863 //if that was the case it its taken care in XMLEntityManager.startEntity() 1864 //we immediately call the endEntity. Application gets to know if there was 1865 //any entity that was not declared. 1866 fEntityManager.startEntity(name, false); 1867 //set the scaner state to content.. parser will automatically revive itself at any point of time. 1868 //setScannerState(SCANNER_STATE_CONTENT); 1869 //return true ; 1870 } // scanEntityReference() 1871 1872 // utility methods 1873 1874 /** 1875 * Calls document handler with a single character resulting from 1876 * built-in entity resolution. 1877 * 1878 * @param c 1879 * @param entity built-in name 1880 * @param XMLStringBuffer append the character to buffer 1881 * 1882 * we really dont need to call this function -- this function is only required when 1883 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1884 * calling this function to hanlde built-in entity reference. 
1885 * 1886 */ 1887 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1888 foundBuiltInRefs = true; 1889 content.append(c); 1890 if (fDocumentHandler != null) { 1891 fSingleChar[0] = c; 1892 if (fNotifyBuiltInRefs) { 1893 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1894 } 1895 fTempString.setValues(fSingleChar, 0, 1); 1896 //fDocumentHandler.characters(fTempString, null); 1897 1898 if (fNotifyBuiltInRefs) { 1899 fDocumentHandler.endGeneralEntity(entity, null); 1900 } 1901 } 1902 } // handleCharacter(char) 1903 1904 // helper methods 1905 1906 /** 1907 * Sets the scanner state. 1908 * 1909 * @param state The new scanner state. 1910 */ 1911 protected final void setScannerState(int state) { 1912 1913 fScannerState = state; 1914 if (DEBUG_SCANNER_STATE) { 1915 System.out.print("### setScannerState: "); 1916 //System.out.print(fScannerState); 1917 System.out.print(getScannerStateName(state)); 1918 System.out.println(); 1919 } 1920 1921 } // setScannerState(int) 1922 1923 1924 /** 1925 * Sets the Driver. 1926 * 1927 * @param Driver The new Driver. 1928 */ 1929 protected final void setDriver(Driver driver) { 1930 fDriver = driver; 1931 if (DEBUG_DISPATCHER) { 1932 System.out.print("%%% setDriver: "); 1933 System.out.print(getDriverName(driver)); 1934 System.out.println(); 1935 } 1936 } 1937 1938 // 1939 // Private methods 1940 // 1941 1942 /** Returns the scanner state name. 
*/ 1943 protected String getScannerStateName(int state) { 1944 1945 switch (state) { 1946 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1947 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1948 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1949 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1950 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1951 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1952 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1953 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1954 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1955 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1956 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1957 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1958 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1959 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1960 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1961 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1962 } 1963 1964 return "??? ("+state+')'; 1965 1966 } // getScannerStateName(int):String 1967 public String getEntityName(){ 1968 //return the cached name 1969 return fCurrentEntityName; 1970 } 1971 1972 /** Returns the driver name. 
*/ 1973 public String getDriverName(Driver driver) { 1974 1975 if (DEBUG_DISPATCHER) { 1976 if (driver != null) { 1977 String name = driver.getClass().getName(); 1978 int index = name.lastIndexOf('.'); 1979 if (index != -1) { 1980 name = name.substring(index + 1); 1981 index = name.lastIndexOf('$'); 1982 if (index != -1) { 1983 name = name.substring(index + 1); 1984 } 1985 } 1986 return name; 1987 } 1988 } 1989 return "null"; 1990 1991 } // getDriverName():String 1992 1993 // 1994 // Classes 1995 // 1996 1997 /** 1998 * @author Neeraj Bajaj, Sun Microsystems. 1999 */ 2000 protected static final class Element { 2001 2002 // 2003 // Data 2004 // 2005 2006 /** Symbol. */ 2007 public QName qname; 2008 2009 //raw name stored as characters 2010 public char[] fRawname; 2011 2012 /** The next Element entry. */ 2013 public Element next; 2014 2015 // 2016 // Constructors 2017 // 2018 2019 /** 2020 * Constructs a new Element from the given QName and next Element 2021 * reference. 2022 */ 2023 public Element(QName qname, Element next) { 2024 this.qname.setValues(qname); 2025 this.fRawname = qname.rawname.toCharArray(); 2026 this.next = next; 2027 } 2028 2029 } // class Element 2030 2031 /** 2032 * Element stack. 2033 * 2034 * @author Neeraj Bajaj, Sun Microsystems. 2035 */ 2036 protected class ElementStack2 { 2037 2038 // 2039 // Data 2040 // 2041 2042 /** The stack data. */ 2043 protected QName [] fQName = new QName[20]; 2044 2045 //Element depth 2046 protected int fDepth; 2047 //total number of elements 2048 protected int fCount; 2049 //current position 2050 protected int fPosition; 2051 //Mark refers to the position 2052 protected int fMark; 2053 2054 protected int fLastDepth ; 2055 2056 // 2057 // Constructors 2058 // 2059 2060 /** Default constructor. 
*/ 2061 public ElementStack2() { 2062 for (int i = 0; i < fQName.length; i++) { 2063 fQName[i] = new QName(); 2064 } 2065 fMark = fPosition = 1; 2066 } // <init>() 2067 2068 public void resize(){ 2069 /** 2070 * int length = fElements.length; 2071 * Element [] temp = new Element[length * 2]; 2072 * System.arraycopy(fElements, 0, temp, 0, length); 2073 * fElements = temp; 2074 */ 2075 //resize QNames 2076 int oldLength = fQName.length; 2077 QName [] tmp = new QName[oldLength * 2]; 2078 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2079 fQName = tmp; 2080 2081 for (int i = oldLength; i < fQName.length; i++) { 2082 fQName[i] = new QName(); 2083 } 2084 2085 } 2086 2087 2088 // 2089 // Public methods 2090 // 2091 2092 /** Check if the element scanned during the start element 2093 *matches the stored element. 2094 * 2095 *@return true if the match suceeds. 2096 */ 2097 public boolean matchElement(QName element) { 2098 //last depth is the depth when last elemnt was pushed 2099 //if last depth is greater than current depth 2100 if(DEBUG_SKIP_ALGORITHM){ 2101 System.out.println("fLastDepth = " + fLastDepth); 2102 System.out.println("fDepth = " + fDepth); 2103 } 2104 boolean match = false; 2105 if(fLastDepth > fDepth && fDepth <= 2){ 2106 if(DEBUG_SKIP_ALGORITHM){ 2107 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2108 } 2109 if(element.rawname == fQName[fDepth].rawname){ 2110 fAdd = false; 2111 //mark this position 2112 //decrease the depth by 1 as arrays are 0 based 2113 fMark = fDepth - 1; 2114 //we found the match and from next element skipping will start, add 1 2115 fPosition = fMark + 1 ; 2116 match = true; 2117 //Once we get match decrease the count -- this was increased by nextElement() 2118 --fCount; 2119 if(DEBUG_SKIP_ALGORITHM){ 2120 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2121 System.out.println("fMark = " + fMark); 2122 System.out.println("fPosition = " + fPosition); 2123 
System.out.println("fDepth = " + fDepth); 2124 System.out.println("fCount = " + fCount); 2125 } 2126 }else{ 2127 fAdd = true; 2128 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2129 } 2130 } 2131 //store the last depth 2132 fLastDepth = fDepth++; 2133 return match; 2134 } // pushElement(QName):QName 2135 2136 /** 2137 * This function doesn't increase depth. The function in this function is 2138 *broken down into two functions for efficiency. <@see>matchElement</see>. 2139 * This function just returns the pointer to the object and its values are set. 2140 * 2141 *@return QName reference to the next element in the list 2142 */ 2143 public QName nextElement() { 2144 2145 //if number of elements becomes equal to the length of array -- stop the skipping 2146 if (fCount == fQName.length) { 2147 fShouldSkip = false; 2148 fAdd = false; 2149 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2150 //xxx: this is not correct, we are returning the last element 2151 //this wont make any difference since flag has been set to 'false' 2152 return fQName[--fCount]; 2153 } 2154 if(DEBUG_SKIP_ALGORITHM){ 2155 System.out.println("fCount = " + fCount); 2156 } 2157 return fQName[fCount++]; 2158 2159 } 2160 2161 /** Note that this function is considerably different than nextElement() 2162 * This function just returns the previously stored elements 2163 */ 2164 public QName getNext(){ 2165 //when position reaches number of elements in the list.. 2166 //set the position back to mark, making it a circular linked list. 2167 if(fPosition == fCount){ 2168 fPosition = fMark; 2169 } 2170 return fQName[fPosition++]; 2171 } 2172 2173 /** returns the current depth 2174 */ 2175 public int popElement(){ 2176 return fDepth--; 2177 } 2178 2179 2180 /** Clears the stack without throwing away existing QName objects. 
*/ 2181 public void clear() { 2182 fLastDepth = 0; 2183 fDepth = 0; 2184 fCount = 0 ; 2185 fPosition = fMark = 1; 2186 } // clear() 2187 2188 } // class ElementStack 2189 2190 /** 2191 * Element stack. This stack operates without synchronization, error 2192 * checking, and it re-uses objects instead of throwing popped items 2193 * away. 2194 * 2195 * @author Andy Clark, IBM 2196 */ 2197 protected class ElementStack { 2198 2199 // 2200 // Data 2201 // 2202 2203 /** The stack data. */ 2204 protected QName[] fElements; 2205 protected int [] fInt = new int[20]; 2206 2207 2208 //Element depth 2209 protected int fDepth; 2210 //total number of elements 2211 protected int fCount; 2212 //current position 2213 protected int fPosition; 2214 //Mark refers to the position 2215 protected int fMark; 2216 2217 protected int fLastDepth ; 2218 2219 // 2220 // Constructors 2221 // 2222 2223 /** Default constructor. */ 2224 public ElementStack() { 2225 fElements = new QName[20]; 2226 for (int i = 0; i < fElements.length; i++) { 2227 fElements[i] = new QName(); 2228 } 2229 } // <init>() 2230 2231 // 2232 // Public methods 2233 // 2234 2235 /** 2236 * Pushes an element on the stack. 2237 * <p> 2238 * <strong>Note:</strong> The QName values are copied into the 2239 * stack. In other words, the caller does <em>not</em> orphan 2240 * the element to the stack. Also, the QName object returned 2241 * is <em>not</em> orphaned to the caller. It should be 2242 * considered read-only. 2243 * 2244 * @param element The element to push onto the stack. 
2245 * 2246 * @return Returns the actual QName object that stores the 2247 */ 2248 //XXX: THIS FUNCTION IS NOT USED 2249 public QName pushElement(QName element) { 2250 if (fDepth == fElements.length) { 2251 QName[] array = new QName[fElements.length * 2]; 2252 System.arraycopy(fElements, 0, array, 0, fDepth); 2253 fElements = array; 2254 for (int i = fDepth; i < fElements.length; i++) { 2255 fElements[i] = new QName(); 2256 } 2257 } 2258 fElements[fDepth].setValues(element); 2259 return fElements[fDepth++]; 2260 } // pushElement(QName):QName 2261 2262 2263 /** Note that this function is considerably different than nextElement() 2264 * This function just returns the previously stored elements 2265 */ 2266 public QName getNext(){ 2267 //when position reaches number of elements in the list.. 2268 //set the position back to mark, making it a circular linked list. 2269 if(fPosition == fCount){ 2270 fPosition = fMark; 2271 } 2272 //store the position of last opened tag at particular depth 2273 //fInt[++fDepth] = fPosition; 2274 if(DEBUG_SKIP_ALGORITHM){ 2275 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2276 } 2277 //return fElements[fPosition++]; 2278 return fElements[fPosition]; 2279 } 2280 2281 /** This function should be called only when element was skipped sucessfully. 2282 * 1. Increase the depth - because element was sucessfully skipped. 2283 *2. Store the position of the element token in array "last opened tag" at depth. 2284 *3. increase the position counter so as to point to the next element in the array 2285 */ 2286 public void push(){ 2287 2288 fInt[++fDepth] = fPosition++; 2289 } 2290 2291 /** Check if the element scanned during the start element 2292 *matches the stored element. 2293 * 2294 *@return true if the match suceeds. 
2295 */ 2296 public boolean matchElement(QName element) { 2297 //last depth is the depth when last elemnt was pushed 2298 //if last depth is greater than current depth 2299 //if(DEBUG_SKIP_ALGORITHM){ 2300 // System.out.println("Check if the element " + element.rawname + " matches"); 2301 // System.out.println("fLastDepth = " + fLastDepth); 2302 // System.out.println("fDepth = " + fDepth); 2303 //} 2304 boolean match = false; 2305 if(fLastDepth > fDepth && fDepth <= 3){ 2306 if(DEBUG_SKIP_ALGORITHM){ 2307 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2308 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2309 } 2310 if(element.rawname == fElements[fDepth - 1].rawname){ 2311 fAdd = false; 2312 //mark this position 2313 //decrease the depth by 1 as arrays are 0 based 2314 fMark = fDepth - 1; 2315 //we found the match 2316 fPosition = fMark; 2317 match = true; 2318 //Once we get match decrease the count -- this was increased by nextElement() 2319 --fCount; 2320 if(DEBUG_SKIP_ALGORITHM){ 2321 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2322 System.out.println("fMark = " + fMark); 2323 System.out.println("fPosition = " + fPosition); 2324 System.out.println("fDepth = " + fDepth); 2325 System.out.println("fCount = " + fCount); 2326 System.out.println("---------MATCH SUCEEDED-----------------"); 2327 System.out.println(""); 2328 } 2329 }else{ 2330 fAdd = true; 2331 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2332 } 2333 } 2334 //store the position for the current depth 2335 //when we are adding the elements, when skipping 2336 //starts even then this should be tracked ie. 
when 2337 //calling getNext() 2338 if(match){ 2339 //from next element skipping will start, add 1 2340 fInt[fDepth] = fPosition++; 2341 } else{ 2342 if(DEBUG_SKIP_ALGORITHM){ 2343 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2344 } 2345 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 2346 fInt[fDepth] = fCount - 1; 2347 } 2348 2349 //if number of elements becomes equal to the length of array -- stop the skipping 2350 //xxx: should we do "fCount == fInt.length" 2351 if (fCount == fElements.length) { 2352 fSkip = false; 2353 fAdd = false; 2354 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2355 reposition(); 2356 if(DEBUG_SKIP_ALGORITHM){ 2357 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2358 System.out.println("REPOSITIONING THE STACK"); 2359 System.out.println("-----------SKIPPING STOPPED----------"); 2360 System.out.println(""); 2361 } 2362 return false; 2363 } 2364 if(DEBUG_SKIP_ALGORITHM){ 2365 if(match){ 2366 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2367 }else{ 2368 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2369 } 2370 } 2371 //store the last depth 2372 fLastDepth = fDepth; 2373 return match; 2374 } // matchElement(QName):QName 2375 2376 2377 /** 2378 * Returns the next element on the stack. 2379 * 2380 * @return Returns the actual QName object. Callee should 2381 * use this object to store the details of next element encountered. 
2382 */ 2383 public QName nextElement() { 2384 if(fSkip){ 2385 fDepth++; 2386 //boundary checks are done in matchElement() 2387 return fElements[fCount++]; 2388 } else if (fDepth == fElements.length) { 2389 QName[] array = new QName[fElements.length * 2]; 2390 System.arraycopy(fElements, 0, array, 0, fDepth); 2391 fElements = array; 2392 for (int i = fDepth; i < fElements.length; i++) { 2393 fElements[i] = new QName(); 2394 } 2395 } 2396 2397 return fElements[fDepth++]; 2398 2399 } // pushElement(QName):QName 2400 2401 2402 /** 2403 * Pops an element off of the stack by setting the values of 2404 * the specified QName. 2405 * <p> 2406 * <strong>Note:</strong> The object returned is <em>not</em> 2407 * orphaned to the caller. Therefore, the caller should consider 2408 * the object to be read-only. 2409 */ 2410 public QName popElement() { 2411 //return the same object that was pushed -- this would avoid 2412 //setting the values for every end element. 2413 //STRONG: this object is read only -- this object reference shouldn't be stored. 2414 if(fSkip || fAdd ){ 2415 if(DEBUG_SKIP_ALGORITHM){ 2416 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2417 System.out.println(""); 2418 } 2419 return fElements[fInt[fDepth--]]; 2420 } else{ 2421 if(DEBUG_SKIP_ALGORITHM){ 2422 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2423 } 2424 return fElements[--fDepth] ; 2425 } 2426 //element.setValues(fElements[--fDepth]); 2427 } // popElement(QName) 2428 2429 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2430 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2431 *as per the depth. 
2432 */ 2433 public void reposition(){ 2434 for( int i = 2 ; i <= fDepth ; i++){ 2435 fElements[i-1] = fElements[fInt[i]]; 2436 } 2437 if(DEBUG_SKIP_ALGORITHM){ 2438 for( int i = 0 ; i < fDepth ; i++){ 2439 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2440 } 2441 } 2442 } 2443 2444 /** Clears the stack without throwing away existing QName objects. */ 2445 public void clear() { 2446 fDepth = 0; 2447 fLastDepth = 0; 2448 fCount = 0 ; 2449 fPosition = fMark = 1; 2450 2451 } // clear() 2452 2453 /** 2454 * This function is as a result of optimization done for endElement -- 2455 * we dont need to set the value for every end element encouterd. 2456 * For Well formedness checks we can have the same QName object that was pushed. 2457 * the values will be set only if application need to know about the endElement 2458 * -- neeraj.bajaj@sun.com 2459 */ 2460 2461 public QName getLastPoppedElement(){ 2462 return fElements[fDepth]; 2463 } 2464 } // class ElementStack 2465 2466 /** 2467 * Drives the parser to the next state/event on the input. Parser is guaranteed 2468 * to stop at the next state/event. 2469 * 2470 * Internally XML document is divided into several states. Each state represents 2471 * a sections of XML document. When this functions returns normally, it has read 2472 * the section of XML document and returns the state corresponding to section of 2473 * document which has been read. For optimizations, a particular driver 2474 * can read ahead of the section of document (state returned) just read and 2475 * can maintain a different internal state. 2476 * 2477 * 2478 * @author Neeraj Bajaj, Sun Microsystems 2479 */ 2480 protected interface Driver { 2481 2482 2483 /** 2484 * Drives the parser to the next state/event on the input. Parser is guaranteed 2485 * to stop at the next state/event. 2486 * 2487 * Internally XML document is divided into several states. Each state represents 2488 * a sections of XML document. 
When this functions returns normally, it has read 2489 * the section of XML document and returns the state corresponding to section of 2490 * document which has been read. For optimizations, a particular driver 2491 * can read ahead of the section of document (state returned) just read and 2492 * can maintain a different internal state. 2493 * 2494 * @return state representing the section of document just read. 2495 * 2496 * @throws IOException Thrown on i/o error. 2497 * @throws XNIException Thrown on parse error. 2498 */ 2499 2500 public int next() throws IOException, XNIException; 2501 2502 } // interface Driver 2503 2504 /** 2505 * Driver to handle content scanning. This driver is capable of reading 2506 * the fragment of XML document. When it has finished reading fragment 2507 * of XML documents, it can pass the job of reading to another driver. 2508 * 2509 * This class has been modified as per the new design which is more suited to 2510 * efficiently build pull parser. Lot of performance improvements have been done and 2511 * the code has been added to support stax functionality/features. 2512 * 2513 * @author Neeraj Bajaj, Sun Microsystems 2514 * 2515 * 2516 * @author Andy Clark, IBM 2517 * @author Eric Ye, IBM 2518 */ 2519 protected class FragmentContentDriver 2520 implements Driver { 2521 2522 // 2523 // Driver methods 2524 // 2525 private boolean fContinueDispatching = true; 2526 private boolean fScanningForMarkup = true; 2527 2528 /** 2529 * decides the appropriate state of the parser 2530 */ 2531 private void startOfMarkup() throws IOException { 2532 fMarkupDepth++; 2533 final int ch = fEntityScanner.peekChar(); 2534 2535 switch(ch){ 2536 case '?' :{ 2537 setScannerState(SCANNER_STATE_PI); 2538 fEntityScanner.skipChar(ch); 2539 break; 2540 } 2541 case '!' 
:{ 2542 fEntityScanner.skipChar(ch); 2543 if (fEntityScanner.skipChar('-')) { 2544 if (!fEntityScanner.skipChar('-')) { 2545 reportFatalError("InvalidCommentStart", 2546 null); 2547 } 2548 setScannerState(SCANNER_STATE_COMMENT); 2549 } else if (fEntityScanner.skipString(cdata)) { 2550 setScannerState(SCANNER_STATE_CDATA ); 2551 } else if (!scanForDoctypeHook()) { 2552 reportFatalError("MarkupNotRecognizedInContent", 2553 null); 2554 } 2555 break; 2556 } 2557 case '/' :{ 2558 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2559 fEntityScanner.skipChar(ch); 2560 break; 2561 } 2562 default :{ 2563 if (isValidNameStartChar(ch)) { 2564 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2565 } else { 2566 reportFatalError("MarkupNotRecognizedInContent", 2567 null); 2568 } 2569 } 2570 } 2571 2572 }//startOfMarkup 2573 2574 private void startOfContent() throws IOException { 2575 if (fEntityScanner.skipChar('<')) { 2576 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2577 } else if (fEntityScanner.skipChar('&')) { 2578 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2579 } else { 2580 //element content is there.. 2581 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2582 } 2583 }//startOfContent 2584 2585 2586 /** 2587 * 2588 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2589 * At any point of time when in doubt over the current state of the parser, the state should be 2590 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2591 * the parser to one of its sub state. 2592 * sub states are defined in the parser on the basis of different XML component like 2593 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2594 * These sub states help the parser to have fine control over the parsing. These are the 2595 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2596 * decided if paresr needs to stop at next milepost ?? 2597 * 2598 */ 2599 public void decideSubState() throws IOException { 2600 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2601 2602 switch (fScannerState) { 2603 2604 case SCANNER_STATE_CONTENT: { 2605 startOfContent() ; 2606 break; 2607 } 2608 2609 case SCANNER_STATE_START_OF_MARKUP: { 2610 startOfMarkup() ; 2611 break; 2612 } 2613 } 2614 } 2615 }//decideSubState 2616 2617 /** 2618 * Drives the parser to the next state/event on the input. Parser is guaranteed 2619 * to stop at the next state/event. Internally XML document 2620 * is divided into several states. Each state represents a sections of XML 2621 * document. When this functions returns normally, it has read the section 2622 * of XML document and returns the state corresponding to section of 2623 * document which has been read. For optimizations, a particular driver 2624 * can read ahead of the section of document (state returned) just read and 2625 * can maintain a different internal state. 2626 * 2627 * State returned corresponds to Stax states. 2628 * 2629 * @return state representing the section of document just read. 2630 * 2631 * @throws IOException Thrown on i/o error. 2632 * @throws XNIException Thrown on parse error. 2633 */ 2634 2635 public int next() throws IOException, XNIException { 2636 while (true) { 2637 try { 2638 if(DEBUG_NEXT){ 2639 System.out.println("NOW IN FragmentContentDriver"); 2640 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2641 } 2642 2643 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2644 //decideSubState. 
2645 2646 switch (fScannerState) { 2647 case SCANNER_STATE_CONTENT: { 2648 final int ch = fEntityScanner.peekChar(); 2649 if (ch == '<') { 2650 fEntityScanner.scanChar(); 2651 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2652 } else if (ch == '&') { 2653 fEntityScanner.scanChar(); 2654 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2655 break; 2656 } else { 2657 //element content is there.. 2658 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2659 break; 2660 } 2661 } 2662 2663 case SCANNER_STATE_START_OF_MARKUP: { 2664 startOfMarkup(); 2665 break; 2666 }//case: SCANNER_STATE_START_OF_MARKUP 2667 2668 }//end of switch 2669 //decideSubState() ; 2670 2671 //do some special handling if isCoalesce is set to true. 2672 if(fIsCoalesce){ 2673 fUsebuffer = true ; 2674 //if the last section was character data 2675 if(fLastSectionWasCharacterData){ 2676 2677 //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2678 //return the last scanned charactrer data. 2679 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2680 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2681 fLastSectionWasCharacterData = false; 2682 return XMLEvent.CHARACTERS; 2683 } 2684 }//if last section was CDATA or ENTITY REFERENCE 2685 //xxx: there might be another entity reference or CDATA after this 2686 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2687 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2688 //and current state is not SCANNER_STATE_CHARACTER_DATA 2689 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2690 //this means there is nothing more to be coalesced. 2691 //return the CHARACTERS event. 
2692 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2693 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2694 2695 fLastSectionWasCData = false; 2696 fLastSectionWasEntityReference = false; 2697 return XMLEvent.CHARACTERS; 2698 } 2699 } 2700 } 2701 2702 2703 if(DEBUG_NEXT){ 2704 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2705 } 2706 2707 switch(fScannerState){ 2708 2709 case XMLEvent.START_DOCUMENT : 2710 return XMLEvent.START_DOCUMENT; 2711 2712 case SCANNER_STATE_START_ELEMENT_TAG :{ 2713 2714 //xxx this function returns true when element is empty.. can be linked to end element event. 2715 //returns true if the element is empty 2716 fEmptyElement = scanStartElement() ; 2717 //if the element is empty the next event is "end element" 2718 if(fEmptyElement){ 2719 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2720 }else{ 2721 //set the next possible state 2722 setScannerState(SCANNER_STATE_CONTENT); 2723 } 2724 return XMLEvent.START_ELEMENT ; 2725 } 2726 2727 case SCANNER_STATE_CHARACTER_DATA: { 2728 if(DEBUG_COALESCE){ 2729 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2730 System.out.println("fIsCoalesce = " + fIsCoalesce); 2731 } 2732 //if last section was either entity reference or cdata or character data we should be using buffer 2733 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2734 2735 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2736 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2737 fLastSectionWasEntityReference = false; 2738 fLastSectionWasCData = false; 2739 fLastSectionWasCharacterData = true ; 2740 fUsebuffer = true; 2741 }else{ 2742 //clear the buffer 2743 fContentBuffer.clear(); 2744 } 2745 2746 //set the fTempString length to 0 before passing it on to scanContent 2747 //scanContent sets the correct co-ordinates as per the content read 2748 fTempString.length = 0; 2749 int c = fEntityScanner.scanContent(fTempString); 2750 if(DEBUG){ 2751 System.out.println("fTempString = " + fTempString); 2752 } 2753 if(fEntityScanner.skipChar('<')){ 2754 //check if we have reached end of element 2755 if(fEntityScanner.skipChar('/')){ 2756 //increase the mark up depth 2757 fMarkupDepth++; 2758 fLastSectionWasCharacterData = false; 2759 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2760 //check if its start of new element 2761 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2762 fMarkupDepth++; 2763 fLastSectionWasCharacterData = false; 2764 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2765 }else{ 2766 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2767 //there can be cdata ahead if coalesce is true we should call again 2768 if(fIsCoalesce){ 2769 fUsebuffer = true; 2770 fLastSectionWasCharacterData = true; 2771 fContentBuffer.append(fTempString); 2772 fTempString.length = 0; 2773 continue; 2774 } 2775 } 2776 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2777 if(fUsebuffer){ 2778 fContentBuffer.append(fTempString); 2779 fTempString.length = 0; 2780 } 2781 if(DEBUG){ 2782 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2783 } 2784 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2785 if(DEBUG)System.out.println("Return SPACE EVENT"); 2786 return XMLEvent.SPACE; 2787 }else 2788 return 
XMLEvent.CHARACTERS; 2789 2790 } else{ 2791 fUsebuffer = true ; 2792 if(DEBUG){ 2793 System.out.println("fContentBuffer = " + fContentBuffer); 2794 System.out.println("fTempString = " + fTempString); 2795 } 2796 fContentBuffer.append(fTempString); 2797 fTempString.length = 0; 2798 } 2799 if (c == '\r') { 2800 if(DEBUG){ 2801 System.out.println("'\r' character found"); 2802 } 2803 // happens when there is the character reference 2804 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2805 fEntityScanner.scanChar(); 2806 fUsebuffer = true; 2807 fContentBuffer.append((char)c); 2808 c = -1 ; 2809 } else if (c == ']') { 2810 //fStringBuffer.clear(); 2811 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2812 fUsebuffer = true; 2813 fContentBuffer.append((char)fEntityScanner.scanChar()); 2814 // remember where we are in case we get an endEntity before we 2815 // could flush the buffer out - this happens when we're parsing an 2816 // entity which ends with a ] 2817 fInScanContent = true; 2818 2819 // We work on a single character basis to handle cases such as: 2820 // ']]]>' which we might otherwise miss. 2821 // 2822 if (fEntityScanner.skipChar(']')) { 2823 fContentBuffer.append(']'); 2824 while (fEntityScanner.skipChar(']')) { 2825 fContentBuffer.append(']'); 2826 } 2827 if (fEntityScanner.skipChar('>')) { 2828 reportFatalError("CDEndInContent", null); 2829 } 2830 } 2831 c = -1 ; 2832 fInScanContent = false; 2833 } 2834 2835 do{ 2836 //xxx: we should be using only one buffer.. 2837 // we need not to grow the buffer only when isCoalesce() is not true; 2838 2839 if (c == '<') { 2840 fEntityScanner.scanChar(); 2841 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2842 break; 2843 }//xxx what should be the behavior if entity reference is present in the content ? 
2844 else if (c == '&') { 2845 fEntityScanner.scanChar(); 2846 setScannerState(SCANNER_STATE_REFERENCE); 2847 break; 2848 }///xxx since this part is also characters, it should be merged... 2849 else if (c != -1 && isInvalidLiteral(c)) { 2850 if (XMLChar.isHighSurrogate(c)) { 2851 // special case: surrogates 2852 scanSurrogates(fContentBuffer) ; 2853 setScannerState(SCANNER_STATE_CONTENT); 2854 } else { 2855 reportFatalError("InvalidCharInContent", 2856 new Object[] { 2857 Integer.toString(c, 16)}); 2858 fEntityScanner.scanChar(); 2859 } 2860 break; 2861 } 2862 //xxx: scanContent also gives character callback. 2863 c = scanContent(fContentBuffer) ; 2864 //we should not be iterating again if fIsCoalesce is not set to true 2865 2866 if(!fIsCoalesce){ 2867 setScannerState(SCANNER_STATE_CONTENT); 2868 break; 2869 } 2870 2871 }while(true); 2872 2873 //if (fDocumentHandler != null) { 2874 // fDocumentHandler.characters(fContentBuffer, null); 2875 //} 2876 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2877 //if fIsCoalesce is true there might be more data so call fDriver.next() 2878 if(fIsCoalesce){ 2879 fLastSectionWasCharacterData = true ; 2880 continue; 2881 }else{ 2882 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2883 if(DEBUG)System.out.println("Return SPACE EVENT"); 2884 return XMLEvent.SPACE; 2885 } else 2886 return XMLEvent.CHARACTERS ; 2887 } 2888 } 2889 2890 case SCANNER_STATE_END_ELEMENT_TAG :{ 2891 if(fEmptyElement){ 2892 //set it back to false. 2893 fEmptyElement = false; 2894 setScannerState(SCANNER_STATE_CONTENT); 2895 //check the case when there is comment after single element document 2896 //<foo/> and some comment after this 2897 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2898 2899 } else if(scanEndElement() == 0) { 2900 //It is last element of the document 2901 if (elementDepthIsZeroHook()) { 2902 //if element depth is zero , it indicates the end of the document 2903 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2904 //xxx understand this point once again.. 2905 return XMLEvent.END_ELEMENT ; 2906 } 2907 2908 } 2909 setScannerState(SCANNER_STATE_CONTENT); 2910 return XMLEvent.END_ELEMENT ; 2911 } 2912 2913 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2914 scanComment(); 2915 setScannerState(SCANNER_STATE_CONTENT); 2916 return XMLEvent.COMMENT; 2917 //break; 2918 } 2919 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2920 //clear the buffer first 2921 fContentBuffer.clear() ; 2922 //xxx: which buffer should be passed. Ideally we shouldn't have 2923 //more than two buffers -- 2924 //xxx: where should we add the switch for buffering. 2925 scanPI(fContentBuffer); 2926 setScannerState(SCANNER_STATE_CONTENT); 2927 return XMLEvent.PROCESSING_INSTRUCTION; 2928 //break; 2929 } 2930 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2931 //xxx: What if CDATA is the first event 2932 //<foo><![CDATA[hello<><>]]>append</foo> 2933 2934 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 2935 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2936 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2937 fLastSectionWasCData = true ; 2938 fLastSectionWasEntityReference = false; 2939 fLastSectionWasCharacterData = false; 2940 }//if we dont need to coalesce clear the buffer 2941 else{ 2942 fContentBuffer.clear(); 2943 } 2944 fUsebuffer = true; 2945 //CDATA section is completely read in all the case. 2946 scanCDATASection(fContentBuffer , true); 2947 setScannerState(SCANNER_STATE_CONTENT); 2948 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2949 //and just call fDispatche.next(). Since we have set the scanner state to 2950 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2951 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2952 //2. Check if application has set for reporting CDATA event 2953 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2954 //return the cdata event as characters. 2955 if(fIsCoalesce){ 2956 fLastSectionWasCData = true ; 2957 //there might be more data to coalesce. 2958 continue; 2959 }else if(fReportCdataEvent){ 2960 return XMLEvent.CDATA; 2961 } else{ 2962 return XMLEvent.CHARACTERS; 2963 } 2964 } 2965 2966 case SCANNER_STATE_REFERENCE :{ 2967 fMarkupDepth++; 2968 foundBuiltInRefs = false; 2969 2970 //we should not clear the buffer only when the last state was either CDATA or 2971 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2972 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2973 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2974 //used when fIsCoalesce is set to true. 
2975 fLastSectionWasEntityReference = true ; 2976 fLastSectionWasCData = false; 2977 fLastSectionWasCharacterData = false; 2978 }//if we dont need to coalesce clear the buffer 2979 else{ 2980 fContentBuffer.clear(); 2981 } 2982 fUsebuffer = true ; 2983 //take care of character reference 2984 if (fEntityScanner.skipChar('#')) { 2985 scanCharReferenceValue(fContentBuffer, null); 2986 fMarkupDepth--; 2987 if(!fIsCoalesce){ 2988 setScannerState(SCANNER_STATE_CONTENT); 2989 return XMLEvent.CHARACTERS; 2990 } 2991 } else { 2992 // this function also starts new entity 2993 scanEntityReference(fContentBuffer); 2994 //if there was built-in entity reference & coalesce is not true 2995 //return CHARACTERS 2996 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 2997 setScannerState(SCANNER_STATE_CONTENT); 2998 return XMLEvent.CHARACTERS; 2999 } 3000 3001 //if there was a text declaration, call next() it will be taken care. 3002 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3003 fLastSectionWasEntityReference = true ; 3004 continue; 3005 } 3006 3007 if(fScannerState == SCANNER_STATE_REFERENCE){ 3008 setScannerState(SCANNER_STATE_CONTENT); 3009 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3010 // Skip the entity reference, we don't care 3011 continue; 3012 } 3013 return XMLEvent.ENTITY_REFERENCE; 3014 } 3015 } 3016 //Wether it was character reference, entity reference or built-in entity 3017 //set the next possible state to SCANNER_STATE_CONTENT 3018 setScannerState(SCANNER_STATE_CONTENT); 3019 fLastSectionWasEntityReference = true ; 3020 continue; 3021 } 3022 3023 case SCANNER_STATE_TEXT_DECL: { 3024 // scan text decl 3025 if (fEntityScanner.skipString("<?xml")) { 3026 fMarkupDepth++; 3027 // NOTE: special case where entity starts with a PI 3028 // whose name starts with "xml" (e.g. 
"xmlfoo") 3029 if (isValidNameChar(fEntityScanner.peekChar())) { 3030 fStringBuffer.clear(); 3031 fStringBuffer.append("xml"); 3032 3033 if (fNamespaces) { 3034 while (isValidNCName(fEntityScanner.peekChar())) { 3035 fStringBuffer.append((char)fEntityScanner.scanChar()); 3036 } 3037 } else { 3038 while (isValidNameChar(fEntityScanner.peekChar())) { 3039 fStringBuffer.append((char)fEntityScanner.scanChar()); 3040 } 3041 } 3042 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3043 fContentBuffer.clear(); 3044 scanPIData(target, fContentBuffer); 3045 } 3046 3047 // standard text declaration 3048 else { 3049 //xxx: this function gives callback 3050 scanXMLDeclOrTextDecl(true); 3051 } 3052 } 3053 // now that we've straightened out the readers, we can read in chunks: 3054 fEntityManager.fCurrentEntity.mayReadChunks = true; 3055 setScannerState(SCANNER_STATE_CONTENT); 3056 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3057 //it seems we have to careful when to allow function issue a callback 3058 //and when to allow adapter issue a callback. 
continue;
                    }


                    case SCANNER_STATE_ROOT_ELEMENT: {
                        // START_ELEMENT is reported on both paths; only the bookkeeping differs.
                        if (scanRootElementHook()) {
                            fEmptyElement = true;
                            //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
                            return XMLEvent.START_ELEMENT;
                        }
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.START_ELEMENT ;
                    }
                    case SCANNER_STATE_CHAR_REFERENCE : {
                        // The buffer is cleared first, so the event carries only the
                        // value scanned for this character reference.
                        fContentBuffer.clear();
                        scanCharReferenceValue(fContentBuffer, null);
                        fMarkupDepth--;
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.CHARACTERS;
                    }
                    default:
                        throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                    }//switch
                }
                // premature end of file
                catch (EOFException e) {
                    // endOfFileHook decides whether this EOF is an error;
                    // if it returns normally, -1 is handed back to the caller.
                    endOfFileHook(e);
                    return -1;
                }
            } //while loop
        }//next


        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "&lt;!" has been scanned.
         * <p>
         * This implementation does no scanning and always returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. However, when scanning a full XML document, the
         * scanner must handle the trailing miscellaneous section of
         * the document after the end of the document's root element.
         * <p>
         * This implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Scan for root element hook. This method is a hook for
         * subclasses to add code that handles scanning for the root
         * element. When scanning a document fragment, there is no
         * "root" element. However, when scanning a full XML document,
         * the scanner must handle the root element specially.
         * <p>
         * This implementation always returns false. In the
         * SCANNER_STATE_ROOT_ELEMENT case of next(), a true result marks
         * the element as empty (fEmptyElement) and START_ELEMENT is
         * returned without the state being reset to SCANNER_STATE_CONTENT.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean scanRootElementHook()
        throws IOException, XNIException {
            return false;
        } // scanRootElementHook():boolean

        /**
         * End of file hook. This method is a hook for subclasses to
         * add code that handles the end of file. The end of file in
         * a document fragment is OK if the markup depth is zero.
         * However, when scanning a full XML document, an end of file
         * is always premature.
         * <p>
         * This implementation reports the fatal error "PrematureEOF"
         * when fMarkupDepth is non-zero and otherwise does nothing.
         *
         * @param e the end-of-file exception caught by next(); it is not
         *          inspected by this implementation.
         */
        protected void endOfFileHook(EOFException e)
        throws IOException, XNIException {

            // NOTE: An end of file is only an error if we were
            //       in the middle of scanning some markup. -Ac
            if (fMarkupDepth != 0) {
                reportFatalError("PrematureEOF", null);
            }

        } // endOfFileHook()

    } // class FragmentContentDriver

    /**
     * Debugging helper: writes the given string to standard output.
     *
     * @param str text to print.
     */
    static void pr(String str) {
        System.out.println(str) ;
    }

    // Set to true in this class wherever text is appended to fContentBuffer
    // (see refresh(int) below).
    // NOTE(review): presumably tells consumers to take character data from
    // fContentBuffer instead of fTempString -- confirm against the readers of this flag.
    protected boolean fUsebuffer ;

    /**
     * Gets an XMLString (which is used to store an attribute value) from the
     * special pool maintained for attributes.
     * fAttributeCacheUsedCount tracks the number of pool entries that have been
     * consumed. If all entries have been consumed, a new XMLString is added to
     * the pool and that same XMLString is returned.
     * <p>
     * Every call increments fAttributeCacheUsedCount by one.
     * <p>
     * NOTE(review): the first half of the condition indexes the pool without
     * checking its size, so it assumes the pool already holds at least
     * initialCacheCount entries -- confirm where the pool is populated.
     *
     * @return XMLString XMLString used to store an attribute value.
     */

    protected XMLString getString(){
        if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){
            // reuse the next unused pooled instance
            return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++);
        } else{
            // pool exhausted: grow it by one and hand out the new instance
            XMLString str = new XMLString();
            fAttributeCacheUsedCount++;
            attributeValueCache.add(str);
            return str;
        }
    }

    /**
     * Implements XMLBufferListener interface.
     * Delegates to {@link #refresh(int)} with a position of 0.
     */

    public void refresh(){
        refresh(0);
    }

    /**
     * Receives callbacks from {@link XMLEntityReader } when the buffer
     * is being changed.
     * <p>
     * If attributes are currently being read, fAttributes is refreshed.
     * If the scanner is in SCANNER_STATE_CHARACTER_DATA, the text currently
     * referenced by fTempString is appended to fContentBuffer, fTempString is
     * emptied and fUsebuffer is set.
     *
     * @param refreshPosition not used by this implementation.
     */
    public void refresh(int refreshPosition){
        //If you are reading attributes and you got a callback
        //cache available attributes.
        if(fReadingAttributes){
            fAttributes.refresh();
        }
        if(fScannerState == SCANNER_STATE_CHARACTER_DATA){
            //since fTempString directly matches to the underlying main buffer
            //store the data into buffer
            fContentBuffer.append(fTempString);
            //clear the XMLString so that data can't be added again.
            fTempString.length = 0;
            fUsebuffer = true;
        }
    }

} // class XMLDocumentFragmentScannerImpl