1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.SecurityManager; 54 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 55 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 56 import javax.xml.stream.XMLStreamConstants; 57 import javax.xml.stream.events.XMLEvent; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 67 * 68 * @author Neeraj Bajaj SUN Microsystems 69 * @author K.Venugopal SUN Microsystems 70 * @author Glenn Marcy, IBM 71 * @author Andy Clark, IBM 72 * @author Arnaud Le Hors, IBM 73 * @author Eric Ye, IBM 74 * @author Sunitha Reddy, SUN Microsystems 75 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $ 76 * 77 */ 78 public class XMLDocumentFragmentScannerImpl 79 extends XMLScanner 80 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 81 82 // 83 // Constants 84 // 85 86 protected int fElementAttributeLimit; 87 88 /** External subset resolver. **/ 89 protected ExternalSubsetResolver fExternalSubsetResolver; 90 91 // scanner states 92 93 //XXX this should be divided into more states. 
/** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    // <book type="hard"> reading attribute name 'type'
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    // <book type="hard"> reading attribute value.
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL */
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: Text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    // <book type="hard">foo</book> reading the start tag
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    // <book type="hard">foo</book> reading </book>
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    // NOTE(review): presumably "scanning a character reference" and
    // "scanning a built-in entity reference"; confirm against the driver code.
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    // recognized features and properties

    /** Recognized features; a copy is handed out by getRecognizedFeatures(). */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT
    };

    /**
     * Feature defaults. Parallel to RECOGNIZED_FEATURES: getFeatureDefault()
     * looks an entry up by the index of the matching feature identifier.
     */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        null,
        Boolean.FALSE,
        Boolean.FALSE,
        Boolean.TRUE
    };

    /** Recognized properties; a copy is handed out by getRecognizedProperties(). */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
    };

    /**
     * Property defaults. Parallel to RECOGNIZED_PROPERTIES: getPropertyDefault()
     * looks an entry up by the index of the matching property identifier.
     */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
    };

    // Literal character sequences: "[CDATA[" and "</".
    private static final char [] cdata = {'[','C','D','A','T','A','['};
    private static final char [] endTag = {'<','/'};

    // "<?xml"; this variable is also used by XMLDocumentScannerImpl in the same package
    static final char [] xmlDecl = {'<','?','x','m','l'};

    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    // NOTE(review): the original comment here duplicated the one on DEBUG_NEXT
    // ("Debug driver next"); the exact scope of these two flags is not visible here.
    protected static final boolean DEBUG = false;
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    // Compared against XMLEvent.END_ELEMENT by getElementQName().
    protected int fScannerLastState ;

    /** Entity storage; refreshed from the entity manager on every reset(). */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack: the markup depth recorded by startEntity() for each entity
     * depth, checked again in endEntity(). Doubled in size when full.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth. */
    protected int fMarkupDepth;

    //is the element empty
    protected boolean fEmptyElement ;

    //track if we are reading attributes, this is useful while
    //there is a callback
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /**
     * Standalone. fStandaloneSet records whether the scanned declaration carried
     * a standalone pseudo-attribute; fStandalone is true only when its value was "yes".
     */
    protected boolean fStandaloneSet;
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    // Target of the most recently scanned processing instruction (set by scanPIData()).
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    //STAX related properties
    //defaultValues.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    // Encoding pseudo-attribute of the last scanned XML/text declaration (may be null).
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /** Content driver. */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** String. */
    protected XMLString fTempString = new XMLString();

    /** String. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings; filled by scanXMLDeclOrTextDecl() with the version,
     * encoding and standalone pseudo-attribute values, in that order.
     */
    private String[] fStrings = new String[3];

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Making the buffer accessible to derived class -- String buffer.
*/
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Stores character data; made accessible to derived classes. This is the
     * buffer returned by getPIData() and getComment(), and by getCharacterData()
     * when fUsebuffer is set.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    // Cleared (set to null) by both reset() methods.
    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    // Read from the Constants.SECURITY_MANAGER property in reset(XMLComponentManager).
    protected SecurityManager fSecurityManager = null;

    //skip element algorithm
    // Pointer information is only recorded for element depths below this limit.
    static final short MAX_DEPTH_LIMIT = 5 ;
    // Capacity of fElementArray; addElement() does nothing once it is full.
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    // Number of pointer slots kept per depth in fPointerInfo.
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    //create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    //pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    // Index of the next free slot in fElementArray.
    short fElementPointer = 0 ;
    //2D array to store pointer info: [depth][column] -> index into fElementArray;
    //a value of 0 marks a free slot (see canStore)
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor; all state is set up by field initializers and reset(). */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
396 */ 397 public void setInputSource(XMLInputSource inputSource) throws IOException { 398 fEntityManager.setEntityHandler(this); 399 fEntityManager.startEntity("$fragment$", inputSource, false, true); 400 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 401 } // setInputSource(XMLInputSource) 402 403 /** 404 * Scans a document. 405 * 406 * @param complete True if the scanner should scan the document 407 * completely, pushing all events to the registered 408 * document handler. A value of false indicates that 409 * that the scanner should only scan the next portion 410 * of the document and return. A scanner instance is 411 * permitted to completely scan a document if it does 412 * not support this "pull" scanning model. 413 * 414 * @return True if there is more to scan, false otherwise. 415 */ 416 /* public boolean scanDocument(boolean complete) 417 throws IOException, XNIException { 418 419 // keep dispatching "events" 420 fEntityManager.setEntityHandler(this); 421 422 return true; 423 424 } // scanDocument(boolean):boolean 425 */ 426 427 public boolean scanDocument(boolean complete) 428 throws IOException, XNIException { 429 430 // keep dispatching "events" 431 fEntityManager.setEntityHandler(this); 432 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 433 434 int event = next(); 435 do { 436 switch (event) { 437 case XMLStreamConstants.START_DOCUMENT : 438 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 439 break; 440 case XMLStreamConstants.START_ELEMENT : 441 //System.out.println(" in scann element"); 442 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 443 break; 444 case XMLStreamConstants.CHARACTERS : 445 fDocumentHandler.characters(getCharacterData(),null); 446 break; 447 case XMLStreamConstants.SPACE: 448 //check if getCharacterData() is the right function 
to retrieve ignorableWhitespace information. 449 //System.out.println("in the space"); 450 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 451 break; 452 case XMLStreamConstants.ENTITY_REFERENCE : 453 //entity reference callback are given in startEntity 454 break; 455 case XMLStreamConstants.PROCESSING_INSTRUCTION : 456 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 457 break; 458 case XMLStreamConstants.COMMENT : 459 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 460 fDocumentHandler.comment(getCharacterData(),null); 461 break; 462 case XMLStreamConstants.DTD : 463 //all DTD related callbacks are handled in DTDScanner. 464 //1. Stax doesn't define DTD states as it does for XML Document. 465 //therefore we don't need to take care of anything here. So Just break; 466 break; 467 case XMLStreamConstants.CDATA: 468 fDocumentHandler.startCDATA(null); 469 //xxx: check if CDATA values comes from getCharacterData() function 470 fDocumentHandler.characters(getCharacterData(),null); 471 fDocumentHandler.endCDATA(null); 472 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 473 break; 474 case XMLStreamConstants.NOTATION_DECLARATION : 475 break; 476 case XMLStreamConstants.ENTITY_DECLARATION : 477 break; 478 case XMLStreamConstants.NAMESPACE : 479 break; 480 case XMLStreamConstants.ATTRIBUTE : 481 break; 482 case XMLStreamConstants.END_ELEMENT : 483 //do not give callback here. 484 //this callback is given in scanEndElement function. 
485 //fDocumentHandler.endElement(getElementQName(),null); 486 break; 487 default : 488 throw new InternalError("processing event: " + event); 489 490 } 491 //System.out.println("here in before calling next"); 492 event = next(); 493 //System.out.println("here in after calling next"); 494 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 495 496 if(event == XMLStreamConstants.END_DOCUMENT) { 497 fDocumentHandler.endDocument(null); 498 return false; 499 } 500 501 return true; 502 503 } // scanDocument(boolean):boolean 504 505 506 507 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 508 if(fScannerLastState == XMLEvent.END_ELEMENT){ 509 fElementQName.setValues(fElementStack.getLastPoppedElement()); 510 } 511 return fElementQName ; 512 } 513 514 /** return the next state on the input 515 * @return int 516 */ 517 518 public int next() throws IOException, XNIException { 519 return fDriver.next(); 520 } 521 522 // 523 // XMLComponent methods 524 // 525 526 /** 527 * Resets the component. The component can query the component manager 528 * about any features and properties that affect the operation of the 529 * component. 530 * 531 * @param componentManager The component manager. 532 * 533 * @throws SAXException Thrown by component on initialization error. 534 * For example, if a feature or property is 535 * required for the operation of the component, the 536 * component manager may throw a 537 * SAXNotRecognizedException or a 538 * SAXNotSupportedException. 
539 */ 540 541 public void reset(XMLComponentManager componentManager) 542 throws XMLConfigurationException { 543 544 super.reset(componentManager); 545 546 // other settings 547 // fDocumentSystemId = null; 548 549 // sax features 550 //fAttributes.setNamespaces(fNamespaces); 551 552 // xerces features 553 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 554 555 fSecurityManager = (SecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 556 fElementAttributeLimit = (fSecurityManager != null)?fSecurityManager.getElementAttrLimit():0; 557 558 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 559 560 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 561 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 562 (ExternalSubsetResolver) resolver : null; 563 564 // initialize vars 565 fMarkupDepth = 0; 566 fCurrentElement = null; 567 fElementStack.clear(); 568 fHasExternalDTD = false; 569 fStandaloneSet = false; 570 fStandalone = false; 571 fInScanContent = false; 572 //skipping algorithm 573 fShouldSkip = false; 574 fAdd = false; 575 fSkip = false; 576 577 //attribute 578 fReadingAttributes = false; 579 //xxx: external entities are supported in Xerces 580 // it would be good to define feature for this case 581 fSupportExternalEntities = true; 582 fReplaceEntityReferences = true; 583 fIsCoalesce = false; 584 585 // setup Driver 586 setScannerState(SCANNER_STATE_CONTENT); 587 setDriver(fContentDriver); 588 fEntityStore = fEntityManager.getEntityStore(); 589 590 dtdGrammarUtil = null; 591 592 593 //fEntityManager.test(); 594 } // reset(XMLComponentManager) 595 596 597 public void reset(PropertyManager propertyManager){ 598 599 super.reset(propertyManager); 600 601 // other settings 602 // fDocumentSystemId = null; 603 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 604 
fNotifyBuiltInRefs = false ; 605 606 // initialize vars 607 fMarkupDepth = 0; 608 fCurrentElement = null; 609 fShouldSkip = false; 610 fAdd = false; 611 fSkip = false; 612 fElementStack.clear(); 613 //fElementStack2.clear(); 614 fHasExternalDTD = false; 615 fStandaloneSet = false; 616 fStandalone = false; 617 //fReplaceEntityReferences = true; 618 //fSupportExternalEntities = true; 619 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_REPLACING_ENTITY_REFERENCES); 620 fReplaceEntityReferences = bo.booleanValue(); 621 bo = (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_SUPPORTING_EXTERNAL_ENTITIES); 622 fSupportExternalEntities = bo.booleanValue(); 623 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 624 if(cdata != null) 625 fReportCdataEvent = cdata.booleanValue() ; 626 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 627 if(coalesce != null) 628 fIsCoalesce = coalesce.booleanValue(); 629 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 630 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 631 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 632 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 633 // setup Driver 634 //we dont need to do this -- nb. 635 //setScannerState(SCANNER_STATE_CONTENT); 636 //setDriver(fContentDriver); 637 fEntityStore = fEntityManager.getEntityStore(); 638 //fEntityManager.test(); 639 640 dtdGrammarUtil = null; 641 642 } // reset(XMLComponentManager) 643 644 /** 645 * Returns a list of feature identifiers that are recognized by 646 * this component. This method may return null if no features 647 * are recognized by this component. 
648 */ 649 public String[] getRecognizedFeatures() { 650 return (String[])(RECOGNIZED_FEATURES.clone()); 651 } // getRecognizedFeatures():String[] 652 653 /** 654 * Sets the state of a feature. This method is called by the component 655 * manager any time after reset when a feature changes state. 656 * <p> 657 * <strong>Note:</strong> Components should silently ignore features 658 * that do not affect the operation of the component. 659 * 660 * @param featureId The feature identifier. 661 * @param state The state of the feature. 662 * 663 * @throws SAXNotRecognizedException The component should not throw 664 * this exception. 665 * @throws SAXNotSupportedException The component should not throw 666 * this exception. 667 */ 668 public void setFeature(String featureId, boolean state) 669 throws XMLConfigurationException { 670 671 super.setFeature(featureId, state); 672 673 // Xerces properties 674 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 675 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 676 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 677 fNotifyBuiltInRefs = state; 678 } 679 } 680 681 } // setFeature(String,boolean) 682 683 /** 684 * Returns a list of property identifiers that are recognized by 685 * this component. This method may return null if no properties 686 * are recognized by this component. 687 */ 688 public String[] getRecognizedProperties() { 689 return (String[])(RECOGNIZED_PROPERTIES.clone()); 690 } // getRecognizedProperties():String[] 691 692 /** 693 * Sets the value of a property. This method is called by the component 694 * manager any time after reset when a property changes value. 695 * <p> 696 * <strong>Note:</strong> Components should silently ignore properties 697 * that do not affect the operation of the component. 698 * 699 * @param propertyId The property identifier. 700 * @param value The value of the property. 
701 * 702 * @throws SAXNotRecognizedException The component should not throw 703 * this exception. 704 * @throws SAXNotSupportedException The component should not throw 705 * this exception. 706 */ 707 public void setProperty(String propertyId, Object value) 708 throws XMLConfigurationException { 709 710 super.setProperty(propertyId, value); 711 712 // Xerces properties 713 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 714 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 715 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 716 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 717 fEntityManager = (XMLEntityManager)value; 718 return; 719 } 720 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 721 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 722 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 723 (ExternalSubsetResolver) value : null; 724 return; 725 } 726 } 727 728 729 // Xerces properties 730 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 731 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 732 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 733 fEntityManager = (XMLEntityManager)value; 734 } 735 return; 736 } 737 738 } // setProperty(String,Object) 739 740 /** 741 * Returns the default state for a feature, or null if this 742 * component does not want to report a default value for this 743 * feature. 744 * 745 * @param featureId The feature identifier. 
746 * 747 * @since Xerces 2.2.0 748 */ 749 public Boolean getFeatureDefault(String featureId) { 750 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 751 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 752 return FEATURE_DEFAULTS[i]; 753 } 754 } 755 return null; 756 } // getFeatureDefault(String):Boolean 757 758 /** 759 * Returns the default state for a property, or null if this 760 * component does not want to report a default value for this 761 * property. 762 * 763 * @param propertyId The property identifier. 764 * 765 * @since Xerces 2.2.0 766 */ 767 public Object getPropertyDefault(String propertyId) { 768 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 769 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 770 return PROPERTY_DEFAULTS[i]; 771 } 772 } 773 return null; 774 } // getPropertyDefault(String):Object 775 776 // 777 // XMLDocumentSource methods 778 // 779 780 /** 781 * setDocumentHandler 782 * 783 * @param documentHandler 784 */ 785 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 786 fDocumentHandler = documentHandler; 787 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 788 } // setDocumentHandler(XMLDocumentHandler) 789 790 791 /** Returns the document handler */ 792 public XMLDocumentHandler getDocumentHandler(){ 793 return fDocumentHandler; 794 } 795 796 // 797 // XMLEntityHandler methods 798 // 799 800 /** 801 * This method notifies of the start of an entity. The DTD has the 802 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 803 * general entities are just specified by their name. 804 * 805 * @param name The name of the entity. 806 * @param identifier The resource identifier. 807 * @param encoding The auto-detected IANA encoding name of the entity 808 * stream. This value will be null in those situations 809 * where the entity encoding is not auto-detected (e.g. 810 * internal entities or a document entity that is 811 * parsed from a java.io.Reader). 
812 * @param augs Additional information that may include infoset augmentations 813 * 814 * @throws XNIException Thrown by handler to signal an error. 815 */ 816 public void startEntity(String name, 817 XMLResourceIdentifier identifier, 818 String encoding, Augmentations augs) throws XNIException { 819 820 // keep track of this entity before fEntityDepth is increased 821 if (fEntityDepth == fEntityStack.length) { 822 int[] entityarray = new int[fEntityStack.length * 2]; 823 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 824 fEntityStack = entityarray; 825 } 826 fEntityStack[fEntityDepth] = fMarkupDepth; 827 828 super.startEntity(name, identifier, encoding, augs); 829 830 // WFC: entity declared in external subset in standalone doc 831 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 832 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 833 new Object[]{name}); 834 } 835 836 /** we are not calling the handlers yet.. */ 837 // call handler 838 if (fDocumentHandler != null && !fScanningAttribute) { 839 if (!name.equals("[xml]")) { 840 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 841 } 842 } 843 844 } // startEntity(String,XMLResourceIdentifier,String) 845 846 /** 847 * This method notifies the end of an entity. The DTD has the pseudo-name 848 * of "[dtd]" parameter entity names start with '%'; and general entities 849 * are just specified by their name. 850 * 851 * @param name The name of the entity. 852 * @param augs Additional information that may include infoset augmentations 853 * 854 * @throws XNIException Thrown by handler to signal an error. 
855 */ 856 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 857 858 /** 859 * // flush possible pending output buffer - see scanContent 860 * if (fInScanContent && fStringBuffer.length != 0 861 * && fDocumentHandler != null) { 862 * fDocumentHandler.characters(fStringBuffer, null); 863 * fStringBuffer.length = 0; // make sure we know it's been flushed 864 * } 865 */ 866 super.endEntity(name, augs); 867 868 // make sure markup is properly balanced 869 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 870 reportFatalError("MarkupEntityMismatch", null); 871 } 872 873 /**/ 874 // call handler 875 if (fDocumentHandler != null && !fScanningAttribute) { 876 if (!name.equals("[xml]")) { 877 fDocumentHandler.endGeneralEntity(name, augs); 878 } 879 } 880 881 882 } // endEntity(String) 883 884 // 885 // Protected methods 886 // 887 888 // Driver factory methods 889 890 /** Creates a content Driver. */ 891 protected Driver createContentDriver() { 892 return new FragmentContentDriver(); 893 } // createContentDriver():Driver 894 895 // scanning methods 896 897 /** 898 * Scans an XML or text declaration. 899 * <p> 900 * <pre> 901 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 902 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 903 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 904 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 905 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 906 * | ('"' ('yes' | 'no') '"')) 907 * 908 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 909 * </pre> 910 * 911 * @param scanningTextDecl True if a text declaration is to 912 * be scanned instead of an XML 913 * declaration. 
914 */ 915 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 916 throws IOException, XNIException { 917 918 // scan decl 919 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 920 fMarkupDepth--; 921 922 // pseudo-attribute values 923 String version = fStrings[0]; 924 String encoding = fStrings[1]; 925 String standalone = fStrings[2]; 926 fDeclaredEncoding = encoding; 927 // set standalone 928 fStandaloneSet = standalone != null; 929 fStandalone = fStandaloneSet && standalone.equals("yes"); 930 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 931 //but this information is only related with Document Entity. 932 fEntityManager.setStandalone(fStandalone); 933 934 935 // call handler 936 if (fDocumentHandler != null) { 937 if (scanningTextDecl) { 938 fDocumentHandler.textDecl(version, encoding, null); 939 } else { 940 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 941 } 942 } 943 944 if(version != null){ 945 fEntityScanner.setVersion(version); 946 fEntityScanner.setXMLVersion(version); 947 } 948 // set encoding on reader, only if encoding was not specified by the application explicitly 949 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 950 fEntityScanner.setEncoding(encoding); 951 } 952 953 } // scanXMLDeclOrTextDecl(boolean) 954 955 public String getPITarget(){ 956 return fPITarget ; 957 } 958 959 public XMLStringBuffer getPIData(){ 960 return fContentBuffer ; 961 } 962 963 //XXX: why not this function behave as per the state of the parser? 964 public XMLString getCharacterData(){ 965 if(fUsebuffer){ 966 return fContentBuffer ; 967 }else{ 968 return fTempString; 969 } 970 971 } 972 973 974 /** 975 * Scans a processing data. This is needed to handle the situation 976 * where a document starts with a processing instruction whose 977 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 978 * 979 * @param target The PI target 980 * @param data The XMLStringBuffer to fill in with the data 981 */ 982 protected void scanPIData(String target, XMLStringBuffer data) 983 throws IOException, XNIException { 984 985 super.scanPIData(target, data); 986 987 //set the PI target and values 988 fPITarget = target ; 989 990 fMarkupDepth--; 991 992 } // scanPIData(String) 993 994 /** 995 * Scans a comment. 996 * <p> 997 * <pre> 998 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 999 * </pre> 1000 * <p> 1001 * <strong>Note:</strong> Called after scanning past '<!--' 1002 */ 1003 protected void scanComment() throws IOException, XNIException { 1004 fContentBuffer.clear(); 1005 scanComment(fContentBuffer); 1006 //getTextCharacters can also be called for reading comments 1007 fUsebuffer = true; 1008 fMarkupDepth--; 1009 1010 } // scanComment() 1011 1012 //xxx value returned by this function may not remain valid if another event is scanned. 1013 public String getComment(){ 1014 return fContentBuffer.toString(); 1015 } 1016 1017 void addElement(String rawname){ 1018 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1019 //storing element raw name in a linear list of array 1020 fElementArray[fElementPointer] = rawname ; 1021 //storing elemnetPointer for particular element depth 1022 1023 if(DEBUG_SKIP_ALGORITHM){ 1024 StringBuffer sb = new StringBuffer() ; 1025 sb.append(" Storing element information ") ; 1026 sb.append(" fElementPointer = " + fElementPointer) ; 1027 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1028 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1029 System.out.println(sb.toString()) ; 1030 } 1031 1032 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1033 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1034 short column = storePointerForADepth(fElementPointer); 1035 if(column > 0){ 1036 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1037 
//identity comparison shouldn't take much time and we can rely on this 1038 //since its guaranteed to have same object id for same string. 1039 if(rawname == fElementArray[pointer]){ 1040 fShouldSkip = true ; 1041 fLastPointerLocation = pointer ; 1042 //reset the things and return. 1043 resetPointer((short)fElementStack.fDepth , column) ; 1044 fElementArray[fElementPointer] = null ; 1045 return ; 1046 }else{ 1047 fShouldSkip = false ; 1048 } 1049 } 1050 } 1051 fElementPointer++ ; 1052 } 1053 } 1054 1055 1056 void resetPointer(short depth, short column){ 1057 fPointerInfo[depth] [column] = (short)0; 1058 } 1059 1060 //returns column information at which pointer was stored. 1061 short storePointerForADepth(short elementPointer){ 1062 short depth = (short) fElementStack.fDepth ; 1063 1064 //Stores element pointer locations at particular depth , only 4 pointer locations 1065 //are stored at particular depth for now. 1066 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1067 1068 if(canStore(depth, i)){ 1069 fPointerInfo[depth][i] = elementPointer ; 1070 if(DEBUG_SKIP_ALGORITHM){ 1071 StringBuffer sb = new StringBuffer() ; 1072 sb.append(" Pointer information ") ; 1073 sb.append(" fElementPointer = " + fElementPointer) ; 1074 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1075 sb.append(" column = " + i ) ; 1076 System.out.println(sb.toString()) ; 1077 } 1078 return i; 1079 } 1080 //else 1081 //pointer was not stored because we reached the limit 1082 } 1083 return -1 ; 1084 } 1085 1086 boolean canStore(short depth, short column){ 1087 //colum = 0 , means first element at particular depth 1088 //column = 1, means second element at particular depth 1089 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1090 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1091 } 1092 1093 1094 short getElementPointer(short depth, short column){ 1095 //colum = 0 , means first element at particular depth 1096 //column = 1, means second element at particular depth 1097 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1098 return fPointerInfo[depth][column] ; 1099 } 1100 1101 //this function assumes that string passed is not null and skips 1102 //the following string from the buffer this makes sure 1103 boolean skipFromTheBuffer(String rawname) throws IOException{ 1104 if(fEntityScanner.skipString(rawname)){ 1105 char c = (char)fEntityScanner.peekChar() ; 1106 //If the start element was completely skipped we should encounter either ' '(space), 1107 //or '/' (in case of empty element) or '>' 1108 if( c == ' ' || c == '/' || c == '>'){ 1109 fElementRawname = rawname ; 1110 return true ; 1111 } else{ 1112 return false; 1113 } 1114 } else 1115 return false ; 1116 } 1117 1118 boolean skipQElement(String rawname) throws IOException{ 1119 1120 final int c = fEntityScanner.getChar(rawname.length()); 1121 //if this character is still valid element name -- this means string can't match 1122 if(XMLChar.isName(c)){ 1123 return false; 1124 }else{ 1125 return fEntityScanner.skipString(rawname); 1126 } 1127 } 1128 1129 protected boolean skipElement() throws IOException { 1130 1131 if(!fShouldSkip) return false ; 1132 1133 if(fLastPointerLocation != 0){ 1134 //Look at the next element stored in the array list.. we might just get a match. 1135 String rawname = fElementArray[fLastPointerLocation + 1] ; 1136 if(rawname != null && skipFromTheBuffer(rawname)){ 1137 fLastPointerLocation++ ; 1138 if(DEBUG_SKIP_ALGORITHM){ 1139 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1140 } 1141 return true ; 1142 } else{ 1143 //reset it back to zero... we haven't got the correct subset yet. 
1144 fLastPointerLocation = 0 ; 1145 1146 } 1147 } 1148 //xxx: we can put some logic here as from what column it should start looking 1149 //for now we always start at 0 1150 //fallback to tolerant algorithm, it would look for differnt element stored at different 1151 //depth and get us the pointer location. 1152 return fShouldSkip && skipElement((short)0); 1153 1154 } 1155 1156 //start of the column at which it should try searching 1157 boolean skipElement(short column) throws IOException { 1158 short depth = (short)fElementStack.fDepth ; 1159 1160 if(depth > MAX_DEPTH_LIMIT){ 1161 return fShouldSkip = false ; 1162 } 1163 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1164 short pointer = getElementPointer(depth , i ) ; 1165 1166 if(pointer == 0){ 1167 return fShouldSkip = false ; 1168 } 1169 1170 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1171 if(DEBUG_SKIP_ALGORITHM){ 1172 System.out.println(); 1173 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1174 System.out.println(); 1175 } 1176 fLastPointerLocation = pointer ; 1177 return fShouldSkip = true ; 1178 } 1179 } 1180 return fShouldSkip = false ; 1181 } 1182 1183 /** 1184 * Scans a start element. This method will handle the binding of 1185 * namespace information and notifying the handler of the start 1186 * of the element. 1187 * <p> 1188 * <pre> 1189 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1190 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1191 * </pre> 1192 * <p> 1193 * <strong>Note:</strong> This method assumes that the leading 1194 * '<' character has been consumed. 1195 * <p> 1196 * <strong>Note:</strong> This method uses the fElementQName and 1197 * fAttributes variables. The contents of these variables will be 1198 * destroyed. The caller should copy important information out of 1199 * these variables before calling this method. 
1200 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1201 * 1202 * @return True if element is empty. (i.e. It matches 1203 * production [44]. 1204 */ 1205 // fElementQName will have the details of element just read.. 1206 // fAttributes will have the details of all the attributes. 1207 protected boolean scanStartElement() 1208 throws IOException, XNIException { 1209 1210 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1211 //when skipping is true and no more elements should be added 1212 if(fSkip && !fAdd){ 1213 //get the stored element -- if everything goes right this should match the 1214 //token in the buffer 1215 1216 QName name = fElementStack.getNext(); 1217 1218 if(DEBUG_SKIP_ALGORITHM){ 1219 System.out.println("Trying to skip String = " + name.rawname); 1220 } 1221 1222 //Be conservative -- if skipping fails -- stop. 1223 fSkip = fEntityScanner.skipString(name.rawname); 1224 1225 if(fSkip){ 1226 if(DEBUG_SKIP_ALGORITHM){ 1227 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1228 } 1229 fElementStack.push(); 1230 fElementQName = name; 1231 }else{ 1232 //if skipping fails reposition the stack or fallback to normal way of processing 1233 fElementStack.reposition(); 1234 if(DEBUG_SKIP_ALGORITHM){ 1235 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1236 } 1237 } 1238 } 1239 1240 //we are still at the stage of adding elements 1241 //the elements were not matched or 1242 //fSkip is not set to true 1243 if(!fSkip || fAdd){ 1244 //get the next element from the stack 1245 fElementQName = fElementStack.nextElement(); 1246 // name 1247 if (fNamespaces) { 1248 fEntityScanner.scanQName(fElementQName); 1249 } else { 1250 String name = fEntityScanner.scanName(); 1251 fElementQName.setValues(null, name, name, null); 1252 } 1253 1254 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1255 if(DEBUG_SKIP_ALGORITHM){ 1256 if(fAdd){ 1257 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1258 } 1259 } 1260 1261 } 1262 1263 //when the elements are being added , we need to check if we are set for skipping the elements 1264 if(fAdd){ 1265 //this sets the value of fAdd variable 1266 fElementStack.matchElement(fElementQName); 1267 } 1268 1269 1270 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1271 fCurrentElement = fElementQName; 1272 1273 String rawname = fElementQName.rawname; 1274 1275 fEmptyElement = false; 1276 1277 fAttributes.removeAllAttributes(); 1278 1279 if(!seekCloseOfStartTag()){ 1280 fReadingAttributes = true; 1281 fAttributeCacheUsedCount =0; 1282 fStringBufferIndex =0; 1283 fAddDefaultAttr = true; 1284 do { 1285 scanAttribute(fAttributes); 1286 if (fSecurityManager != null && fAttributes.getLength() > fElementAttributeLimit){ 1287 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1288 "ElementAttributeLimit", 1289 new Object[]{rawname, new Integer(fAttributes.getLength()) }, 1290 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1291 } 1292 1293 } while (!seekCloseOfStartTag()); 1294 fReadingAttributes=false; 1295 } 1296 1297 if (fEmptyElement) { 1298 //decrease the markup depth.. 1299 fMarkupDepth--; 1300 1301 // check that this element was opened in the same entity 1302 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1303 reportFatalError("ElementEntityMismatch", 1304 new Object[]{fCurrentElement.rawname}); 1305 } 1306 // call handler 1307 if (fDocumentHandler != null) { 1308 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1309 } 1310 1311 //We should not be popping out the context here in endELement becaause the namespace context is still 1312 //valid when parser is at the endElement state. 
1313 //if (fNamespaces) { 1314 // fNamespaceContext.popContext(); 1315 //} 1316 1317 //pop the element off the stack.. 1318 fElementStack.popElement(); 1319 1320 } else { 1321 1322 if(dtdGrammarUtil != null) 1323 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1324 if(fDocumentHandler != null){ 1325 //complete element and attributes are traversed in this function so we can send a callback 1326 //here. 1327 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1328 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1329 } 1330 } 1331 1332 1333 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1334 return fEmptyElement; 1335 1336 } // scanStartElement():boolean 1337 1338 /** 1339 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1340 * Characters are consumed. 1341 */ 1342 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1343 // spaces 1344 boolean sawSpace = fEntityScanner.skipSpaces(); 1345 1346 // end tag? 1347 final int c = fEntityScanner.peekChar(); 1348 if (c == '>') { 1349 fEntityScanner.scanChar(); 1350 return true; 1351 } else if (c == '/') { 1352 fEntityScanner.scanChar(); 1353 if (!fEntityScanner.skipChar('>')) { 1354 reportFatalError("ElementUnterminated", 1355 new Object[]{fElementQName.rawname}); 1356 } 1357 fEmptyElement = true; 1358 return true; 1359 } else if (!isValidNameStartChar(c) || !sawSpace) { 1360 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1361 } 1362 1363 return false; 1364 } 1365 1366 public boolean hasAttributes(){ 1367 return fAttributes.getLength() > 0 ? true : false ; 1368 } 1369 1370 1371 /** 1372 * Scans an attribute. 1373 * <p> 1374 * <pre> 1375 * [41] Attribute ::= Name Eq AttValue 1376 * </pre> 1377 * <p> 1378 * <strong>Note:</strong> This method assumes that the next 1379 * character on the stream is the first character of the attribute 1380 * name. 
1381 * <p> 1382 * <strong>Note:</strong> This method uses the fAttributeQName and 1383 * fQName variables. The contents of these variables will be 1384 * destroyed. 1385 * 1386 * @param attributes The attributes list for the scanned attribute. 1387 */ 1388 1389 /** 1390 * protected void scanAttribute(AttributeIteratorImpl attributes) 1391 * throws IOException, XNIException { 1392 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1393 * 1394 * 1395 * // name 1396 * if (fNamespaces) { 1397 * fEntityScanner.scanQName(fAttributeQName); 1398 * } 1399 * else { 1400 * String name = fEntityScanner.scanName(); 1401 * fAttributeQName.setValues(null, name, name, null); 1402 * } 1403 * 1404 * // equals 1405 * fEntityScanner.skipSpaces(); 1406 * if (!fEntityScanner.skipChar('=')) { 1407 * reportFatalError("EqRequiredInAttribute", 1408 * new Object[]{fAttributeQName.rawname}); 1409 * } 1410 * fEntityScanner.skipSpaces(); 1411 * 1412 * 1413 * // content 1414 * int oldLen = attributes.getLength(); 1415 */ 1416 /**xxx there is one check of duplicate attribute that has been removed. 
1417 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1418 * 1419 * // WFC: Unique Att Spec 1420 * if (oldLen == attributes.getLength()) { 1421 * reportFatalError("AttributeNotUnique", 1422 * new Object[]{fCurrentElement.rawname, 1423 * fAttributeQName.rawname}); 1424 * } 1425 */ 1426 1427 /* 1428 //REVISIT: one more case needs to be included: external PE and standalone is no 1429 boolean isVC = fHasExternalDTD && !fStandalone; 1430 scanAttributeValue(fTempString, fTempString2, 1431 fAttributeQName.rawname, attributes, 1432 oldLen, isVC); 1433 1434 //attributes.setValue(oldLen, fTempString.toString()); 1435 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1436 //attributes.setSpecified(oldLen, true); 1437 1438 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1439 fAttributes.addAttribute(attribute); 1440 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1441 } // scanAttribute(XMLAttributes) 1442 1443 */ 1444 1445 /** return the attribute iterator implementation */ 1446 public XMLAttributesIteratorImpl getAttributeIterator(){ 1447 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1448 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1449 fAddDefaultAttr = false; 1450 } 1451 return fAttributes; 1452 } 1453 1454 /** return if standalone is set */ 1455 public boolean standaloneSet(){ 1456 return fStandaloneSet; 1457 } 1458 /** return if the doucment is standalone */ 1459 public boolean isStandAlone(){ 1460 return fStandalone ; 1461 } 1462 /** 1463 * Scans an attribute name value pair. 1464 * <p> 1465 * <pre> 1466 * [41] Attribute ::= Name Eq AttValue 1467 * </pre> 1468 * <p> 1469 * <strong>Note:</strong> This method assumes that the next 1470 * character on the stream is the first character of the attribute 1471 * name. 
1472 * <p> 1473 * <strong>Note:</strong> This method uses the fAttributeQName and 1474 * fQName variables. The contents of these variables will be 1475 * destroyed. 1476 * 1477 * @param attributes The attributes list for the scanned attribute. 1478 */ 1479 1480 protected void scanAttribute(XMLAttributes attributes) 1481 throws IOException, XNIException { 1482 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1483 1484 // name 1485 if (fNamespaces) { 1486 fEntityScanner.scanQName(fAttributeQName); 1487 } else { 1488 String name = fEntityScanner.scanName(); 1489 fAttributeQName.setValues(null, name, name, null); 1490 } 1491 1492 // equals 1493 fEntityScanner.skipSpaces(); 1494 if (!fEntityScanner.skipChar('=')) { 1495 reportFatalError("EqRequiredInAttribute", 1496 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1497 } 1498 fEntityScanner.skipSpaces(); 1499 1500 int attIndex = 0 ; 1501 //REVISIT: one more case needs to be included: external PE and standalone is no 1502 boolean isVC = fHasExternalDTD && !fStandalone; 1503 //fTempString would store attribute value 1504 ///fTempString2 would store attribute non-normalized value 1505 1506 //this function doesn't use 'attIndex'. We are adding the attribute later 1507 //after we have figured out that current attribute is not namespace declaration 1508 //since scanAttributeValue doesn't use attIndex parameter therefore we 1509 //can safely add the attribute later.. 1510 XMLString tmpStr = getString(); 1511 1512 scanAttributeValue(tmpStr, fTempString2, 1513 fAttributeQName.rawname, attributes, 1514 attIndex, isVC); 1515 1516 // content 1517 int oldLen = attributes.getLength(); 1518 //if the attribute name already exists.. 
new value is replaced with old value 1519 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1520 1521 // WFC: Unique Att Spec 1522 //attributes count will be same if the current attribute name already exists for this element name. 1523 //this means there are two duplicate attributes. 1524 if (oldLen == attributes.getLength()) { 1525 reportFatalError("AttributeNotUnique", 1526 new Object[]{fCurrentElement.rawname, 1527 fAttributeQName.rawname}); 1528 } 1529 1530 //tmpString contains attribute value 1531 //we are passing null as the attribute value 1532 attributes.setValue(attIndex, null, tmpStr); 1533 1534 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1535 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1536 attributes.setSpecified(attIndex, true); 1537 1538 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1539 1540 } // scanAttribute(XMLAttributes) 1541 1542 /** 1543 * Scans element content. 1544 * 1545 * @return Returns the next character on the stream. 1546 */ 1547 //CHANGED: 1548 //EARLIER: scanContent() 1549 //NOW: scanContent(XMLStringBuffer) 1550 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1551 //this function appends the data to the buffer. 1552 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1553 //set the fTempString length to 0 before passing it on to scanContent 1554 //scanContent sets the correct co-ordinates as per the content read 1555 fTempString.length = 0; 1556 int c = fEntityScanner.scanContent(fTempString); 1557 content.append(fTempString); 1558 fTempString.length = 0; 1559 if (c == '\r') { 1560 // happens when there is the character reference 1561 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1562 fEntityScanner.scanChar(); 1563 content.append((char)c); 1564 c = -1; 1565 } else if (c == ']') { 1566 //fStringBuffer.clear(); 1567 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1568 content.append((char)fEntityScanner.scanChar()); 1569 // remember where we are in case we get an endEntity before we 1570 // could flush the buffer out - this happens when we're parsing an 1571 // entity which ends with a ] 1572 fInScanContent = true; 1573 // 1574 // We work on a single character basis to handle cases such as: 1575 // ']]]>' which we might otherwise miss. 1576 // 1577 if (fEntityScanner.skipChar(']')) { 1578 content.append(']'); 1579 while (fEntityScanner.skipChar(']')) { 1580 content.append(']'); 1581 } 1582 if (fEntityScanner.skipChar('>')) { 1583 reportFatalError("CDEndInContent", null); 1584 } 1585 } 1586 fInScanContent = false; 1587 c = -1; 1588 } 1589 if (fDocumentHandler != null && content.length > 0) { 1590 //fDocumentHandler.characters(content, null); 1591 } 1592 return c; 1593 1594 } // scanContent():int 1595 1596 1597 /** 1598 * Scans a CDATA section. 1599 * <p> 1600 * <strong>Note:</strong> This method uses the fTempString and 1601 * fStringBuffer variables. 1602 * 1603 * @param complete True if the CDATA section is to be scanned 1604 * completely. 1605 * 1606 * @return True if CDATA is completely scanned. 1607 */ 1608 //CHANGED: 1609 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1610 throws IOException, XNIException { 1611 1612 // call handler 1613 if (fDocumentHandler != null) { 1614 //fDocumentHandler.startCDATA(null); 1615 } 1616 1617 while (true) { 1618 //scanData will fill the contentBuffer 1619 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1620 break ; 1621 /** We dont need all this code if we pass ']]>' as delimeter.. 
1622 * int brackets = 2; 1623 * while (fEntityScanner.skipChar(']')) { 1624 * brackets++; 1625 * } 1626 * 1627 * //When we find more than 2 square brackets 1628 * if (fDocumentHandler != null && brackets > 2) { 1629 * //we dont need to clear the buffer.. 1630 * //contentBuffer.clear(); 1631 * for (int i = 2; i < brackets; i++) { 1632 * contentBuffer.append(']'); 1633 * } 1634 * fDocumentHandler.characters(contentBuffer, null); 1635 * } 1636 * 1637 * if (fEntityScanner.skipChar('>')) { 1638 * break; 1639 * } 1640 * if (fDocumentHandler != null) { 1641 * //we dont need to clear the buffer now.. 1642 * //contentBuffer.clear(); 1643 * contentBuffer.append("]]"); 1644 * fDocumentHandler.characters(contentBuffer, null); 1645 * } 1646 **/ 1647 } else { 1648 int c = fEntityScanner.peekChar(); 1649 if (c != -1 && isInvalidLiteral(c)) { 1650 if (XMLChar.isHighSurrogate(c)) { 1651 //contentBuffer.clear(); 1652 //scan surrogates if any.... 1653 scanSurrogates(contentBuffer); 1654 } else { 1655 reportFatalError("InvalidCharInCDSect", 1656 new Object[]{Integer.toString(c,16)}); 1657 fEntityScanner.scanChar(); 1658 } 1659 } 1660 //by this time we have also read surrogate contents if any... 1661 if (fDocumentHandler != null) { 1662 //fDocumentHandler.characters(contentBuffer, null); 1663 } 1664 } 1665 } 1666 fMarkupDepth--; 1667 1668 if (fDocumentHandler != null && contentBuffer.length > 0) { 1669 //fDocumentHandler.characters(contentBuffer, null); 1670 } 1671 1672 // call handler 1673 if (fDocumentHandler != null) { 1674 //fDocumentHandler.endCDATA(null); 1675 } 1676 1677 return true; 1678 1679 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1680 1681 /** 1682 * Scans an end element. 1683 * <p> 1684 * <pre> 1685 * [42] ETag ::= '</' Name S? '>' 1686 * </pre> 1687 * <p> 1688 * <strong>Note:</strong> This method uses the fElementQName variable. 1689 * The contents of this variable will be destroyed. 
The caller should 1690 * copy the needed information out of this variable before calling 1691 * this method. 1692 * 1693 * @return The element depth. 1694 */ 1695 protected int scanEndElement() throws IOException, XNIException { 1696 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1697 1698 // pop context 1699 QName endElementName = fElementStack.popElement(); 1700 1701 String rawname = endElementName.rawname; 1702 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1703 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1704 //In scanners most of the time is consumed on checks done for XML characters, we can 1705 // optimize on it and avoid the checks done for endElement, 1706 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1707 1708 // this should work both for namespace processing true or false... 1709 1710 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1711 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1712 1713 if (!fEntityScanner.skipString(endElementName.rawname)) { 1714 reportFatalError("ETagRequired", new Object[]{rawname}); 1715 } 1716 1717 // end 1718 fEntityScanner.skipSpaces(); 1719 if (!fEntityScanner.skipChar('>')) { 1720 reportFatalError("ETagUnterminated", 1721 new Object[]{rawname}); 1722 } 1723 fMarkupDepth--; 1724 1725 //we have increased the depth for two markup "<" characters 1726 fMarkupDepth--; 1727 1728 // check that this element was opened in the same entity 1729 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1730 reportFatalError("ElementEntityMismatch", 1731 new Object[]{rawname}); 1732 } 1733 1734 //We should not be popping out the context here in endELement becaause the namespace context is still 1735 //valid when parser is at the endElement state. 
1736 1737 //if (fNamespaces) { 1738 // fNamespaceContext.popContext(); 1739 //} 1740 1741 // call handler 1742 if (fDocumentHandler != null ) { 1743 //end element is scanned in this function so we can send a callback 1744 //here. 1745 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1746 1747 fDocumentHandler.endElement(endElementName, null); 1748 } 1749 if(dtdGrammarUtil != null) 1750 dtdGrammarUtil.endElement(endElementName); 1751 1752 return fMarkupDepth; 1753 1754 } // scanEndElement():int 1755 1756 /** 1757 * Scans a character reference. 1758 * <p> 1759 * <pre> 1760 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1761 * </pre> 1762 */ 1763 protected void scanCharReference() 1764 throws IOException, XNIException { 1765 1766 fStringBuffer2.clear(); 1767 int ch = scanCharReferenceValue(fStringBuffer2, null); 1768 fMarkupDepth--; 1769 if (ch != -1) { 1770 // call handler 1771 1772 if (fDocumentHandler != null) { 1773 if (fNotifyCharRefs) { 1774 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1775 } 1776 Augmentations augs = null; 1777 if (fValidation && ch <= 0x20) { 1778 if (fTempAugmentations != null) { 1779 fTempAugmentations.removeAllItems(); 1780 } 1781 else { 1782 fTempAugmentations = new AugmentationsImpl(); 1783 } 1784 augs = fTempAugmentations; 1785 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1786 } 1787 //xxx: How do we deal with this - how to return charReferenceValues 1788 //now this is being commented because this is taken care in scanDocument() 1789 //fDocumentHandler.characters(fStringBuffer2, null); 1790 if (fNotifyCharRefs) { 1791 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1792 } 1793 } 1794 } 1795 1796 } // scanCharReference() 1797 1798 1799 /** 1800 * Scans an entity reference. 1801 * 1802 * @return returns true if the new entity is started. If it was built-in entity 1803 * 'false' is returned. 1804 * @throws IOException Thrown if i/o error occurs. 
1805 * @throws XNIException Thrown if handler throws exception upon 1806 * notification. 1807 */ 1808 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1809 String name = fEntityScanner.scanName(); 1810 if (name == null) { 1811 reportFatalError("NameRequiredInReference", null); 1812 return; 1813 } 1814 if (!fEntityScanner.skipChar(';')) { 1815 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1816 } 1817 if (fEntityStore.isUnparsedEntity(name)) { 1818 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1819 } 1820 fMarkupDepth--; 1821 fCurrentEntityName = name; 1822 1823 // handle built-in entities 1824 if (name == fAmpSymbol) { 1825 handleCharacter('&', fAmpSymbol, content); 1826 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1827 return ; 1828 } else if (name == fLtSymbol) { 1829 handleCharacter('<', fLtSymbol, content); 1830 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1831 return ; 1832 } else if (name == fGtSymbol) { 1833 handleCharacter('>', fGtSymbol, content); 1834 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1835 return ; 1836 } else if (name == fQuotSymbol) { 1837 handleCharacter('"', fQuotSymbol, content); 1838 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1839 return ; 1840 } else if (name == fAposSymbol) { 1841 handleCharacter('\'', fAposSymbol, content); 1842 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1843 return ; 1844 } 1845 1846 //1. if the entity is external and support to external entities is not required 1847 // 2. or entities should not be replaced 1848 //3. or if it is built in entity reference. 
1849 if((fEntityStore.isExternalEntity(name) && !fSupportExternalEntities) || (!fEntityStore.isExternalEntity(name) && !fReplaceEntityReferences) || foundBuiltInRefs){ 1850 fScannerState = SCANNER_STATE_REFERENCE; 1851 return ; 1852 } 1853 // start general entity 1854 if (!fEntityStore.isDeclaredEntity(name)) { 1855 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1856 if (!fSupportDTD && fReplaceEntityReferences) { 1857 reportFatalError("EntityNotDeclared", new Object[]{name}); 1858 return; 1859 } 1860 //REVISIT: one more case needs to be included: external PE and standalone is no 1861 if ( fHasExternalDTD && !fStandalone) { 1862 if (fValidation) 1863 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1864 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1865 } else 1866 reportFatalError("EntityNotDeclared", new Object[]{name}); 1867 } 1868 //we are starting the entity even if the entity was not declared 1869 //if that was the case it its taken care in XMLEntityManager.startEntity() 1870 //we immediately call the endEntity. Application gets to know if there was 1871 //any entity that was not declared. 1872 fEntityManager.startEntity(name, false); 1873 //set the scaner state to content.. parser will automatically revive itself at any point of time. 1874 //setScannerState(SCANNER_STATE_CONTENT); 1875 //return true ; 1876 } // scanEntityReference() 1877 1878 // utility methods 1879 1880 /** 1881 * Calls document handler with a single character resulting from 1882 * built-in entity resolution. 1883 * 1884 * @param c 1885 * @param entity built-in name 1886 * @param XMLStringBuffer append the character to buffer 1887 * 1888 * we really dont need to call this function -- this function is only required when 1889 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1890 * calling this function to hanlde built-in entity reference. 
1891 * 1892 */ 1893 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1894 foundBuiltInRefs = true; 1895 content.append(c); 1896 if (fDocumentHandler != null) { 1897 fSingleChar[0] = c; 1898 if (fNotifyBuiltInRefs) { 1899 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1900 } 1901 fTempString.setValues(fSingleChar, 0, 1); 1902 //fDocumentHandler.characters(fTempString, null); 1903 1904 if (fNotifyBuiltInRefs) { 1905 fDocumentHandler.endGeneralEntity(entity, null); 1906 } 1907 } 1908 } // handleCharacter(char) 1909 1910 // helper methods 1911 1912 /** 1913 * Sets the scanner state. 1914 * 1915 * @param state The new scanner state. 1916 */ 1917 protected final void setScannerState(int state) { 1918 1919 fScannerState = state; 1920 if (DEBUG_SCANNER_STATE) { 1921 System.out.print("### setScannerState: "); 1922 //System.out.print(fScannerState); 1923 System.out.print(getScannerStateName(state)); 1924 System.out.println(); 1925 } 1926 1927 } // setScannerState(int) 1928 1929 1930 /** 1931 * Sets the Driver. 1932 * 1933 * @param Driver The new Driver. 1934 */ 1935 protected final void setDriver(Driver driver) { 1936 fDriver = driver; 1937 if (DEBUG_DISPATCHER) { 1938 System.out.print("%%% setDriver: "); 1939 System.out.print(getDriverName(driver)); 1940 System.out.println(); 1941 } 1942 } 1943 1944 // 1945 // Private methods 1946 // 1947 1948 /** Returns the scanner state name. 
*/ 1949 protected String getScannerStateName(int state) { 1950 1951 switch (state) { 1952 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1953 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1954 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1955 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1956 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1957 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1958 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1959 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1960 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1961 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1962 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1963 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1964 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1965 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1966 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1967 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1968 } 1969 1970 return "??? ("+state+')'; 1971 1972 } // getScannerStateName(int):String 1973 public String getEntityName(){ 1974 //return the cached name 1975 return fCurrentEntityName; 1976 } 1977 1978 /** Returns the driver name. 
*/ 1979 public String getDriverName(Driver driver) { 1980 1981 if (DEBUG_DISPATCHER) { 1982 if (driver != null) { 1983 String name = driver.getClass().getName(); 1984 int index = name.lastIndexOf('.'); 1985 if (index != -1) { 1986 name = name.substring(index + 1); 1987 index = name.lastIndexOf('$'); 1988 if (index != -1) { 1989 name = name.substring(index + 1); 1990 } 1991 } 1992 return name; 1993 } 1994 } 1995 return "null"; 1996 1997 } // getDriverName():String 1998 1999 // 2000 // Classes 2001 // 2002 2003 /** 2004 * @author Neeraj Bajaj, Sun Microsystems. 2005 */ 2006 protected static final class Element { 2007 2008 // 2009 // Data 2010 // 2011 2012 /** Symbol. */ 2013 public QName qname; 2014 2015 //raw name stored as characters 2016 public char[] fRawname; 2017 2018 /** The next Element entry. */ 2019 public Element next; 2020 2021 // 2022 // Constructors 2023 // 2024 2025 /** 2026 * Constructs a new Element from the given QName and next Element 2027 * reference. 2028 */ 2029 public Element(QName qname, Element next) { 2030 this.qname.setValues(qname); 2031 this.fRawname = qname.rawname.toCharArray(); 2032 this.next = next; 2033 } 2034 2035 } // class Element 2036 2037 /** 2038 * Element stack. 2039 * 2040 * @author Neeraj Bajaj, Sun Microsystems. 2041 */ 2042 protected class ElementStack2 { 2043 2044 // 2045 // Data 2046 // 2047 2048 /** The stack data. */ 2049 protected QName [] fQName = new QName[20]; 2050 2051 //Element depth 2052 protected int fDepth; 2053 //total number of elements 2054 protected int fCount; 2055 //current position 2056 protected int fPosition; 2057 //Mark refers to the position 2058 protected int fMark; 2059 2060 protected int fLastDepth ; 2061 2062 // 2063 // Constructors 2064 // 2065 2066 /** Default constructor. 
*/ 2067 public ElementStack2() { 2068 for (int i = 0; i < fQName.length; i++) { 2069 fQName[i] = new QName(); 2070 } 2071 fMark = fPosition = 1; 2072 } // <init>() 2073 2074 public void resize(){ 2075 /** 2076 * int length = fElements.length; 2077 * Element [] temp = new Element[length * 2]; 2078 * System.arraycopy(fElements, 0, temp, 0, length); 2079 * fElements = temp; 2080 */ 2081 //resize QNames 2082 int oldLength = fQName.length; 2083 QName [] tmp = new QName[oldLength * 2]; 2084 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2085 fQName = tmp; 2086 2087 for (int i = oldLength; i < fQName.length; i++) { 2088 fQName[i] = new QName(); 2089 } 2090 2091 } 2092 2093 2094 // 2095 // Public methods 2096 // 2097 2098 /** Check if the element scanned during the start element 2099 *matches the stored element. 2100 * 2101 *@return true if the match suceeds. 2102 */ 2103 public boolean matchElement(QName element) { 2104 //last depth is the depth when last elemnt was pushed 2105 //if last depth is greater than current depth 2106 if(DEBUG_SKIP_ALGORITHM){ 2107 System.out.println("fLastDepth = " + fLastDepth); 2108 System.out.println("fDepth = " + fDepth); 2109 } 2110 boolean match = false; 2111 if(fLastDepth > fDepth && fDepth <= 2){ 2112 if(DEBUG_SKIP_ALGORITHM){ 2113 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2114 } 2115 if(element.rawname == fQName[fDepth].rawname){ 2116 fAdd = false; 2117 //mark this position 2118 //decrease the depth by 1 as arrays are 0 based 2119 fMark = fDepth - 1; 2120 //we found the match and from next element skipping will start, add 1 2121 fPosition = fMark + 1 ; 2122 match = true; 2123 //Once we get match decrease the count -- this was increased by nextElement() 2124 --fCount; 2125 if(DEBUG_SKIP_ALGORITHM){ 2126 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2127 System.out.println("fMark = " + fMark); 2128 System.out.println("fPosition = " + fPosition); 2129 
System.out.println("fDepth = " + fDepth); 2130 System.out.println("fCount = " + fCount); 2131 } 2132 }else{ 2133 fAdd = true; 2134 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2135 } 2136 } 2137 //store the last depth 2138 fLastDepth = fDepth++; 2139 return match; 2140 } // pushElement(QName):QName 2141 2142 /** 2143 * This function doesn't increase depth. The function in this function is 2144 *broken down into two functions for efficiency. <@see>matchElement</see>. 2145 * This function just returns the pointer to the object and its values are set. 2146 * 2147 *@return QName reference to the next element in the list 2148 */ 2149 public QName nextElement() { 2150 2151 //if number of elements becomes equal to the length of array -- stop the skipping 2152 if (fCount == fQName.length) { 2153 fShouldSkip = false; 2154 fAdd = false; 2155 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2156 //xxx: this is not correct, we are returning the last element 2157 //this wont make any difference since flag has been set to 'false' 2158 return fQName[--fCount]; 2159 } 2160 if(DEBUG_SKIP_ALGORITHM){ 2161 System.out.println("fCount = " + fCount); 2162 } 2163 return fQName[fCount++]; 2164 2165 } 2166 2167 /** Note that this function is considerably different than nextElement() 2168 * This function just returns the previously stored elements 2169 */ 2170 public QName getNext(){ 2171 //when position reaches number of elements in the list.. 2172 //set the position back to mark, making it a circular linked list. 2173 if(fPosition == fCount){ 2174 fPosition = fMark; 2175 } 2176 return fQName[fPosition++]; 2177 } 2178 2179 /** returns the current depth 2180 */ 2181 public int popElement(){ 2182 return fDepth--; 2183 } 2184 2185 2186 /** Clears the stack without throwing away existing QName objects. 
*/ 2187 public void clear() { 2188 fLastDepth = 0; 2189 fDepth = 0; 2190 fCount = 0 ; 2191 fPosition = fMark = 1; 2192 } // clear() 2193 2194 } // class ElementStack 2195 2196 /** 2197 * Element stack. This stack operates without synchronization, error 2198 * checking, and it re-uses objects instead of throwing popped items 2199 * away. 2200 * 2201 * @author Andy Clark, IBM 2202 */ 2203 protected class ElementStack { 2204 2205 // 2206 // Data 2207 // 2208 2209 /** The stack data. */ 2210 protected QName[] fElements; 2211 protected int [] fInt = new int[20]; 2212 2213 2214 //Element depth 2215 protected int fDepth; 2216 //total number of elements 2217 protected int fCount; 2218 //current position 2219 protected int fPosition; 2220 //Mark refers to the position 2221 protected int fMark; 2222 2223 protected int fLastDepth ; 2224 2225 // 2226 // Constructors 2227 // 2228 2229 /** Default constructor. */ 2230 public ElementStack() { 2231 fElements = new QName[20]; 2232 for (int i = 0; i < fElements.length; i++) { 2233 fElements[i] = new QName(); 2234 } 2235 } // <init>() 2236 2237 // 2238 // Public methods 2239 // 2240 2241 /** 2242 * Pushes an element on the stack. 2243 * <p> 2244 * <strong>Note:</strong> The QName values are copied into the 2245 * stack. In other words, the caller does <em>not</em> orphan 2246 * the element to the stack. Also, the QName object returned 2247 * is <em>not</em> orphaned to the caller. It should be 2248 * considered read-only. 2249 * 2250 * @param element The element to push onto the stack. 
2251 * 2252 * @return Returns the actual QName object that stores the 2253 */ 2254 //XXX: THIS FUNCTION IS NOT USED 2255 public QName pushElement(QName element) { 2256 if (fDepth == fElements.length) { 2257 QName[] array = new QName[fElements.length * 2]; 2258 System.arraycopy(fElements, 0, array, 0, fDepth); 2259 fElements = array; 2260 for (int i = fDepth; i < fElements.length; i++) { 2261 fElements[i] = new QName(); 2262 } 2263 } 2264 fElements[fDepth].setValues(element); 2265 return fElements[fDepth++]; 2266 } // pushElement(QName):QName 2267 2268 2269 /** Note that this function is considerably different than nextElement() 2270 * This function just returns the previously stored elements 2271 */ 2272 public QName getNext(){ 2273 //when position reaches number of elements in the list.. 2274 //set the position back to mark, making it a circular linked list. 2275 if(fPosition == fCount){ 2276 fPosition = fMark; 2277 } 2278 //store the position of last opened tag at particular depth 2279 //fInt[++fDepth] = fPosition; 2280 if(DEBUG_SKIP_ALGORITHM){ 2281 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2282 } 2283 //return fElements[fPosition++]; 2284 return fElements[fPosition]; 2285 } 2286 2287 /** This function should be called only when element was skipped sucessfully. 2288 * 1. Increase the depth - because element was sucessfully skipped. 2289 *2. Store the position of the element token in array "last opened tag" at depth. 2290 *3. increase the position counter so as to point to the next element in the array 2291 */ 2292 public void push(){ 2293 2294 fInt[++fDepth] = fPosition++; 2295 } 2296 2297 /** Check if the element scanned during the start element 2298 *matches the stored element. 2299 * 2300 *@return true if the match suceeds. 
2301 */ 2302 public boolean matchElement(QName element) { 2303 //last depth is the depth when last elemnt was pushed 2304 //if last depth is greater than current depth 2305 //if(DEBUG_SKIP_ALGORITHM){ 2306 // System.out.println("Check if the element " + element.rawname + " matches"); 2307 // System.out.println("fLastDepth = " + fLastDepth); 2308 // System.out.println("fDepth = " + fDepth); 2309 //} 2310 boolean match = false; 2311 if(fLastDepth > fDepth && fDepth <= 3){ 2312 if(DEBUG_SKIP_ALGORITHM){ 2313 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2314 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2315 } 2316 if(element.rawname == fElements[fDepth - 1].rawname){ 2317 fAdd = false; 2318 //mark this position 2319 //decrease the depth by 1 as arrays are 0 based 2320 fMark = fDepth - 1; 2321 //we found the match 2322 fPosition = fMark; 2323 match = true; 2324 //Once we get match decrease the count -- this was increased by nextElement() 2325 --fCount; 2326 if(DEBUG_SKIP_ALGORITHM){ 2327 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2328 System.out.println("fMark = " + fMark); 2329 System.out.println("fPosition = " + fPosition); 2330 System.out.println("fDepth = " + fDepth); 2331 System.out.println("fCount = " + fCount); 2332 System.out.println("---------MATCH SUCEEDED-----------------"); 2333 System.out.println(""); 2334 } 2335 }else{ 2336 fAdd = true; 2337 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2338 } 2339 } 2340 //store the position for the current depth 2341 //when we are adding the elements, when skipping 2342 //starts even then this should be tracked ie. 
when 2343 //calling getNext() 2344 if(match){ 2345 //from next element skipping will start, add 1 2346 fInt[fDepth] = fPosition++; 2347 } else{ 2348 if(DEBUG_SKIP_ALGORITHM){ 2349 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2350 } 2351 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 2352 fInt[fDepth] = fCount - 1; 2353 } 2354 2355 //if number of elements becomes equal to the length of array -- stop the skipping 2356 //xxx: should we do "fCount == fInt.length" 2357 if (fCount == fElements.length) { 2358 fSkip = false; 2359 fAdd = false; 2360 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2361 reposition(); 2362 if(DEBUG_SKIP_ALGORITHM){ 2363 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2364 System.out.println("REPOSITIONING THE STACK"); 2365 System.out.println("-----------SKIPPING STOPPED----------"); 2366 System.out.println(""); 2367 } 2368 return false; 2369 } 2370 if(DEBUG_SKIP_ALGORITHM){ 2371 if(match){ 2372 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2373 }else{ 2374 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2375 } 2376 } 2377 //store the last depth 2378 fLastDepth = fDepth; 2379 return match; 2380 } // matchElement(QName):QName 2381 2382 2383 /** 2384 * Returns the next element on the stack. 2385 * 2386 * @return Returns the actual QName object. Callee should 2387 * use this object to store the details of next element encountered. 
2388 */ 2389 public QName nextElement() { 2390 if(fSkip){ 2391 fDepth++; 2392 //boundary checks are done in matchElement() 2393 return fElements[fCount++]; 2394 } else if (fDepth == fElements.length) { 2395 QName[] array = new QName[fElements.length * 2]; 2396 System.arraycopy(fElements, 0, array, 0, fDepth); 2397 fElements = array; 2398 for (int i = fDepth; i < fElements.length; i++) { 2399 fElements[i] = new QName(); 2400 } 2401 } 2402 2403 return fElements[fDepth++]; 2404 2405 } // pushElement(QName):QName 2406 2407 2408 /** 2409 * Pops an element off of the stack by setting the values of 2410 * the specified QName. 2411 * <p> 2412 * <strong>Note:</strong> The object returned is <em>not</em> 2413 * orphaned to the caller. Therefore, the caller should consider 2414 * the object to be read-only. 2415 */ 2416 public QName popElement() { 2417 //return the same object that was pushed -- this would avoid 2418 //setting the values for every end element. 2419 //STRONG: this object is read only -- this object reference shouldn't be stored. 2420 if(fSkip || fAdd ){ 2421 if(DEBUG_SKIP_ALGORITHM){ 2422 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2423 System.out.println(""); 2424 } 2425 return fElements[fInt[fDepth--]]; 2426 } else{ 2427 if(DEBUG_SKIP_ALGORITHM){ 2428 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2429 } 2430 return fElements[--fDepth] ; 2431 } 2432 //element.setValues(fElements[--fDepth]); 2433 } // popElement(QName) 2434 2435 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2436 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2437 *as per the depth. 
2438 */ 2439 public void reposition(){ 2440 for( int i = 2 ; i <= fDepth ; i++){ 2441 fElements[i-1] = fElements[fInt[i]]; 2442 } 2443 if(DEBUG_SKIP_ALGORITHM){ 2444 for( int i = 0 ; i < fDepth ; i++){ 2445 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2446 } 2447 } 2448 } 2449 2450 /** Clears the stack without throwing away existing QName objects. */ 2451 public void clear() { 2452 fDepth = 0; 2453 fLastDepth = 0; 2454 fCount = 0 ; 2455 fPosition = fMark = 1; 2456 2457 } // clear() 2458 2459 /** 2460 * This function is as a result of optimization done for endElement -- 2461 * we dont need to set the value for every end element encouterd. 2462 * For Well formedness checks we can have the same QName object that was pushed. 2463 * the values will be set only if application need to know about the endElement 2464 * -- neeraj.bajaj@sun.com 2465 */ 2466 2467 public QName getLastPoppedElement(){ 2468 return fElements[fDepth]; 2469 } 2470 } // class ElementStack 2471 2472 /** 2473 * Drives the parser to the next state/event on the input. Parser is guaranteed 2474 * to stop at the next state/event. 2475 * 2476 * Internally XML document is divided into several states. Each state represents 2477 * a sections of XML document. When this functions returns normally, it has read 2478 * the section of XML document and returns the state corresponding to section of 2479 * document which has been read. For optimizations, a particular driver 2480 * can read ahead of the section of document (state returned) just read and 2481 * can maintain a different internal state. 2482 * 2483 * 2484 * @author Neeraj Bajaj, Sun Microsystems 2485 */ 2486 protected interface Driver { 2487 2488 2489 /** 2490 * Drives the parser to the next state/event on the input. Parser is guaranteed 2491 * to stop at the next state/event. 2492 * 2493 * Internally XML document is divided into several states. Each state represents 2494 * a sections of XML document. 
When this functions returns normally, it has read 2495 * the section of XML document and returns the state corresponding to section of 2496 * document which has been read. For optimizations, a particular driver 2497 * can read ahead of the section of document (state returned) just read and 2498 * can maintain a different internal state. 2499 * 2500 * @return state representing the section of document just read. 2501 * 2502 * @throws IOException Thrown on i/o error. 2503 * @throws XNIException Thrown on parse error. 2504 */ 2505 2506 public int next() throws IOException, XNIException; 2507 2508 } // interface Driver 2509 2510 /** 2511 * Driver to handle content scanning. This driver is capable of reading 2512 * the fragment of XML document. When it has finished reading fragment 2513 * of XML documents, it can pass the job of reading to another driver. 2514 * 2515 * This class has been modified as per the new design which is more suited to 2516 * efficiently build pull parser. Lot of performance improvements have been done and 2517 * the code has been added to support stax functionality/features. 2518 * 2519 * @author Neeraj Bajaj, Sun Microsystems 2520 * 2521 * 2522 * @author Andy Clark, IBM 2523 * @author Eric Ye, IBM 2524 */ 2525 protected class FragmentContentDriver 2526 implements Driver { 2527 2528 // 2529 // Driver methods 2530 // 2531 private boolean fContinueDispatching = true; 2532 private boolean fScanningForMarkup = true; 2533 2534 /** 2535 * decides the appropriate state of the parser 2536 */ 2537 private void startOfMarkup() throws IOException { 2538 fMarkupDepth++; 2539 final int ch = fEntityScanner.peekChar(); 2540 2541 switch(ch){ 2542 case '?' :{ 2543 setScannerState(SCANNER_STATE_PI); 2544 fEntityScanner.skipChar(ch); 2545 break; 2546 } 2547 case '!' 
:{ 2548 fEntityScanner.skipChar(ch); 2549 if (fEntityScanner.skipChar('-')) { 2550 if (!fEntityScanner.skipChar('-')) { 2551 reportFatalError("InvalidCommentStart", 2552 null); 2553 } 2554 setScannerState(SCANNER_STATE_COMMENT); 2555 } else if (fEntityScanner.skipString(cdata)) { 2556 setScannerState(SCANNER_STATE_CDATA ); 2557 } else if (!scanForDoctypeHook()) { 2558 reportFatalError("MarkupNotRecognizedInContent", 2559 null); 2560 } 2561 break; 2562 } 2563 case '/' :{ 2564 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2565 fEntityScanner.skipChar(ch); 2566 break; 2567 } 2568 default :{ 2569 if (isValidNameStartChar(ch)) { 2570 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2571 } else { 2572 reportFatalError("MarkupNotRecognizedInContent", 2573 null); 2574 } 2575 } 2576 } 2577 2578 }//startOfMarkup 2579 2580 private void startOfContent() throws IOException { 2581 if (fEntityScanner.skipChar('<')) { 2582 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2583 } else if (fEntityScanner.skipChar('&')) { 2584 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2585 } else { 2586 //element content is there.. 2587 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2588 } 2589 }//startOfContent 2590 2591 2592 /** 2593 * 2594 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2595 * At any point of time when in doubt over the current state of the parser, the state should be 2596 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2597 * the parser to one of its sub state. 2598 * sub states are defined in the parser on the basis of different XML component like 2599 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2600 * These sub states help the parser to have fine control over the parsing. These are the 2601 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2602 * decided if paresr needs to stop at next milepost ?? 2603 * 2604 */ 2605 public void decideSubState() throws IOException { 2606 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2607 2608 switch (fScannerState) { 2609 2610 case SCANNER_STATE_CONTENT: { 2611 startOfContent() ; 2612 break; 2613 } 2614 2615 case SCANNER_STATE_START_OF_MARKUP: { 2616 startOfMarkup() ; 2617 break; 2618 } 2619 } 2620 } 2621 }//decideSubState 2622 2623 /** 2624 * Drives the parser to the next state/event on the input. Parser is guaranteed 2625 * to stop at the next state/event. Internally XML document 2626 * is divided into several states. Each state represents a sections of XML 2627 * document. When this functions returns normally, it has read the section 2628 * of XML document and returns the state corresponding to section of 2629 * document which has been read. For optimizations, a particular driver 2630 * can read ahead of the section of document (state returned) just read and 2631 * can maintain a different internal state. 2632 * 2633 * State returned corresponds to Stax states. 2634 * 2635 * @return state representing the section of document just read. 2636 * 2637 * @throws IOException Thrown on i/o error. 2638 * @throws XNIException Thrown on parse error. 2639 */ 2640 2641 public int next() throws IOException, XNIException { 2642 while (true) { 2643 try { 2644 if(DEBUG_NEXT){ 2645 System.out.println("NOW IN FragmentContentDriver"); 2646 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2647 } 2648 2649 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2650 //decideSubState. 
2651 2652 switch (fScannerState) { 2653 case SCANNER_STATE_CONTENT: { 2654 final int ch = fEntityScanner.peekChar(); 2655 if (ch == '<') { 2656 fEntityScanner.scanChar(); 2657 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2658 } else if (ch == '&') { 2659 fEntityScanner.scanChar(); 2660 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2661 break; 2662 } else { 2663 //element content is there.. 2664 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2665 break; 2666 } 2667 } 2668 2669 case SCANNER_STATE_START_OF_MARKUP: { 2670 startOfMarkup(); 2671 break; 2672 }//case: SCANNER_STATE_START_OF_MARKUP 2673 2674 }//end of switch 2675 //decideSubState() ; 2676 2677 //do some special handling if isCoalesce is set to true. 2678 if(fIsCoalesce){ 2679 fUsebuffer = true ; 2680 //if the last section was character data 2681 if(fLastSectionWasCharacterData){ 2682 2683 //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2684 //return the last scanned charactrer data. 2685 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2686 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2687 fLastSectionWasCharacterData = false; 2688 return XMLEvent.CHARACTERS; 2689 } 2690 }//if last section was CDATA or ENTITY REFERENCE 2691 //xxx: there might be another entity reference or CDATA after this 2692 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2693 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2694 //and current state is not SCANNER_STATE_CHARACTER_DATA 2695 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2696 //this means there is nothing more to be coalesced. 2697 //return the CHARACTERS event. 
2698 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2699 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2700 2701 fLastSectionWasCData = false; 2702 fLastSectionWasEntityReference = false; 2703 return XMLEvent.CHARACTERS; 2704 } 2705 } 2706 } 2707 2708 2709 if(DEBUG_NEXT){ 2710 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2711 } 2712 2713 switch(fScannerState){ 2714 2715 case XMLEvent.START_DOCUMENT : 2716 return XMLEvent.START_DOCUMENT; 2717 2718 case SCANNER_STATE_START_ELEMENT_TAG :{ 2719 2720 //xxx this function returns true when element is empty.. can be linked to end element event. 2721 //returns true if the element is empty 2722 fEmptyElement = scanStartElement() ; 2723 //if the element is empty the next event is "end element" 2724 if(fEmptyElement){ 2725 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2726 }else{ 2727 //set the next possible state 2728 setScannerState(SCANNER_STATE_CONTENT); 2729 } 2730 return XMLEvent.START_ELEMENT ; 2731 } 2732 2733 case SCANNER_STATE_CHARACTER_DATA: { 2734 if(DEBUG_COALESCE){ 2735 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2736 System.out.println("fIsCoalesce = " + fIsCoalesce); 2737 } 2738 //if last section was either entity reference or cdata or character data we should be using buffer 2739 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2740 2741 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2742 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2743 fLastSectionWasEntityReference = false; 2744 fLastSectionWasCData = false; 2745 fLastSectionWasCharacterData = true ; 2746 fUsebuffer = true; 2747 }else{ 2748 //clear the buffer 2749 fContentBuffer.clear(); 2750 } 2751 2752 //set the fTempString length to 0 before passing it on to scanContent 2753 //scanContent sets the correct co-ordinates as per the content read 2754 fTempString.length = 0; 2755 int c = fEntityScanner.scanContent(fTempString); 2756 if(DEBUG){ 2757 System.out.println("fTempString = " + fTempString); 2758 } 2759 if(fEntityScanner.skipChar('<')){ 2760 //check if we have reached end of element 2761 if(fEntityScanner.skipChar('/')){ 2762 //increase the mark up depth 2763 fMarkupDepth++; 2764 fLastSectionWasCharacterData = false; 2765 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2766 //check if its start of new element 2767 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2768 fMarkupDepth++; 2769 fLastSectionWasCharacterData = false; 2770 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2771 }else{ 2772 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2773 //there can be cdata ahead if coalesce is true we should call again 2774 if(fIsCoalesce){ 2775 fUsebuffer = true; 2776 fLastSectionWasCharacterData = true; 2777 fContentBuffer.append(fTempString); 2778 fTempString.length = 0; 2779 continue; 2780 } 2781 } 2782 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2783 if(fUsebuffer){ 2784 fContentBuffer.append(fTempString); 2785 fTempString.length = 0; 2786 } 2787 if(DEBUG){ 2788 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2789 } 2790 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2791 if(DEBUG)System.out.println("Return SPACE EVENT"); 2792 return XMLEvent.SPACE; 2793 }else 2794 return 
XMLEvent.CHARACTERS; 2795 2796 } else{ 2797 fUsebuffer = true ; 2798 if(DEBUG){ 2799 System.out.println("fContentBuffer = " + fContentBuffer); 2800 System.out.println("fTempString = " + fTempString); 2801 } 2802 fContentBuffer.append(fTempString); 2803 fTempString.length = 0; 2804 } 2805 if (c == '\r') { 2806 if(DEBUG){ 2807 System.out.println("'\r' character found"); 2808 } 2809 // happens when there is the character reference 2810 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2811 fEntityScanner.scanChar(); 2812 fUsebuffer = true; 2813 fContentBuffer.append((char)c); 2814 c = -1 ; 2815 } else if (c == ']') { 2816 //fStringBuffer.clear(); 2817 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2818 fUsebuffer = true; 2819 fContentBuffer.append((char)fEntityScanner.scanChar()); 2820 // remember where we are in case we get an endEntity before we 2821 // could flush the buffer out - this happens when we're parsing an 2822 // entity which ends with a ] 2823 fInScanContent = true; 2824 2825 // We work on a single character basis to handle cases such as: 2826 // ']]]>' which we might otherwise miss. 2827 // 2828 if (fEntityScanner.skipChar(']')) { 2829 fContentBuffer.append(']'); 2830 while (fEntityScanner.skipChar(']')) { 2831 fContentBuffer.append(']'); 2832 } 2833 if (fEntityScanner.skipChar('>')) { 2834 reportFatalError("CDEndInContent", null); 2835 } 2836 } 2837 c = -1 ; 2838 fInScanContent = false; 2839 } 2840 2841 do{ 2842 //xxx: we should be using only one buffer.. 2843 // we need not to grow the buffer only when isCoalesce() is not true; 2844 2845 if (c == '<') { 2846 fEntityScanner.scanChar(); 2847 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2848 break; 2849 }//xxx what should be the behavior if entity reference is present in the content ? 
2850 else if (c == '&') { 2851 fEntityScanner.scanChar(); 2852 setScannerState(SCANNER_STATE_REFERENCE); 2853 break; 2854 }///xxx since this part is also characters, it should be merged... 2855 else if (c != -1 && isInvalidLiteral(c)) { 2856 if (XMLChar.isHighSurrogate(c)) { 2857 // special case: surrogates 2858 scanSurrogates(fContentBuffer) ; 2859 setScannerState(SCANNER_STATE_CONTENT); 2860 } else { 2861 reportFatalError("InvalidCharInContent", 2862 new Object[] { 2863 Integer.toString(c, 16)}); 2864 fEntityScanner.scanChar(); 2865 } 2866 break; 2867 } 2868 //xxx: scanContent also gives character callback. 2869 c = scanContent(fContentBuffer) ; 2870 //we should not be iterating again if fIsCoalesce is not set to true 2871 2872 if(!fIsCoalesce){ 2873 setScannerState(SCANNER_STATE_CONTENT); 2874 break; 2875 } 2876 2877 }while(true); 2878 2879 //if (fDocumentHandler != null) { 2880 // fDocumentHandler.characters(fContentBuffer, null); 2881 //} 2882 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2883 //if fIsCoalesce is true there might be more data so call fDriver.next() 2884 if(fIsCoalesce){ 2885 fLastSectionWasCharacterData = true ; 2886 continue; 2887 }else{ 2888 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2889 if(DEBUG)System.out.println("Return SPACE EVENT"); 2890 return XMLEvent.SPACE; 2891 } else 2892 return XMLEvent.CHARACTERS ; 2893 } 2894 } 2895 2896 case SCANNER_STATE_END_ELEMENT_TAG :{ 2897 if(fEmptyElement){ 2898 //set it back to false. 2899 fEmptyElement = false; 2900 setScannerState(SCANNER_STATE_CONTENT); 2901 //check the case when there is comment after single element document 2902 //<foo/> and some comment after this 2903 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2904 2905 } else if(scanEndElement() == 0) { 2906 //It is last element of the document 2907 if (elementDepthIsZeroHook()) { 2908 //if element depth is zero , it indicates the end of the document 2909 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2910 //xxx understand this point once again.. 2911 return XMLEvent.END_ELEMENT ; 2912 } 2913 2914 } 2915 setScannerState(SCANNER_STATE_CONTENT); 2916 return XMLEvent.END_ELEMENT ; 2917 } 2918 2919 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2920 scanComment(); 2921 setScannerState(SCANNER_STATE_CONTENT); 2922 return XMLEvent.COMMENT; 2923 //break; 2924 } 2925 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2926 //clear the buffer first 2927 fContentBuffer.clear() ; 2928 //xxx: which buffer should be passed. Ideally we shouldn't have 2929 //more than two buffers -- 2930 //xxx: where should we add the switch for buffering. 2931 scanPI(fContentBuffer); 2932 setScannerState(SCANNER_STATE_CONTENT); 2933 return XMLEvent.PROCESSING_INSTRUCTION; 2934 //break; 2935 } 2936 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2937 //xxx: What if CDATA is the first event 2938 //<foo><![CDATA[hello<><>]]>append</foo> 2939 2940 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 2941 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2942 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2943 fLastSectionWasCData = true ; 2944 fLastSectionWasEntityReference = false; 2945 fLastSectionWasCharacterData = false; 2946 }//if we dont need to coalesce clear the buffer 2947 else{ 2948 fContentBuffer.clear(); 2949 } 2950 fUsebuffer = true; 2951 //CDATA section is completely read in all the case. 2952 scanCDATASection(fContentBuffer , true); 2953 setScannerState(SCANNER_STATE_CONTENT); 2954 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2955 //and just call fDispatche.next(). Since we have set the scanner state to 2956 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2957 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2958 //2. Check if application has set for reporting CDATA event 2959 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2960 //return the cdata event as characters. 2961 if(fIsCoalesce){ 2962 fLastSectionWasCData = true ; 2963 //there might be more data to coalesce. 2964 continue; 2965 }else if(fReportCdataEvent){ 2966 return XMLEvent.CDATA; 2967 } else{ 2968 return XMLEvent.CHARACTERS; 2969 } 2970 } 2971 2972 case SCANNER_STATE_REFERENCE :{ 2973 fMarkupDepth++; 2974 foundBuiltInRefs = false; 2975 2976 //we should not clear the buffer only when the last state was either CDATA or 2977 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2978 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 2979 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2980 //used when fIsCoalesce is set to true. 
2981 fLastSectionWasEntityReference = true ; 2982 fLastSectionWasCData = false; 2983 fLastSectionWasCharacterData = false; 2984 }//if we dont need to coalesce clear the buffer 2985 else{ 2986 fContentBuffer.clear(); 2987 } 2988 fUsebuffer = true ; 2989 //take care of character reference 2990 if (fEntityScanner.skipChar('#')) { 2991 scanCharReferenceValue(fContentBuffer, null); 2992 fMarkupDepth--; 2993 if(!fIsCoalesce){ 2994 setScannerState(SCANNER_STATE_CONTENT); 2995 return XMLEvent.CHARACTERS; 2996 } 2997 } else { 2998 // this function also starts new entity 2999 scanEntityReference(fContentBuffer); 3000 //if there was built-in entity reference & coalesce is not true 3001 //return CHARACTERS 3002 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3003 setScannerState(SCANNER_STATE_CONTENT); 3004 return XMLEvent.CHARACTERS; 3005 } 3006 3007 //if there was a text declaration, call next() it will be taken care. 3008 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3009 fLastSectionWasEntityReference = true ; 3010 continue; 3011 } 3012 3013 if(fScannerState == SCANNER_STATE_REFERENCE){ 3014 setScannerState(SCANNER_STATE_CONTENT); 3015 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3016 // Skip the entity reference, we don't care 3017 continue; 3018 } 3019 return XMLEvent.ENTITY_REFERENCE; 3020 } 3021 } 3022 //Wether it was character reference, entity reference or built-in entity 3023 //set the next possible state to SCANNER_STATE_CONTENT 3024 setScannerState(SCANNER_STATE_CONTENT); 3025 fLastSectionWasEntityReference = true ; 3026 continue; 3027 } 3028 3029 case SCANNER_STATE_TEXT_DECL: { 3030 // scan text decl 3031 if (fEntityScanner.skipString("<?xml")) { 3032 fMarkupDepth++; 3033 // NOTE: special case where entity starts with a PI 3034 // whose name starts with "xml" (e.g. 
"xmlfoo") 3035 if (isValidNameChar(fEntityScanner.peekChar())) { 3036 fStringBuffer.clear(); 3037 fStringBuffer.append("xml"); 3038 3039 if (fNamespaces) { 3040 while (isValidNCName(fEntityScanner.peekChar())) { 3041 fStringBuffer.append((char)fEntityScanner.scanChar()); 3042 } 3043 } else { 3044 while (isValidNameChar(fEntityScanner.peekChar())) { 3045 fStringBuffer.append((char)fEntityScanner.scanChar()); 3046 } 3047 } 3048 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3049 fContentBuffer.clear(); 3050 scanPIData(target, fContentBuffer); 3051 } 3052 3053 // standard text declaration 3054 else { 3055 //xxx: this function gives callback 3056 scanXMLDeclOrTextDecl(true); 3057 } 3058 } 3059 // now that we've straightened out the readers, we can read in chunks: 3060 fEntityManager.fCurrentEntity.mayReadChunks = true; 3061 setScannerState(SCANNER_STATE_CONTENT); 3062 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3063 //it seems we have to careful when to allow function issue a callback 3064 //and when to allow adapter issue a callback. 
// SCANNER_STATE_TEXT_DECL returns no event of its own: the state has just been
// set to SCANNER_STATE_CONTENT, so go round the driver loop again.
                        continue;
                    }


                    case SCANNER_STATE_ROOT_ELEMENT: {
                        if (scanRootElementHook()) {
                            // The hook asked the caller to stop: flag the element as empty and
                            // report START_ELEMENT without touching the scanner state here.
                            fEmptyElement = true;
                            //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
                            return XMLEvent.START_ELEMENT;
                        }
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.START_ELEMENT ;
                    }
                    case SCANNER_STATE_CHAR_REFERENCE : {
                        // Start from an empty content buffer, scan the character reference
                        // into it, and report the result as CHARACTERS.
                        fContentBuffer.clear();
                        scanCharReferenceValue(fContentBuffer, null);
                        fMarkupDepth--;
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.CHARACTERS;
                    }
                    default:
                        // No case matched the current scanner state.
                        throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                }//switch
            }
            // premature end of file: let endOfFileHook() decide whether it is an
            // error, then return -1 to the caller.
            catch (EOFException e) {
                endOfFileHook(e);
                return -1;
            }
            } //while loop
        }//next


        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "&lt;!" has been scanned.
         * <p>
         * This implementation scans nothing and always returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. However, when scanning a full XML document, the
         * scanner must handle the trailing miscellaneous section of
         * the document after the end of the document's root element.
         * <p>
         * This implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Scan for root element hook. This method is a hook for
         * subclasses to add code that handles scanning for the root
         * element. When scanning a document fragment, there is no
         * "root" element. However, when scanning a full XML document,
         * the scanner must handle the root element specially.
         * <p>
         * This implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean scanRootElementHook()
        throws IOException, XNIException {
            return false;
        } // scanRootElementHook():boolean

        /**
         * End of file hook. This method is a hook for subclasses to
         * add code that handles the end of file. The end of file in
         * a document fragment is OK if the markup depth is zero.
         * However, when scanning a full XML document, an end of file
         * is always premature.
         * <p>
         * This implementation reports the "PrematureEOF" fatal error when
         * {@code fMarkupDepth} is non-zero and otherwise does nothing.
         *
         * @param e the end-of-file exception caught by the caller; it is not
         *          inspected by this implementation.
         */
        protected void endOfFileHook(EOFException e)
        throws IOException, XNIException {

            // NOTE: An end of file is only an error if we were
            //       in the middle of scanning some markup. -Ac
            if (fMarkupDepth != 0) {
                reportFatalError("PrematureEOF", null);
            }

        } // endOfFileHook()

    } // class FragmentContentDriver

    /**
     * Debugging aid: prints the given string to {@code System.out}.
     *
     * @param str the text to print.
     */
    static void pr(String str) {
        System.out.println(str) ;
    }

    // Set to true by refresh(int) once the pending fTempString data has been
    // appended to fContentBuffer.
    // NOTE(review): presumably tells consumers to read character data from
    // fContentBuffer rather than fTempString -- confirm against the callers.
    protected boolean fUsebuffer ;

    /**
     * Gets an XMLString (used to store an attribute value) from the special
     * pool maintained for attributes.
     * fAttributeCacheUsedCount tracks the number of pooled strings that have
     * been consumed. If all of them have been consumed, a new XMLString is
     * added to the pool and returned. The counter is incremented on both paths.
     *
     * @return XMLString XMLString used to store an attribute value.
     */

    protected XMLString getString(){
        // Reuse a pooled instance while the used-count is still below either the
        // initial pool size or the current size of the pool.
        if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){
            return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++);
        } else{
            // Pool exhausted: grow it by one entry and hand out the new entry.
            XMLString str = new XMLString();
            fAttributeCacheUsedCount++;
            attributeValueCache.add(str);
            return str;
        }
    }

    /**
     * Implements XMLBufferListener interface.
     * Delegates to {@link #refresh(int)} with a position of 0.
     */

    public void refresh(){
        refresh(0);
    }

    /**
     * Receives callbacks from {@link XMLEntityReader } when the buffer
     * is being changed.
     *
     * @param refreshPosition not used by this implementation.
     */
    public void refresh(int refreshPosition){
        //If you are reading attributes and you got a callback
        //cache available attributes.
        if(fReadingAttributes){
            fAttributes.refresh();
        }
        if(fScannerState == SCANNER_STATE_CHARACTER_DATA){
            //since fTempString directly matches to the underlying main buffer
            //store the data into buffer
            fContentBuffer.append(fTempString);
            //clear the XMLString so that data can't be added again.
            fTempString.length = 0;
            fUsebuffer = true;
        }
    }

} // class XMLDocumentFragmentScannerImpl