1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.xml.internal.stream.XMLBufferListener; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import com.sun.xml.internal.stream.XMLInputFactoryImpl; 27 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 28 29 import java.io.EOFException; 30 import java.io.IOException; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.events.XMLEvent; 33 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 34 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 35 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 36 import com.sun.org.apache.xerces.internal.util.XMLChar; 37 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 38 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 39 import com.sun.org.apache.xerces.internal.xni.QName; 40 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 41 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 42 import 
com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 43 import com.sun.org.apache.xerces.internal.xni.XMLString; 44 import com.sun.org.apache.xerces.internal.xni.XNIException; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 46 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 47 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 50 import com.sun.org.apache.xerces.internal.xni.Augmentations; 51 import com.sun.org.apache.xerces.internal.impl.Constants; 52 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 53 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 54 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 55 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 56 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 57 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 58 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.State; 59 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 60 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 61 import javax.xml.XMLConstants; 62 import javax.xml.stream.XMLStreamConstants; 63 import javax.xml.stream.events.XMLEvent; 64 65 /** 66 * 67 * This class is responsible for scanning the structure and content 68 * of document fragments. 69 * 70 * This class has been modified as per the new design which is more suited to 71 * efficiently build pull parser. Lot of improvements have been done and 72 * the code has been added to support stax functionality/features. 
*
 * @author Neeraj Bajaj SUN Microsystems
 * @author K.Venugopal SUN Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, SUN Microsystems
 *
 */
public class XMLDocumentFragmentScannerImpl
        extends XMLScanner
        implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {

    //
    // Constants
    //

    /**
     * Per-element attribute limit. Assigned in resetCommon() from the
     * security manager's ELEMENT_ATTRIBUTE_LIMIT, or 0 when no security
     * manager is set.
     */
    protected int fElementAttributeLimit;

    /** External subset resolver. **/
    protected ExternalSubsetResolver fExternalSubsetResolver;

    // scanner states

    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    /** Scanner state: reading an attribute name, e.g. 'type' in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    /** Scanner state: reading an attribute value, e.g. "hard" in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL */
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    /** Scanner state: reading a start tag, e.g. &lt;book type="hard"&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    /** Scanner state: reading an end tag, e.g. &lt;/book&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    /** Scanner state: character reference. */
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;

    /** Scanner state: built-in entity references. */
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard uri conformant */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: Security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /** access external dtd: file protocol
     *  For DOM/SAX, the secure feature is set to true by default
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features. Indexes line up with FEATURE_DEFAULTS. */
    private static final String[] RECOGNIZED_FEATURES = {
                NAMESPACES,
                VALIDATION,
                NOTIFY_BUILTIN_REFS,
                NOTIFY_CHAR_REFS,
                Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Feature defaults, one entry per RECOGNIZED_FEATURES slot (null = no default reported). */
    private static final Boolean[] FEATURE_DEFAULTS = {
                Boolean.TRUE,
                null,
                Boolean.FALSE,
                Boolean.FALSE,
                Boolean.TRUE
    };

    /** Recognized properties. Indexes line up with PROPERTY_DEFAULTS. */
    private static final String[] RECOGNIZED_PROPERTIES = {
                SYMBOL_TABLE,
                ERROR_REPORTER,
                ENTITY_MANAGER,
                XML_SECURITY_PROPERTY_MANAGER
    };

    /**
     * Property defaults, one entry per RECOGNIZED_PROPERTIES slot.
     *
     * NOTE(review): the default paired with XML_SECURITY_PROPERTY_MANAGER is
     * the String EXTERNAL_ACCESS_DEFAULT, while reset() and setProperty() in
     * this class cast that property's value to XMLSecurityPropertyManager --
     * confirm this default is never fed back through setProperty().
     */
    private static final Object[] PROPERTY_DEFAULTS = {
                null,
                null,
                null,
                EXTERNAL_ACCESS_DEFAULT
    };

    /** Characters of "[CDATA[". */
    private static final char [] cdata = {'[','C','D','A','T','A','['};
    /** Characters of "&lt;?xml". */
    static final char [] xmlDecl = {'<','?','x','m','l'};
    /** Characters of "&lt;/". */
    private static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    /** General debug flag. */
    protected static final boolean DEBUG = false;
    /** Debug coalescing. */
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    /** Last scanner state; compared against XMLEvent.END_ELEMENT in getElementQName(). */
    protected int fScannerLastState ;

    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;

    /** Entity stack. */
    protected int[] fEntityStack = new int[4];

    /** Markup depth.
*/
    protected int fMarkupDepth;

    // is the element empty
    protected boolean fEmptyElement ;

    // track if we are reading attributes, this is useful while
    // there is a callback
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /** Standalone. */
    protected boolean fStandaloneSet;
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    /** Processing-instruction target, returned by getPITarget(). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    // StAX related properties
    // default values.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding pseudo-attribute value recorded by scanXMLDeclOrTextDecl(). */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    /**
     * comma-delimited list of protocols that are allowed for the purpose
     * of accessing external dtd or entity references
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * standard uri conformant (strict uri).
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver. */
    protected Driver fDriver;

    /** Content driver. */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** String. */
    protected XMLString fTempString = new XMLString();

    /** String. */
    protected XMLString fTempString2 = new XMLString();

    /** Array of 3 strings. */
    private String[] fStrings = new String[3];

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Stores character data. Also accessible to derived classes; returned by
     * getPIData() as the PI data buffer.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    /** Built-in reference character event */
    protected boolean builtInRefCharacterHandled = false;

    // skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    // create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    // pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    // 2D array to store pointer info
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    // indicates whether the parsing process should be stopped
    private boolean stopProcess = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
429 */ 430 public void setInputSource(XMLInputSource inputSource) throws IOException { 431 fEntityManager.setEntityHandler(this); 432 fEntityManager.startEntity("$fragment$", inputSource, false, true); 433 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 434 } // setInputSource(XMLInputSource) 435 436 /** 437 * Scans a document. 438 * 439 * @param complete True if the scanner should scan the document 440 * completely, pushing all events to the registered 441 * document handler. A value of false indicates that 442 * that the scanner should only scan the next portion 443 * of the document and return. A scanner instance is 444 * permitted to completely scan a document if it does 445 * not support this "pull" scanning model. 446 * 447 * @return True if there is more to scan, false otherwise. 448 */ 449 public boolean scanDocument(boolean complete) 450 throws IOException, XNIException { 451 452 // keep dispatching "events" 453 fEntityManager.setEntityHandler(this); 454 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 455 456 int event = next(); 457 do { 458 switch (event) { 459 case XMLStreamConstants.START_DOCUMENT : 460 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 461 break; 462 case XMLStreamConstants.START_ELEMENT : 463 //System.out.println(" in scann element"); 464 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 465 break; 466 case XMLStreamConstants.CHARACTERS : 467 fDocumentHandler.characters(getCharacterData(),null); 468 break; 469 case XMLStreamConstants.SPACE: 470 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
471 //System.out.println("in the space"); 472 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 473 break; 474 case XMLStreamConstants.ENTITY_REFERENCE : 475 //entity reference callback are given in startEntity 476 break; 477 case XMLStreamConstants.PROCESSING_INSTRUCTION : 478 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 479 break; 480 case XMLStreamConstants.COMMENT : 481 //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 482 fDocumentHandler.comment(getCharacterData(),null); 483 break; 484 case XMLStreamConstants.DTD : 485 //all DTD related callbacks are handled in DTDScanner. 486 //1. Stax doesn't define DTD states as it does for XML Document. 487 //therefore we don't need to take care of anything here. So Just break; 488 break; 489 case XMLStreamConstants.CDATA: 490 fDocumentHandler.startCDATA(null); 491 //xxx: check if CDATA values comes from getCharacterData() function 492 fDocumentHandler.characters(getCharacterData(),null); 493 fDocumentHandler.endCDATA(null); 494 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 495 break; 496 case XMLStreamConstants.NOTATION_DECLARATION : 497 break; 498 case XMLStreamConstants.ENTITY_DECLARATION : 499 break; 500 case XMLStreamConstants.NAMESPACE : 501 break; 502 case XMLStreamConstants.ATTRIBUTE : 503 break; 504 case XMLStreamConstants.END_ELEMENT : 505 //do not give callback here. 506 //this callback is given in scanEndElement function. 507 //fDocumentHandler.endElement(getElementQName(),null); 508 break; 509 default : 510 throw new InternalError("processing event: " + event); 511 512 } 513 514 if (stopProcess) { 515 break; 516 } 517 event = next(); 518 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 519 520 if(event == XMLStreamConstants.END_DOCUMENT) { 521 fDocumentHandler.endDocument(null); 522 return false; 523 } 524 525 return true; 526 527 } // scanDocument(boolean):boolean 528 529 /** 530 * Stops the parsing process. 
531 */ 532 @Override 533 public boolean stop() { 534 return stopProcess = true; 535 } 536 537 /** 538 * Resumes parsing after it was stopped by calling the stop method. 539 */ 540 @Override 541 public boolean resume() { 542 //resume only if the process was stopped. 543 if (stopProcess) { 544 stopProcess = false; 545 try { 546 scanDocument(true); 547 return true; 548 } catch (IOException | XNIException ex) { 549 //can not resume the parsing process, do nothing. 550 } 551 } 552 return false; 553 } 554 555 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 556 if(fScannerLastState == XMLEvent.END_ELEMENT){ 557 fElementQName.setValues(fElementStack.getLastPoppedElement()); 558 } 559 return fElementQName ; 560 } 561 562 /** return the next state on the input 563 * @return int 564 */ 565 566 public int next() throws IOException, XNIException { 567 return fDriver.next(); 568 } 569 570 // 571 // XMLComponent methods 572 // 573 574 /** 575 * Resets the component. The component can query the component manager 576 * about any features and properties that affect the operation of the 577 * component. 578 * 579 * @param componentManager The component manager. 580 * 581 * @throws SAXException Thrown by component on initialization error. 582 * For example, if a feature or property is 583 * required for the operation of the component, the 584 * component manager may throw a 585 * SAXNotRecognizedException or a 586 * SAXNotSupportedException. 
587 */ 588 589 public void reset(XMLComponentManager componentManager) 590 throws XMLConfigurationException { 591 592 super.reset(componentManager); 593 594 // other settings 595 // fDocumentSystemId = null; 596 597 // sax features 598 //fAttributes.setNamespaces(fNamespaces); 599 600 // xerces features 601 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 602 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 603 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 604 605 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 606 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 607 (ExternalSubsetResolver) resolver : null; 608 609 //attribute 610 fReadingAttributes = false; 611 //xxx: external entities are supported in Xerces 612 // it would be good to define feature for this case 613 fSupportExternalEntities = true; 614 fReplaceEntityReferences = true; 615 fIsCoalesce = false; 616 617 // setup Driver 618 setScannerState(SCANNER_STATE_CONTENT); 619 setDriver(fContentDriver); 620 621 // JAXP 1.5 features and properties 622 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 623 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 624 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 625 626 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 627 628 resetCommon(); 629 //fEntityManager.test(); 630 } // reset(XMLComponentManager) 631 632 633 public void reset(PropertyManager propertyManager){ 634 635 super.reset(propertyManager); 636 637 // other settings 638 // fDocumentSystemId = null; 639 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 640 fNotifyBuiltInRefs = false ; 641 642 //fElementStack2.clear(); 643 //fReplaceEntityReferences = true; 644 
//fSupportExternalEntities = true; 645 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 646 fReplaceEntityReferences = bo.booleanValue(); 647 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 648 fSupportExternalEntities = bo.booleanValue(); 649 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 650 if(cdata != null) 651 fReportCdataEvent = cdata.booleanValue() ; 652 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 653 if(coalesce != null) 654 fIsCoalesce = coalesce.booleanValue(); 655 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 656 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 657 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 658 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 659 // setup Driver 660 //we dont need to do this -- nb. 
661 //setScannerState(SCANNER_STATE_CONTENT); 662 //setDriver(fContentDriver); 663 //fEntityManager.test(); 664 665 // JAXP 1.5 features and properties 666 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 667 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 668 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 669 670 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 671 resetCommon(); 672 } // reset(XMLComponentManager) 673 674 void resetCommon() { 675 // initialize vars 676 fMarkupDepth = 0; 677 fCurrentElement = null; 678 fElementStack.clear(); 679 fHasExternalDTD = false; 680 fStandaloneSet = false; 681 fStandalone = false; 682 fInScanContent = false; 683 //skipping algorithm 684 fShouldSkip = false; 685 fAdd = false; 686 fSkip = false; 687 stopProcess = false; 688 689 fEntityStore = fEntityManager.getEntityStore(); 690 dtdGrammarUtil = null; 691 692 if (fSecurityManager != null) { 693 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 694 } else { 695 fElementAttributeLimit = 0; 696 } 697 fLimitAnalyzer = new XMLLimitAnalyzer(); 698 fEntityManager.setLimitAnalyzer(fLimitAnalyzer); 699 } 700 701 /** 702 * Returns a list of feature identifiers that are recognized by 703 * this component. This method may return null if no features 704 * are recognized by this component. 705 */ 706 public String[] getRecognizedFeatures() { 707 return (String[])(RECOGNIZED_FEATURES.clone()); 708 } // getRecognizedFeatures():String[] 709 710 /** 711 * Sets the state of a feature. This method is called by the component 712 * manager any time after reset when a feature changes state. 713 * <p> 714 * <strong>Note:</strong> Components should silently ignore features 715 * that do not affect the operation of the component. 716 * 717 * @param featureId The feature identifier. 718 * @param state The state of the feature. 
719 * 720 * @throws SAXNotRecognizedException The component should not throw 721 * this exception. 722 * @throws SAXNotSupportedException The component should not throw 723 * this exception. 724 */ 725 public void setFeature(String featureId, boolean state) 726 throws XMLConfigurationException { 727 728 super.setFeature(featureId, state); 729 730 // Xerces properties 731 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 732 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 733 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 734 fNotifyBuiltInRefs = state; 735 } 736 } 737 738 } // setFeature(String,boolean) 739 740 /** 741 * Returns a list of property identifiers that are recognized by 742 * this component. This method may return null if no properties 743 * are recognized by this component. 744 */ 745 public String[] getRecognizedProperties() { 746 return (String[])(RECOGNIZED_PROPERTIES.clone()); 747 } // getRecognizedProperties():String[] 748 749 /** 750 * Sets the value of a property. This method is called by the component 751 * manager any time after reset when a property changes value. 752 * <p> 753 * <strong>Note:</strong> Components should silently ignore properties 754 * that do not affect the operation of the component. 755 * 756 * @param propertyId The property identifier. 757 * @param value The value of the property. 758 * 759 * @throws SAXNotRecognizedException The component should not throw 760 * this exception. 761 * @throws SAXNotSupportedException The component should not throw 762 * this exception. 
763 */ 764 public void setProperty(String propertyId, Object value) 765 throws XMLConfigurationException { 766 767 super.setProperty(propertyId, value); 768 769 // Xerces properties 770 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 771 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 772 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 773 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 774 fEntityManager = (XMLEntityManager)value; 775 return; 776 } 777 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 778 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 779 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 780 (ExternalSubsetResolver) value : null; 781 return; 782 } 783 } 784 785 786 // Xerces properties 787 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 788 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 789 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 790 fEntityManager = (XMLEntityManager)value; 791 } 792 return; 793 } 794 795 //JAXP 1.5 properties 796 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 797 { 798 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 799 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 800 } 801 802 } // setProperty(String,Object) 803 804 /** 805 * Returns the default state for a feature, or null if this 806 * component does not want to report a default value for this 807 * feature. 808 * 809 * @param featureId The feature identifier. 
810 * 811 * @since Xerces 2.2.0 812 */ 813 public Boolean getFeatureDefault(String featureId) { 814 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 815 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 816 return FEATURE_DEFAULTS[i]; 817 } 818 } 819 return null; 820 } // getFeatureDefault(String):Boolean 821 822 /** 823 * Returns the default state for a property, or null if this 824 * component does not want to report a default value for this 825 * property. 826 * 827 * @param propertyId The property identifier. 828 * 829 * @since Xerces 2.2.0 830 */ 831 public Object getPropertyDefault(String propertyId) { 832 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 833 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 834 return PROPERTY_DEFAULTS[i]; 835 } 836 } 837 return null; 838 } // getPropertyDefault(String):Object 839 840 // 841 // XMLDocumentSource methods 842 // 843 844 /** 845 * setDocumentHandler 846 * 847 * @param documentHandler 848 */ 849 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 850 fDocumentHandler = documentHandler; 851 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 852 } // setDocumentHandler(XMLDocumentHandler) 853 854 855 /** Returns the document handler */ 856 public XMLDocumentHandler getDocumentHandler(){ 857 return fDocumentHandler; 858 } 859 860 // 861 // XMLEntityHandler methods 862 // 863 864 /** 865 * This method notifies of the start of an entity. The DTD has the 866 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 867 * general entities are just specified by their name. 868 * 869 * @param name The name of the entity. 870 * @param identifier The resource identifier. 871 * @param encoding The auto-detected IANA encoding name of the entity 872 * stream. This value will be null in those situations 873 * where the entity encoding is not auto-detected (e.g. 874 * internal entities or a document entity that is 875 * parsed from a java.io.Reader). 
876 * @param augs Additional information that may include infoset augmentations 877 * 878 * @throws XNIException Thrown by handler to signal an error. 879 */ 880 public void startEntity(String name, 881 XMLResourceIdentifier identifier, 882 String encoding, Augmentations augs) throws XNIException { 883 884 // keep track of this entity before fEntityDepth is increased 885 if (fEntityDepth == fEntityStack.length) { 886 int[] entityarray = new int[fEntityStack.length * 2]; 887 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 888 fEntityStack = entityarray; 889 } 890 fEntityStack[fEntityDepth] = fMarkupDepth; 891 892 super.startEntity(name, identifier, encoding, augs); 893 894 // WFC: entity declared in external subset in standalone doc 895 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 896 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 897 new Object[]{name}); 898 } 899 900 /** we are not calling the handlers yet.. */ 901 // call handler 902 if (fDocumentHandler != null && !fScanningAttribute) { 903 if (!name.equals("[xml]")) { 904 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 905 } 906 } 907 908 } // startEntity(String,XMLResourceIdentifier,String) 909 910 /** 911 * This method notifies the end of an entity. The DTD has the pseudo-name 912 * of "[dtd]" parameter entity names start with '%'; and general entities 913 * are just specified by their name. 914 * 915 * @param name The name of the entity. 916 * @param augs Additional information that may include infoset augmentations 917 * 918 * @throws XNIException Thrown by handler to signal an error. 
919 */ 920 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 921 922 /** 923 * // flush possible pending output buffer - see scanContent 924 * if (fInScanContent && fStringBuffer.length != 0 925 * && fDocumentHandler != null) { 926 * fDocumentHandler.characters(fStringBuffer, null); 927 * fStringBuffer.length = 0; // make sure we know it's been flushed 928 * } 929 */ 930 super.endEntity(name, augs); 931 932 // make sure markup is properly balanced 933 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 934 reportFatalError("MarkupEntityMismatch", null); 935 } 936 937 /**/ 938 // call handler 939 if (fDocumentHandler != null && !fScanningAttribute) { 940 if (!name.equals("[xml]")) { 941 fDocumentHandler.endGeneralEntity(name, augs); 942 } 943 } 944 945 946 } // endEntity(String) 947 948 // 949 // Protected methods 950 // 951 952 // Driver factory methods 953 954 /** Creates a content Driver. */ 955 protected Driver createContentDriver() { 956 return new FragmentContentDriver(); 957 } // createContentDriver():Driver 958 959 // scanning methods 960 961 /** 962 * Scans an XML or text declaration. 963 * <p> 964 * <pre> 965 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 966 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 967 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 968 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 969 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 970 * | ('"' ('yes' | 'no') '"')) 971 * 972 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 973 * </pre> 974 * 975 * @param scanningTextDecl True if a text declaration is to 976 * be scanned instead of an XML 977 * declaration. 
978 */ 979 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 980 throws IOException, XNIException { 981 982 // scan decl 983 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 984 fMarkupDepth--; 985 986 // pseudo-attribute values 987 String version = fStrings[0]; 988 String encoding = fStrings[1]; 989 String standalone = fStrings[2]; 990 fDeclaredEncoding = encoding; 991 // set standalone 992 fStandaloneSet = standalone != null; 993 fStandalone = fStandaloneSet && standalone.equals("yes"); 994 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 995 //but this information is only related with Document Entity. 996 fEntityManager.setStandalone(fStandalone); 997 998 999 // call handler 1000 if (fDocumentHandler != null) { 1001 if (scanningTextDecl) { 1002 fDocumentHandler.textDecl(version, encoding, null); 1003 } else { 1004 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1005 } 1006 } 1007 1008 if(version != null){ 1009 fEntityScanner.setVersion(version); 1010 fEntityScanner.setXMLVersion(version); 1011 } 1012 // set encoding on reader, only if encoding was not specified by the application explicitly 1013 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1014 fEntityScanner.setEncoding(encoding); 1015 } 1016 1017 } // scanXMLDeclOrTextDecl(boolean) 1018 1019 public String getPITarget(){ 1020 return fPITarget ; 1021 } 1022 1023 public XMLStringBuffer getPIData(){ 1024 return fContentBuffer ; 1025 } 1026 1027 //XXX: why not this function behave as per the state of the parser? 1028 public XMLString getCharacterData(){ 1029 if(fUsebuffer){ 1030 return fContentBuffer ; 1031 }else{ 1032 return fTempString; 1033 } 1034 1035 } 1036 1037 1038 /** 1039 * Scans a processing data. This is needed to handle the situation 1040 * where a document starts with a processing instruction whose 1041 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1042 * 1043 * @param target The PI target 1044 * @param data The XMLStringBuffer to fill in with the data 1045 */ 1046 protected void scanPIData(String target, XMLStringBuffer data) 1047 throws IOException, XNIException { 1048 1049 super.scanPIData(target, data); 1050 1051 //set the PI target and values 1052 fPITarget = target ; 1053 1054 fMarkupDepth--; 1055 1056 } // scanPIData(String) 1057 1058 /** 1059 * Scans a comment. 1060 * <p> 1061 * <pre> 1062 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1063 * </pre> 1064 * <p> 1065 * <strong>Note:</strong> Called after scanning past '<!--' 1066 */ 1067 protected void scanComment() throws IOException, XNIException { 1068 fContentBuffer.clear(); 1069 scanComment(fContentBuffer); 1070 //getTextCharacters can also be called for reading comments 1071 fUsebuffer = true; 1072 fMarkupDepth--; 1073 1074 } // scanComment() 1075 1076 //xxx value returned by this function may not remain valid if another event is scanned. 1077 public String getComment(){ 1078 return fContentBuffer.toString(); 1079 } 1080 1081 void addElement(String rawname){ 1082 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1083 //storing element raw name in a linear list of array 1084 fElementArray[fElementPointer] = rawname ; 1085 //storing elemnetPointer for particular element depth 1086 1087 if(DEBUG_SKIP_ALGORITHM){ 1088 StringBuffer sb = new StringBuffer() ; 1089 sb.append(" Storing element information ") ; 1090 sb.append(" fElementPointer = " + fElementPointer) ; 1091 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1092 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1093 System.out.println(sb.toString()) ; 1094 } 1095 1096 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1097 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1098 short column = storePointerForADepth(fElementPointer); 1099 if(column > 0){ 1100 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1101 //identity comparison shouldn't take much time and we can rely on this 1102 //since its guaranteed to have same object id for same string. 1103 if(rawname == fElementArray[pointer]){ 1104 fShouldSkip = true ; 1105 fLastPointerLocation = pointer ; 1106 //reset the things and return. 1107 resetPointer((short)fElementStack.fDepth , column) ; 1108 fElementArray[fElementPointer] = null ; 1109 return ; 1110 }else{ 1111 fShouldSkip = false ; 1112 } 1113 } 1114 } 1115 fElementPointer++ ; 1116 } 1117 } 1118 1119 1120 void resetPointer(short depth, short column){ 1121 fPointerInfo[depth] [column] = (short)0; 1122 } 1123 1124 //returns column information at which pointer was stored. 1125 short storePointerForADepth(short elementPointer){ 1126 short depth = (short) fElementStack.fDepth ; 1127 1128 //Stores element pointer locations at particular depth , only 4 pointer locations 1129 //are stored at particular depth for now. 1130 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1131 1132 if(canStore(depth, i)){ 1133 fPointerInfo[depth][i] = elementPointer ; 1134 if(DEBUG_SKIP_ALGORITHM){ 1135 StringBuffer sb = new StringBuffer() ; 1136 sb.append(" Pointer information ") ; 1137 sb.append(" fElementPointer = " + fElementPointer) ; 1138 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1139 sb.append(" column = " + i ) ; 1140 System.out.println(sb.toString()) ; 1141 } 1142 return i; 1143 } 1144 //else 1145 //pointer was not stored because we reached the limit 1146 } 1147 return -1 ; 1148 } 1149 1150 boolean canStore(short depth, short column){ 1151 //colum = 0 , means first element at particular depth 1152 //column = 1, means second element at particular depth 1153 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1154 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1155 } 1156 1157 1158 short getElementPointer(short depth, short column){ 1159 //colum = 0 , means first element at particular depth 1160 //column = 1, means second element at particular depth 1161 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1162 return fPointerInfo[depth][column] ; 1163 } 1164 1165 //this function assumes that string passed is not null and skips 1166 //the following string from the buffer this makes sure 1167 boolean skipFromTheBuffer(String rawname) throws IOException{ 1168 if(fEntityScanner.skipString(rawname)){ 1169 char c = (char)fEntityScanner.peekChar() ; 1170 //If the start element was completely skipped we should encounter either ' '(space), 1171 //or '/' (in case of empty element) or '>' 1172 if( c == ' ' || c == '/' || c == '>'){ 1173 fElementRawname = rawname ; 1174 return true ; 1175 } else{ 1176 return false; 1177 } 1178 } else 1179 return false ; 1180 } 1181 1182 boolean skipQElement(String rawname) throws IOException{ 1183 1184 final int c = fEntityScanner.getChar(rawname.length()); 1185 //if this character is still valid element name -- this means string can't match 1186 if(XMLChar.isName(c)){ 1187 return false; 1188 }else{ 1189 return fEntityScanner.skipString(rawname); 1190 } 1191 } 1192 1193 protected boolean skipElement() throws IOException { 1194 1195 if(!fShouldSkip) return false ; 1196 1197 if(fLastPointerLocation != 0){ 1198 //Look at the next element stored in the array list.. we might just get a match. 1199 String rawname = fElementArray[fLastPointerLocation + 1] ; 1200 if(rawname != null && skipFromTheBuffer(rawname)){ 1201 fLastPointerLocation++ ; 1202 if(DEBUG_SKIP_ALGORITHM){ 1203 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1204 } 1205 return true ; 1206 } else{ 1207 //reset it back to zero... we haven't got the correct subset yet. 
1208 fLastPointerLocation = 0 ; 1209 1210 } 1211 } 1212 //xxx: we can put some logic here as from what column it should start looking 1213 //for now we always start at 0 1214 //fallback to tolerant algorithm, it would look for differnt element stored at different 1215 //depth and get us the pointer location. 1216 return fShouldSkip && skipElement((short)0); 1217 1218 } 1219 1220 //start of the column at which it should try searching 1221 boolean skipElement(short column) throws IOException { 1222 short depth = (short)fElementStack.fDepth ; 1223 1224 if(depth > MAX_DEPTH_LIMIT){ 1225 return fShouldSkip = false ; 1226 } 1227 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1228 short pointer = getElementPointer(depth , i ) ; 1229 1230 if(pointer == 0){ 1231 return fShouldSkip = false ; 1232 } 1233 1234 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1235 if(DEBUG_SKIP_ALGORITHM){ 1236 System.out.println(); 1237 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1238 System.out.println(); 1239 } 1240 fLastPointerLocation = pointer ; 1241 return fShouldSkip = true ; 1242 } 1243 } 1244 return fShouldSkip = false ; 1245 } 1246 1247 /** 1248 * Scans a start element. This method will handle the binding of 1249 * namespace information and notifying the handler of the start 1250 * of the element. 1251 * <p> 1252 * <pre> 1253 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1254 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1255 * </pre> 1256 * <p> 1257 * <strong>Note:</strong> This method assumes that the leading 1258 * '<' character has been consumed. 1259 * <p> 1260 * <strong>Note:</strong> This method uses the fElementQName and 1261 * fAttributes variables. The contents of these variables will be 1262 * destroyed. The caller should copy important information out of 1263 * these variables before calling this method. 
1264 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1265 * 1266 * @return True if element is empty. (i.e. It matches 1267 * production [44]. 1268 */ 1269 // fElementQName will have the details of element just read.. 1270 // fAttributes will have the details of all the attributes. 1271 protected boolean scanStartElement() 1272 throws IOException, XNIException { 1273 1274 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1275 //when skipping is true and no more elements should be added 1276 if(fSkip && !fAdd){ 1277 //get the stored element -- if everything goes right this should match the 1278 //token in the buffer 1279 1280 QName name = fElementStack.getNext(); 1281 1282 if(DEBUG_SKIP_ALGORITHM){ 1283 System.out.println("Trying to skip String = " + name.rawname); 1284 } 1285 1286 //Be conservative -- if skipping fails -- stop. 1287 fSkip = fEntityScanner.skipString(name.rawname); 1288 1289 if(fSkip){ 1290 if(DEBUG_SKIP_ALGORITHM){ 1291 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1292 } 1293 fElementStack.push(); 1294 fElementQName = name; 1295 }else{ 1296 //if skipping fails reposition the stack or fallback to normal way of processing 1297 fElementStack.reposition(); 1298 if(DEBUG_SKIP_ALGORITHM){ 1299 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1300 } 1301 } 1302 } 1303 1304 //we are still at the stage of adding elements 1305 //the elements were not matched or 1306 //fSkip is not set to true 1307 if(!fSkip || fAdd){ 1308 //get the next element from the stack 1309 fElementQName = fElementStack.nextElement(); 1310 // name 1311 if (fNamespaces) { 1312 fEntityScanner.scanQName(fElementQName); 1313 } else { 1314 String name = fEntityScanner.scanName(); 1315 fElementQName.setValues(null, name, name, null); 1316 } 1317 1318 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 
1319 if(DEBUG_SKIP_ALGORITHM){ 1320 if(fAdd){ 1321 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1322 } 1323 } 1324 1325 } 1326 1327 //when the elements are being added , we need to check if we are set for skipping the elements 1328 if(fAdd){ 1329 //this sets the value of fAdd variable 1330 fElementStack.matchElement(fElementQName); 1331 } 1332 1333 1334 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1335 fCurrentElement = fElementQName; 1336 1337 String rawname = fElementQName.rawname; 1338 1339 fEmptyElement = false; 1340 1341 fAttributes.removeAllAttributes(); 1342 1343 checkDepth(rawname); 1344 if(!seekCloseOfStartTag()){ 1345 fReadingAttributes = true; 1346 fAttributeCacheUsedCount =0; 1347 fStringBufferIndex =0; 1348 fAddDefaultAttr = true; 1349 do { 1350 scanAttribute(fAttributes); 1351 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1352 fAttributes.getLength() > fElementAttributeLimit){ 1353 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1354 "ElementAttributeLimit", 1355 new Object[]{rawname, fElementAttributeLimit }, 1356 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1357 } 1358 1359 } while (!seekCloseOfStartTag()); 1360 fReadingAttributes=false; 1361 } 1362 1363 if (fEmptyElement) { 1364 //decrease the markup depth.. 1365 fMarkupDepth--; 1366 1367 // check that this element was opened in the same entity 1368 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1369 reportFatalError("ElementEntityMismatch", 1370 new Object[]{fCurrentElement.rawname}); 1371 } 1372 // call handler 1373 if (fDocumentHandler != null) { 1374 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1375 } 1376 1377 //We should not be popping out the context here in endELement becaause the namespace context is still 1378 //valid when parser is at the endElement state. 
1379 //if (fNamespaces) { 1380 // fNamespaceContext.popContext(); 1381 //} 1382 1383 //pop the element off the stack.. 1384 fElementStack.popElement(); 1385 1386 } else { 1387 1388 if(dtdGrammarUtil != null) 1389 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1390 if(fDocumentHandler != null){ 1391 //complete element and attributes are traversed in this function so we can send a callback 1392 //here. 1393 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1394 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1395 } 1396 } 1397 1398 1399 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1400 return fEmptyElement; 1401 1402 } // scanStartElement():boolean 1403 1404 /** 1405 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1406 * Characters are consumed. 1407 */ 1408 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1409 // spaces 1410 boolean sawSpace = fEntityScanner.skipSpaces(); 1411 1412 // end tag? 1413 final int c = fEntityScanner.peekChar(); 1414 if (c == '>') { 1415 fEntityScanner.scanChar(); 1416 return true; 1417 } else if (c == '/') { 1418 fEntityScanner.scanChar(); 1419 if (!fEntityScanner.skipChar('>')) { 1420 reportFatalError("ElementUnterminated", 1421 new Object[]{fElementQName.rawname}); 1422 } 1423 fEmptyElement = true; 1424 return true; 1425 } else if (!isValidNameStartChar(c) || !sawSpace) { 1426 reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); 1427 } 1428 1429 return false; 1430 } 1431 1432 public boolean hasAttributes(){ 1433 return fAttributes.getLength() > 0 ? true : false ; 1434 } 1435 1436 1437 /** 1438 * Scans an attribute. 1439 * <p> 1440 * <pre> 1441 * [41] Attribute ::= Name Eq AttValue 1442 * </pre> 1443 * <p> 1444 * <strong>Note:</strong> This method assumes that the next 1445 * character on the stream is the first character of the attribute 1446 * name. 
1447 * <p> 1448 * <strong>Note:</strong> This method uses the fAttributeQName and 1449 * fQName variables. The contents of these variables will be 1450 * destroyed. 1451 * 1452 * @param attributes The attributes list for the scanned attribute. 1453 */ 1454 1455 /** 1456 * protected void scanAttribute(AttributeIteratorImpl attributes) 1457 * throws IOException, XNIException { 1458 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1459 * 1460 * 1461 * // name 1462 * if (fNamespaces) { 1463 * fEntityScanner.scanQName(fAttributeQName); 1464 * } 1465 * else { 1466 * String name = fEntityScanner.scanName(); 1467 * fAttributeQName.setValues(null, name, name, null); 1468 * } 1469 * 1470 * // equals 1471 * fEntityScanner.skipSpaces(); 1472 * if (!fEntityScanner.skipChar('=')) { 1473 * reportFatalError("EqRequiredInAttribute", 1474 * new Object[]{fAttributeQName.rawname}); 1475 * } 1476 * fEntityScanner.skipSpaces(); 1477 * 1478 * 1479 * // content 1480 * int oldLen = attributes.getLength(); 1481 */ 1482 /**xxx there is one check of duplicate attribute that has been removed. 
1483 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1484 * 1485 * // WFC: Unique Att Spec 1486 * if (oldLen == attributes.getLength()) { 1487 * reportFatalError("AttributeNotUnique", 1488 * new Object[]{fCurrentElement.rawname, 1489 * fAttributeQName.rawname}); 1490 * } 1491 */ 1492 1493 /* 1494 //REVISIT: one more case needs to be included: external PE and standalone is no 1495 boolean isVC = fHasExternalDTD && !fStandalone; 1496 scanAttributeValue(fTempString, fTempString2, 1497 fAttributeQName.rawname, attributes, 1498 oldLen, isVC); 1499 1500 //attributes.setValue(oldLen, fTempString.toString()); 1501 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1502 //attributes.setSpecified(oldLen, true); 1503 1504 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1505 fAttributes.addAttribute(attribute); 1506 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1507 } // scanAttribute(XMLAttributes) 1508 1509 */ 1510 1511 /** return the attribute iterator implementation */ 1512 public XMLAttributesIteratorImpl getAttributeIterator(){ 1513 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1514 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1515 fAddDefaultAttr = false; 1516 } 1517 return fAttributes; 1518 } 1519 1520 /** return if standalone is set */ 1521 public boolean standaloneSet(){ 1522 return fStandaloneSet; 1523 } 1524 /** return if the doucment is standalone */ 1525 public boolean isStandAlone(){ 1526 return fStandalone ; 1527 } 1528 /** 1529 * Scans an attribute name value pair. 1530 * <p> 1531 * <pre> 1532 * [41] Attribute ::= Name Eq AttValue 1533 * </pre> 1534 * <p> 1535 * <strong>Note:</strong> This method assumes that the next 1536 * character on the stream is the first character of the attribute 1537 * name. 
1538 * <p> 1539 * <strong>Note:</strong> This method uses the fAttributeQName and 1540 * fQName variables. The contents of these variables will be 1541 * destroyed. 1542 * 1543 * @param attributes The attributes list for the scanned attribute. 1544 */ 1545 1546 protected void scanAttribute(XMLAttributes attributes) 1547 throws IOException, XNIException { 1548 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1549 1550 // name 1551 if (fNamespaces) { 1552 fEntityScanner.scanQName(fAttributeQName); 1553 } else { 1554 String name = fEntityScanner.scanName(); 1555 fAttributeQName.setValues(null, name, name, null); 1556 } 1557 1558 // equals 1559 fEntityScanner.skipSpaces(); 1560 if (!fEntityScanner.skipChar('=')) { 1561 reportFatalError("EqRequiredInAttribute", 1562 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1563 } 1564 fEntityScanner.skipSpaces(); 1565 1566 int attIndex = 0 ; 1567 //REVISIT: one more case needs to be included: external PE and standalone is no 1568 boolean isVC = fHasExternalDTD && !fStandalone; 1569 //fTempString would store attribute value 1570 ///fTempString2 would store attribute non-normalized value 1571 1572 //this function doesn't use 'attIndex'. We are adding the attribute later 1573 //after we have figured out that current attribute is not namespace declaration 1574 //since scanAttributeValue doesn't use attIndex parameter therefore we 1575 //can safely add the attribute later.. 1576 XMLString tmpStr = getString(); 1577 1578 scanAttributeValue(tmpStr, fTempString2, 1579 fAttributeQName.rawname, attributes, 1580 attIndex, isVC, fCurrentElement.rawname); 1581 1582 // content 1583 int oldLen = attributes.getLength(); 1584 //if the attribute name already exists.. 
new value is replaced with old value 1585 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1586 1587 // WFC: Unique Att Spec 1588 //attributes count will be same if the current attribute name already exists for this element name. 1589 //this means there are two duplicate attributes. 1590 if (oldLen == attributes.getLength()) { 1591 reportFatalError("AttributeNotUnique", 1592 new Object[]{fCurrentElement.rawname, 1593 fAttributeQName.rawname}); 1594 } 1595 1596 //tmpString contains attribute value 1597 //we are passing null as the attribute value 1598 attributes.setValue(attIndex, null, tmpStr); 1599 1600 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1601 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1602 attributes.setSpecified(attIndex, true); 1603 1604 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1605 1606 } // scanAttribute(XMLAttributes) 1607 1608 /** 1609 * Scans element content. 1610 * 1611 * @return Returns the next character on the stream. 1612 */ 1613 //CHANGED: 1614 //EARLIER: scanContent() 1615 //NOW: scanContent(XMLStringBuffer) 1616 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1617 //this function appends the data to the buffer. 1618 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1619 //set the fTempString length to 0 before passing it on to scanContent 1620 //scanContent sets the correct co-ordinates as per the content read 1621 fTempString.length = 0; 1622 int c = fEntityScanner.scanContent(fTempString); 1623 content.append(fTempString); 1624 fTempString.length = 0; 1625 if (c == '\r') { 1626 // happens when there is the character reference 1627 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1628 fEntityScanner.scanChar(); 1629 content.append((char)c); 1630 c = -1; 1631 } else if (c == ']') { 1632 //fStringBuffer.clear(); 1633 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1634 content.append((char)fEntityScanner.scanChar()); 1635 // remember where we are in case we get an endEntity before we 1636 // could flush the buffer out - this happens when we're parsing an 1637 // entity which ends with a ] 1638 fInScanContent = true; 1639 // 1640 // We work on a single character basis to handle cases such as: 1641 // ']]]>' which we might otherwise miss. 1642 // 1643 if (fEntityScanner.skipChar(']')) { 1644 content.append(']'); 1645 while (fEntityScanner.skipChar(']')) { 1646 content.append(']'); 1647 } 1648 if (fEntityScanner.skipChar('>')) { 1649 reportFatalError("CDEndInContent", null); 1650 } 1651 } 1652 fInScanContent = false; 1653 c = -1; 1654 } 1655 if (fDocumentHandler != null && content.length > 0) { 1656 //fDocumentHandler.characters(content, null); 1657 } 1658 return c; 1659 1660 } // scanContent():int 1661 1662 1663 /** 1664 * Scans a CDATA section. 1665 * <p> 1666 * <strong>Note:</strong> This method uses the fTempString and 1667 * fStringBuffer variables. 1668 * 1669 * @param complete True if the CDATA section is to be scanned 1670 * completely. 1671 * 1672 * @return True if CDATA is completely scanned. 1673 */ 1674 //CHANGED: 1675 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1676 throws IOException, XNIException { 1677 1678 // call handler 1679 if (fDocumentHandler != null) { 1680 //fDocumentHandler.startCDATA(null); 1681 } 1682 1683 while (true) { 1684 //scanData will fill the contentBuffer 1685 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1686 break ; 1687 /** We dont need all this code if we pass ']]>' as delimeter.. 
1688 * int brackets = 2; 1689 * while (fEntityScanner.skipChar(']')) { 1690 * brackets++; 1691 * } 1692 * 1693 * //When we find more than 2 square brackets 1694 * if (fDocumentHandler != null && brackets > 2) { 1695 * //we dont need to clear the buffer.. 1696 * //contentBuffer.clear(); 1697 * for (int i = 2; i < brackets; i++) { 1698 * contentBuffer.append(']'); 1699 * } 1700 * fDocumentHandler.characters(contentBuffer, null); 1701 * } 1702 * 1703 * if (fEntityScanner.skipChar('>')) { 1704 * break; 1705 * } 1706 * if (fDocumentHandler != null) { 1707 * //we dont need to clear the buffer now.. 1708 * //contentBuffer.clear(); 1709 * contentBuffer.append("]]"); 1710 * fDocumentHandler.characters(contentBuffer, null); 1711 * } 1712 **/ 1713 } else { 1714 int c = fEntityScanner.peekChar(); 1715 if (c != -1 && isInvalidLiteral(c)) { 1716 if (XMLChar.isHighSurrogate(c)) { 1717 //contentBuffer.clear(); 1718 //scan surrogates if any.... 1719 scanSurrogates(contentBuffer); 1720 } else { 1721 reportFatalError("InvalidCharInCDSect", 1722 new Object[]{Integer.toString(c,16)}); 1723 fEntityScanner.scanChar(); 1724 } 1725 } 1726 //by this time we have also read surrogate contents if any... 1727 if (fDocumentHandler != null) { 1728 //fDocumentHandler.characters(contentBuffer, null); 1729 } 1730 } 1731 } 1732 fMarkupDepth--; 1733 1734 if (fDocumentHandler != null && contentBuffer.length > 0) { 1735 //fDocumentHandler.characters(contentBuffer, null); 1736 } 1737 1738 // call handler 1739 if (fDocumentHandler != null) { 1740 //fDocumentHandler.endCDATA(null); 1741 } 1742 1743 return true; 1744 1745 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1746 1747 /** 1748 * Scans an end element. 1749 * <p> 1750 * <pre> 1751 * [42] ETag ::= '</' Name S? '>' 1752 * </pre> 1753 * <p> 1754 * <strong>Note:</strong> This method uses the fElementQName variable. 1755 * The contents of this variable will be destroyed. 
The caller should 1756 * copy the needed information out of this variable before calling 1757 * this method. 1758 * 1759 * @return The element depth. 1760 */ 1761 protected int scanEndElement() throws IOException, XNIException { 1762 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1763 1764 // pop context 1765 QName endElementName = fElementStack.popElement(); 1766 1767 String rawname = endElementName.rawname; 1768 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1769 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1770 //In scanners most of the time is consumed on checks done for XML characters, we can 1771 // optimize on it and avoid the checks done for endElement, 1772 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1773 1774 // this should work both for namespace processing true or false... 1775 1776 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1777 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1778 1779 if (!fEntityScanner.skipString(endElementName.rawname)) { 1780 reportFatalError("ETagRequired", new Object[]{rawname}); 1781 } 1782 1783 // end 1784 fEntityScanner.skipSpaces(); 1785 if (!fEntityScanner.skipChar('>')) { 1786 reportFatalError("ETagUnterminated", 1787 new Object[]{rawname}); 1788 } 1789 fMarkupDepth--; 1790 1791 //we have increased the depth for two markup "<" characters 1792 fMarkupDepth--; 1793 1794 // check that this element was opened in the same entity 1795 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1796 reportFatalError("ElementEntityMismatch", 1797 new Object[]{rawname}); 1798 } 1799 1800 //We should not be popping out the context here in endELement becaause the namespace context is still 1801 //valid when parser is at the endElement state. 
1802 1803 //if (fNamespaces) { 1804 // fNamespaceContext.popContext(); 1805 //} 1806 1807 // call handler 1808 if (fDocumentHandler != null ) { 1809 //end element is scanned in this function so we can send a callback 1810 //here. 1811 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1812 1813 fDocumentHandler.endElement(endElementName, null); 1814 } 1815 if(dtdGrammarUtil != null) 1816 dtdGrammarUtil.endElement(endElementName); 1817 1818 return fMarkupDepth; 1819 1820 } // scanEndElement():int 1821 1822 /** 1823 * Scans a character reference. 1824 * <p> 1825 * <pre> 1826 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1827 * </pre> 1828 */ 1829 protected void scanCharReference() 1830 throws IOException, XNIException { 1831 1832 fStringBuffer2.clear(); 1833 int ch = scanCharReferenceValue(fStringBuffer2, null); 1834 fMarkupDepth--; 1835 if (ch != -1) { 1836 // call handler 1837 1838 if (fDocumentHandler != null) { 1839 if (fNotifyCharRefs) { 1840 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1841 } 1842 Augmentations augs = null; 1843 if (fValidation && ch <= 0x20) { 1844 if (fTempAugmentations != null) { 1845 fTempAugmentations.removeAllItems(); 1846 } 1847 else { 1848 fTempAugmentations = new AugmentationsImpl(); 1849 } 1850 augs = fTempAugmentations; 1851 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1852 } 1853 //xxx: How do we deal with this - how to return charReferenceValues 1854 //now this is being commented because this is taken care in scanDocument() 1855 //fDocumentHandler.characters(fStringBuffer2, null); 1856 if (fNotifyCharRefs) { 1857 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1858 } 1859 } 1860 } 1861 1862 } // scanCharReference() 1863 1864 1865 /** 1866 * Scans an entity reference. 1867 * 1868 * @return returns true if the new entity is started. If it was built-in entity 1869 * 'false' is returned. 1870 * @throws IOException Thrown if i/o error occurs. 
1871 * @throws XNIException Thrown if handler throws exception upon 1872 * notification. 1873 */ 1874 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1875 String name = fEntityScanner.scanName(); 1876 if (name == null) { 1877 reportFatalError("NameRequiredInReference", null); 1878 return; 1879 } 1880 if (!fEntityScanner.skipChar(';')) { 1881 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1882 } 1883 if (fEntityStore.isUnparsedEntity(name)) { 1884 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1885 } 1886 fMarkupDepth--; 1887 fCurrentEntityName = name; 1888 1889 // handle built-in entities 1890 if (name == fAmpSymbol) { 1891 handleCharacter('&', fAmpSymbol, content); 1892 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1893 return ; 1894 } else if (name == fLtSymbol) { 1895 handleCharacter('<', fLtSymbol, content); 1896 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1897 return ; 1898 } else if (name == fGtSymbol) { 1899 handleCharacter('>', fGtSymbol, content); 1900 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1901 return ; 1902 } else if (name == fQuotSymbol) { 1903 handleCharacter('"', fQuotSymbol, content); 1904 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1905 return ; 1906 } else if (name == fAposSymbol) { 1907 handleCharacter('\'', fAposSymbol, content); 1908 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1909 return ; 1910 } 1911 1912 //1. if the entity is external and support to external entities is not required 1913 // 2. or entities should not be replaced 1914 //3. or if it is built in entity reference. 
1915 boolean isEE = fEntityStore.isExternalEntity(name); 1916 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1917 fScannerState = SCANNER_STATE_REFERENCE; 1918 return ; 1919 } 1920 // start general entity 1921 if (!fEntityStore.isDeclaredEntity(name)) { 1922 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1923 if (!fSupportDTD && fReplaceEntityReferences) { 1924 reportFatalError("EntityNotDeclared", new Object[]{name}); 1925 return; 1926 } 1927 //REVISIT: one more case needs to be included: external PE and standalone is no 1928 if ( fHasExternalDTD && !fStandalone) { 1929 if (fValidation) 1930 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1931 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1932 } else 1933 reportFatalError("EntityNotDeclared", new Object[]{name}); 1934 } 1935 //we are starting the entity even if the entity was not declared 1936 //if that was the case it its taken care in XMLEntityManager.startEntity() 1937 //we immediately call the endEntity. Application gets to know if there was 1938 //any entity that was not declared. 1939 fEntityManager.startEntity(name, false); 1940 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1941 //setScannerState(SCANNER_STATE_CONTENT); 1942 //return true ; 1943 } // scanEntityReference() 1944 1945 // utility methods 1946 1947 /** 1948 * Check if the depth exceeds the maxElementDepth limit 1949 * @param elementName name of the current element 1950 */ 1951 void checkDepth(String elementName) { 1952 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1953 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1954 fSecurityManager.debugPrint(fLimitAnalyzer); 1955 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1956 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1957 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1958 "maxElementDepth"}); 1959 } 1960 } 1961 1962 /** 1963 * Calls document handler with a single character resulting from 1964 * built-in entity resolution. 1965 * 1966 * @param c 1967 * @param entity built-in name 1968 * @param XMLStringBuffer append the character to buffer 1969 * 1970 * we really dont need to call this function -- this function is only required when 1971 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1972 * calling this function to hanlde built-in entity reference. 1973 * 1974 */ 1975 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1976 foundBuiltInRefs = true; 1977 content.append(c); 1978 if (fDocumentHandler != null) { 1979 fSingleChar[0] = c; 1980 if (fNotifyBuiltInRefs) { 1981 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1982 } 1983 fTempString.setValues(fSingleChar, 0, 1); 1984 if(!fIsCoalesce){ 1985 fDocumentHandler.characters(fTempString, null); 1986 builtInRefCharacterHandled = true; 1987 } 1988 1989 if (fNotifyBuiltInRefs) { 1990 fDocumentHandler.endGeneralEntity(entity, null); 1991 } 1992 } 1993 } // handleCharacter(char) 1994 1995 // helper methods 1996 1997 /** 1998 * Sets the scanner state. 
1999 * 2000 * @param state The new scanner state. 2001 */ 2002 protected final void setScannerState(int state) { 2003 2004 fScannerState = state; 2005 if (DEBUG_SCANNER_STATE) { 2006 System.out.print("### setScannerState: "); 2007 //System.out.print(fScannerState); 2008 System.out.print(getScannerStateName(state)); 2009 System.out.println(); 2010 } 2011 2012 } // setScannerState(int) 2013 2014 2015 /** 2016 * Sets the Driver. 2017 * 2018 * @param Driver The new Driver. 2019 */ 2020 protected final void setDriver(Driver driver) { 2021 fDriver = driver; 2022 if (DEBUG_DISPATCHER) { 2023 System.out.print("%%% setDriver: "); 2024 System.out.print(getDriverName(driver)); 2025 System.out.println(); 2026 } 2027 } 2028 2029 // 2030 // Private methods 2031 // 2032 2033 /** Returns the scanner state name. */ 2034 protected String getScannerStateName(int state) { 2035 2036 switch (state) { 2037 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 2038 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 2039 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 2040 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 2041 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 2042 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 2043 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 2044 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 2045 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 2046 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 2047 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 2048 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 2049 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2050 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2051 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2052 case SCANNER_STATE_CHARACTER_DATA: 
return "SCANNER_STATE_CHARACTER_DATA" ; 2053 } 2054 2055 return "??? ("+state+')'; 2056 2057 } // getScannerStateName(int):String 2058 public String getEntityName(){ 2059 //return the cached name 2060 return fCurrentEntityName; 2061 } 2062 2063 /** Returns the driver name. */ 2064 public String getDriverName(Driver driver) { 2065 2066 if (DEBUG_DISPATCHER) { 2067 if (driver != null) { 2068 String name = driver.getClass().getName(); 2069 int index = name.lastIndexOf('.'); 2070 if (index != -1) { 2071 name = name.substring(index + 1); 2072 index = name.lastIndexOf('$'); 2073 if (index != -1) { 2074 name = name.substring(index + 1); 2075 } 2076 } 2077 return name; 2078 } 2079 } 2080 return "null"; 2081 2082 } // getDriverName():String 2083 2084 /** 2085 * Check the protocol used in the systemId against allowed protocols 2086 * 2087 * @param systemId the Id of the URI 2088 * @param allowedProtocols a list of allowed protocols separated by comma 2089 * @return the name of the protocol if rejected, null otherwise 2090 */ 2091 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2092 String baseSystemId = fEntityScanner.getBaseSystemId(); 2093 String expandedSystemId = fEntityManager.expandSystemId(systemId, baseSystemId,fStrictURI); 2094 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2095 } 2096 2097 // 2098 // Classes 2099 // 2100 2101 /** 2102 * @author Neeraj Bajaj, Sun Microsystems. 2103 */ 2104 protected static final class Element { 2105 2106 // 2107 // Data 2108 // 2109 2110 /** Symbol. */ 2111 public QName qname; 2112 2113 //raw name stored as characters 2114 public char[] fRawname; 2115 2116 /** The next Element entry. */ 2117 public Element next; 2118 2119 // 2120 // Constructors 2121 // 2122 2123 /** 2124 * Constructs a new Element from the given QName and next Element 2125 * reference. 
2126 */ 2127 public Element(QName qname, Element next) { 2128 this.qname.setValues(qname); 2129 this.fRawname = qname.rawname.toCharArray(); 2130 this.next = next; 2131 } 2132 2133 } // class Element 2134 2135 /** 2136 * Element stack. 2137 * 2138 * @author Neeraj Bajaj, Sun Microsystems. 2139 */ 2140 protected class ElementStack2 { 2141 2142 // 2143 // Data 2144 // 2145 2146 /** The stack data. */ 2147 protected QName [] fQName = new QName[20]; 2148 2149 //Element depth 2150 protected int fDepth; 2151 //total number of elements 2152 protected int fCount; 2153 //current position 2154 protected int fPosition; 2155 //Mark refers to the position 2156 protected int fMark; 2157 2158 protected int fLastDepth ; 2159 2160 // 2161 // Constructors 2162 // 2163 2164 /** Default constructor. */ 2165 public ElementStack2() { 2166 for (int i = 0; i < fQName.length; i++) { 2167 fQName[i] = new QName(); 2168 } 2169 fMark = fPosition = 1; 2170 } // <init>() 2171 2172 public void resize(){ 2173 /** 2174 * int length = fElements.length; 2175 * Element [] temp = new Element[length * 2]; 2176 * System.arraycopy(fElements, 0, temp, 0, length); 2177 * fElements = temp; 2178 */ 2179 //resize QNames 2180 int oldLength = fQName.length; 2181 QName [] tmp = new QName[oldLength * 2]; 2182 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2183 fQName = tmp; 2184 2185 for (int i = oldLength; i < fQName.length; i++) { 2186 fQName[i] = new QName(); 2187 } 2188 2189 } 2190 2191 2192 // 2193 // Public methods 2194 // 2195 2196 /** Check if the element scanned during the start element 2197 *matches the stored element. 2198 * 2199 *@return true if the match suceeds. 
/**
 * Element stack.
 * <p>
 * Keeps a pre-allocated array of QName objects together with several cursors
 * (depth, count, position, mark) that drive the element "skip" logic. The
 * flags {@code fAdd} and {@code fShouldSkip} written by this class are not
 * declared here; they belong to the enclosing scanner.
 *
 * @author Neeraj Bajaj, Sun Microsystems.
 */
protected class ElementStack2 {

    //
    // Data
    //

    /** The stack data. */
    protected QName [] fQName = new QName[20];

    //Element depth
    protected int fDepth;
    //total number of elements
    protected int fCount;
    //current position
    protected int fPosition;
    //Mark refers to the position
    protected int fMark;

    //depth recorded by the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor. Pre-allocates every QName slot and sets mark and position to 1. */
    public ElementStack2() {
        for (int i = 0; i < fQName.length; i++) {
            fQName[i] = new QName();
        }
        fMark = fPosition = 1;
    } // <init>()

    /**
     * Doubles the capacity of the QName array, keeping the existing objects
     * and allocating fresh QName instances for the new slots.
     */
    public void resize(){
        /**
         * int length = fElements.length;
         * Element [] temp = new Element[length * 2];
         * System.arraycopy(fElements, 0, temp, 0, length);
         * fElements = temp;
         */
        //resize QNames
        int oldLength = fQName.length;
        QName [] tmp = new QName[oldLength * 2];
        System.arraycopy(fQName, 0, tmp, 0, oldLength);
        fQName = tmp;

        for (int i = oldLength; i < fQName.length; i++) {
            fQName[i] = new QName();
        }

    }


    //
    // Public methods
    //

    /** Check if the element scanned during the start element
     * matches the stored element.
     * <p>
     * The comparison is only attempted when the previous depth was greater
     * than the current one and the current depth is at most 2. The depth is
     * incremented on every call, whatever the outcome.
     *
     * @param element the element that was just scanned
     * @return true if the match succeeds.
     */
    public boolean matchElement(QName element) {
        //last depth is the depth when last element was pushed
        //if last depth is greater than current depth
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("fLastDepth = " + fLastDepth);
            System.out.println("fDepth = " + fDepth);
        }
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 2){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname);
            }
            // reference comparison of the raw names
            // NOTE(review): presumably both names are interned symbols - confirm
            if(element.rawname == fQName[fDepth].rawname){
                fAdd = false;
                //mark this position
                //decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                //we found the match and from next element skipping will start, add 1
                fPosition = fMark + 1 ;
                match = true;
                //Once we get match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        //store the last depth (value before the increment), then go one level deeper
        fLastDepth = fDepth++;
        return match;
    } // matchElement(QName):boolean

    /**
     * This function doesn't increase depth. The work is split between this
     * function and {@code matchElement} for efficiency.
     * This function just returns the object whose values the caller then sets.
     * <p>
     * When the array is full, skipping is switched off ({@code fShouldSkip}
     * and {@code fAdd} are cleared) and the last slot is handed out.
     *
     * @return QName reference to the next element in the list
     */
    public QName nextElement() {

        //if number of elements becomes equal to the length of array -- stop the skipping
        if (fCount == fQName.length) {
            fShouldSkip = false;
            fAdd = false;
            if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip);
            //xxx: this is not correct, we are returning the last element
            //this wont make any difference since flag has been set to 'false'
            return fQName[--fCount];
        }
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("fCount = " + fCount);
        }
        return fQName[fCount++];

    }

    /** Note that this function is considerably different than nextElement()
     * This function just returns the previously stored elements
     * and advances the position, wrapping back to the mark at the end.
     */
    public QName getNext(){
        //when position reaches number of elements in the list..
        //set the position back to mark, making it a circular linked list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        return fQName[fPosition++];
    }

    /** Returns the current depth, then decrements it.
     */
    public int popElement(){
        return fDepth--;
    }


    /** Clears the stack without throwing away existing QName objects. */
    public void clear() {
        fLastDepth = 0;
        fDepth = 0;
        fCount = 0 ;
        fPosition = fMark = 1;
    } // clear()

} // class ElementStack2
/**
 * Element stack. This stack operates without synchronization, error
 * checking, and it re-uses objects instead of throwing popped items
 * away.
 * <p>
 * Besides plain push/pop it keeps extra cursors (count, position, mark) and
 * the {@code fInt} table used by the element "skip" logic. The flags
 * {@code fSkip} and {@code fAdd} read and written here are not declared in
 * this class; they belong to the enclosing scanner.
 *
 * @author Andy Clark, IBM
 */
protected class ElementStack {

    //
    // Data
    //

    /** The stack data. */
    protected QName[] fElements;
    // for each depth, the index into fElements of the tag opened at that depth
    // NOTE(review): allocated with 20 slots and never grown in this class,
    // while fElements can double - confirm the depth used to index it stays in range
    protected int [] fInt = new int[20];


    //Element depth
    protected int fDepth;
    //total number of elements
    protected int fCount;
    //current position
    protected int fPosition;
    //Mark refers to the position
    protected int fMark;

    //depth recorded by the previous matchElement() call
    protected int fLastDepth ;

    //
    // Constructors
    //

    /** Default constructor. Pre-allocates 20 re-usable QName slots. */
    public ElementStack() {
        fElements = new QName[20];
        for (int i = 0; i < fElements.length; i++) {
            fElements[i] = new QName();
        }
    } // <init>()

    //
    // Public methods
    //

    /**
     * Pushes an element on the stack.
     * <p>
     * <strong>Note:</strong> The QName values are copied into the
     * stack. In other words, the caller does <em>not</em> orphan
     * the element to the stack. Also, the QName object returned
     * is <em>not</em> orphaned to the caller. It should be
     * considered read-only.
     *
     * @param element The element to push onto the stack.
     *
     * @return Returns the actual QName object that stores the
     *         copied values.
     */
    //XXX: THIS FUNCTION IS NOT USED
    public QName pushElement(QName element) {
        // grow by doubling when full, keeping the existing QName objects
        if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }
        fElements[fDepth].setValues(element);
        return fElements[fDepth++];
    } // pushElement(QName):QName


    /** Note that this function is considerably different than nextElement()
     * This function just returns the previously stored elements.
     * The position is not advanced here; push() does that.
     */
    public QName getNext(){
        //when position reaches number of elements in the list..
        //set the position back to mark, making it a circular linked list.
        if(fPosition == fCount){
            fPosition = fMark;
        }
        //store the position of last opened tag at particular depth
        //fInt[++fDepth] = fPosition;
        if(DEBUG_SKIP_ALGORITHM){
            System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
        }
        //return fElements[fPosition++];
        return fElements[fPosition];
    }

    /** This function should be called only when element was skipped successfully.
     * 1. Increase the depth - because element was successfully skipped.
     * 2. Store the position of the element token in array "last opened tag" at depth.
     * 3. increase the position counter so as to point to the next element in the array
     */
    public void push(){

        fInt[++fDepth] = fPosition++;
    }

    /** Check if the element scanned during the start element
     * matches the stored element.
     * <p>
     * The comparison is only attempted when the previous depth was greater
     * than the current one and the current depth is at most 3. In every case
     * the element index for the current depth is recorded in {@code fInt}.
     * When the element array is full, skipping is switched off, the stack is
     * repositioned and {@code false} is returned even if a match was found.
     *
     * @param element the element that was just scanned
     * @return true if the match succeeds.
     */
    public boolean matchElement(QName element) {
        //last depth is the depth when last element was pushed
        //if last depth is greater than current depth
        //if(DEBUG_SKIP_ALGORITHM){
        //   System.out.println("Check if the element " + element.rawname + " matches");
        //  System.out.println("fLastDepth = " + fLastDepth);
        // System.out.println("fDepth = " + fDepth);
        //}
        boolean match = false;
        if(fLastDepth > fDepth && fDepth <= 3){
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
            }
            // reference comparison of the raw names
            // NOTE(review): presumably both names are interned symbols - confirm
            if(element.rawname == fElements[fDepth - 1].rawname){
                fAdd = false;
                //mark this position
                //decrease the depth by 1 as arrays are 0 based
                fMark = fDepth - 1;
                //we found the match
                fPosition = fMark;
                match = true;
                //Once we get match decrease the count -- this was increased by nextElement()
                --fCount;
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                    System.out.println("fMark = " + fMark);
                    System.out.println("fPosition = " + fPosition);
                    System.out.println("fDepth = " + fDepth);
                    System.out.println("fCount = " + fCount);
                    System.out.println("---------MATCH SUCEEDED-----------------");
                    System.out.println("");
                }
            }else{
                fAdd = true;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
            }
        }
        //store the position for the current depth
        //when we are adding the elements, when skipping
        //starts even then this should be tracked ie. when
        //calling getNext()
        if(match){
            //from next element skipping will start, add 1
            fInt[fDepth] = fPosition++;
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
            }
            //since fInt[fDepth] contains pointer to the element array which are 0 based.
            fInt[fDepth] = fCount - 1;
        }

        //if number of elements becomes equal to the length of array -- stop the skipping
        //xxx: should we do "fCount == fInt.length"
        if (fCount == fElements.length) {
            fSkip = false;
            fAdd = false;
            //reposition the stack -- it seems to be too complex document and there is no symmetry in structure
            reposition();
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                System.out.println("REPOSITIONING THE STACK");
                System.out.println("-----------SKIPPING STOPPED----------");
                System.out.println("");
            }
            return false;
        }
        if(DEBUG_SKIP_ALGORITHM){
            if(match){
                System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }else{
                System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
            }
        }
        //store the last depth
        fLastDepth = fDepth;
        return match;
    } // matchElement(QName):boolean


    /**
     * Returns the next element on the stack.
     * <p>
     * While skipping is active the slot is taken at {@code fCount} (and both
     * depth and count advance); otherwise the slot at the current depth is
     * returned, growing the array by doubling when it is full.
     *
     * @return Returns the actual QName object. Callee should
     * use this object to store the details of next element encountered.
     */
    public QName nextElement() {
        if(fSkip){
            fDepth++;
            //boundary checks are done in matchElement()
            return fElements[fCount++];
        } else if (fDepth == fElements.length) {
            QName[] array = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, array, 0, fDepth);
            fElements = array;
            for (int i = fDepth; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        }

        return fElements[fDepth++];

    } // nextElement():QName


    /**
     * Pops an element off of the stack and returns the stored QName object.
     * <p>
     * <strong>Note:</strong> The object returned is <em>not</em>
     * orphaned to the caller. Therefore, the caller should consider
     * the object to be read-only.
     */
    public QName popElement() {
        //return the same object that was pushed -- this would avoid
        //setting the values for every end element.
        //STRONG: this object is read only -- this object reference shouldn't be stored.
        if(fSkip || fAdd ){
            // skip logic active: the element index for this depth comes from fInt
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
                System.out.println("");
            }
            return fElements[fInt[fDepth--]];
        } else{
            if(DEBUG_SKIP_ALGORITHM){
                // NOTE(review): this trace reads fElements[fDepth] before the
                // decrement, i.e. one slot above the element actually returned - confirm intent
                System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
            }
            return fElements[--fDepth] ;
        }
        //element.setValues(fElements[--fDepth]);
    } // popElement():QName

    /** Reposition the stack. fInt [] contains all the opened tags at particular depth.
     * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
     * as per the depth.
     */
    public void reposition(){
        for( int i = 2 ; i <= fDepth ; i++){
            fElements[i-1] = fElements[fInt[i]];
        }
        if(DEBUG_SKIP_ALGORITHM){
            for( int i = 0 ; i < fDepth ; i++){
                System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
            }
        }
    }

    /** Clears the stack without throwing away existing QName objects. */
    public void clear() {
        fDepth = 0;
        fLastDepth = 0;
        fCount = 0 ;
        fPosition = fMark = 1;

    } // clear()

    /**
     * This function is as a result of optimization done for endElement --
     * we dont need to set the value for every end element encountered.
     * For Well formedness checks we can have the same QName object that was pushed.
     * the values will be set only if application need to know about the endElement
     * -- neeraj.bajaj@sun.com
     *
     * @return the QName stored at the current depth index
     */

    public QName getLastPoppedElement(){
        return fElements[fDepth];
    }
} // class ElementStack

/**
 * Drives the parser to the next state/event on the input. Parser is guaranteed
 * to stop at the next state/event.
 *
 * Internally XML document is divided into several states. Each state represents
 * a sections of XML document. When this functions returns normally, it has read
 * the section of XML document and returns the state corresponding to section of
 * document which has been read. For optimizations, a particular driver
 * can read ahead of the section of document (state returned) just read and
 * can maintain a different internal state.
 *
 *
 * @author Neeraj Bajaj, Sun Microsystems
 */
protected interface Driver {


    /**
     * Drives the parser to the next state/event on the input. Parser is guaranteed
     * to stop at the next state/event.
     *
     * Internally XML document is divided into several states. Each state represents
     * a sections of XML document. When this functions returns normally, it has read
     * the section of XML document and returns the state corresponding to section of
     * document which has been read. For optimizations, a particular driver
     * can read ahead of the section of document (state returned) just read and
     * can maintain a different internal state.
     *
     * @return state representing the section of document just read.
     *
     * @throws IOException Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */

    public int next() throws IOException, XNIException;

} // interface Driver
When it has finished reading fragment 2611 * of XML documents, it can pass the job of reading to another driver. 2612 * 2613 * This class has been modified as per the new design which is more suited to 2614 * efficiently build pull parser. Lot of performance improvements have been done and 2615 * the code has been added to support stax functionality/features. 2616 * 2617 * @author Neeraj Bajaj, Sun Microsystems 2618 * 2619 * 2620 * @author Andy Clark, IBM 2621 * @author Eric Ye, IBM 2622 */ 2623 protected class FragmentContentDriver 2624 implements Driver { 2625 2626 // 2627 // Driver methods 2628 // 2629 private boolean fContinueDispatching = true; 2630 private boolean fScanningForMarkup = true; 2631 2632 /** 2633 * decides the appropriate state of the parser 2634 */ 2635 private void startOfMarkup() throws IOException { 2636 fMarkupDepth++; 2637 final int ch = fEntityScanner.peekChar(); 2638 2639 switch(ch){ 2640 case '?' :{ 2641 setScannerState(SCANNER_STATE_PI); 2642 fEntityScanner.skipChar(ch); 2643 break; 2644 } 2645 case '!' 
:{ 2646 fEntityScanner.skipChar(ch); 2647 if (fEntityScanner.skipChar('-')) { 2648 if (!fEntityScanner.skipChar('-')) { 2649 reportFatalError("InvalidCommentStart", 2650 null); 2651 } 2652 setScannerState(SCANNER_STATE_COMMENT); 2653 } else if (fEntityScanner.skipString(cdata)) { 2654 setScannerState(SCANNER_STATE_CDATA ); 2655 } else if (!scanForDoctypeHook()) { 2656 reportFatalError("MarkupNotRecognizedInContent", 2657 null); 2658 } 2659 break; 2660 } 2661 case '/' :{ 2662 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2663 fEntityScanner.skipChar(ch); 2664 break; 2665 } 2666 default :{ 2667 if (isValidNameStartChar(ch)) { 2668 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2669 } else { 2670 reportFatalError("MarkupNotRecognizedInContent", 2671 null); 2672 } 2673 } 2674 } 2675 2676 }//startOfMarkup 2677 2678 private void startOfContent() throws IOException { 2679 if (fEntityScanner.skipChar('<')) { 2680 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2681 } else if (fEntityScanner.skipChar('&')) { 2682 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2683 } else { 2684 //element content is there.. 2685 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2686 } 2687 }//startOfContent 2688 2689 2690 /** 2691 * 2692 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2693 * At any point of time when in doubt over the current state of the parser, the state should be 2694 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2695 * the parser to one of its sub state. 2696 * sub states are defined in the parser on the basis of different XML component like 2697 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2698 * These sub states help the parser to have fine control over the parsing. These are the 2699 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2700 * decided if paresr needs to stop at next milepost ?? 2701 * 2702 */ 2703 public void decideSubState() throws IOException { 2704 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2705 2706 switch (fScannerState) { 2707 2708 case SCANNER_STATE_CONTENT: { 2709 startOfContent() ; 2710 break; 2711 } 2712 2713 case SCANNER_STATE_START_OF_MARKUP: { 2714 startOfMarkup() ; 2715 break; 2716 } 2717 } 2718 } 2719 }//decideSubState 2720 2721 /** 2722 * Drives the parser to the next state/event on the input. Parser is guaranteed 2723 * to stop at the next state/event. Internally XML document 2724 * is divided into several states. Each state represents a sections of XML 2725 * document. When this functions returns normally, it has read the section 2726 * of XML document and returns the state corresponding to section of 2727 * document which has been read. For optimizations, a particular driver 2728 * can read ahead of the section of document (state returned) just read and 2729 * can maintain a different internal state. 2730 * 2731 * State returned corresponds to Stax states. 2732 * 2733 * @return state representing the section of document just read. 2734 * 2735 * @throws IOException Thrown on i/o error. 2736 * @throws XNIException Thrown on parse error. 2737 */ 2738 2739 public int next() throws IOException, XNIException { 2740 while (true) { 2741 try { 2742 if(DEBUG_NEXT){ 2743 System.out.println("NOW IN FragmentContentDriver"); 2744 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2745 } 2746 2747 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2748 //decideSubState. 
2749 2750 switch (fScannerState) { 2751 case SCANNER_STATE_CONTENT: { 2752 final int ch = fEntityScanner.peekChar(); 2753 if (ch == '<') { 2754 fEntityScanner.scanChar(); 2755 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2756 } else if (ch == '&') { 2757 fEntityScanner.scanChar(); 2758 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2759 break; 2760 } else { 2761 //element content is there.. 2762 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2763 break; 2764 } 2765 } 2766 2767 case SCANNER_STATE_START_OF_MARKUP: { 2768 startOfMarkup(); 2769 break; 2770 }//case: SCANNER_STATE_START_OF_MARKUP 2771 2772 }//end of switch 2773 //decideSubState() ; 2774 2775 //do some special handling if isCoalesce is set to true. 2776 if(fIsCoalesce){ 2777 fUsebuffer = true ; 2778 //if the last section was character data 2779 if(fLastSectionWasCharacterData){ 2780 2781 //if we dont encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2782 //return the last scanned charactrer data. 2783 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2784 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2785 fLastSectionWasCharacterData = false; 2786 return XMLEvent.CHARACTERS; 2787 } 2788 }//if last section was CDATA or ENTITY REFERENCE 2789 //xxx: there might be another entity reference or CDATA after this 2790 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2791 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2792 //and current state is not SCANNER_STATE_CHARACTER_DATA 2793 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2794 //this means there is nothing more to be coalesced. 2795 //return the CHARACTERS event. 
2796 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2797 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2798 2799 fLastSectionWasCData = false; 2800 fLastSectionWasEntityReference = false; 2801 return XMLEvent.CHARACTERS; 2802 } 2803 } 2804 } 2805 2806 2807 if(DEBUG_NEXT){ 2808 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2809 } 2810 2811 switch(fScannerState){ 2812 2813 case XMLEvent.START_DOCUMENT : 2814 return XMLEvent.START_DOCUMENT; 2815 2816 case SCANNER_STATE_START_ELEMENT_TAG :{ 2817 2818 //xxx this function returns true when element is empty.. can be linked to end element event. 2819 //returns true if the element is empty 2820 fEmptyElement = scanStartElement() ; 2821 //if the element is empty the next event is "end element" 2822 if(fEmptyElement){ 2823 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2824 }else{ 2825 //set the next possible state 2826 setScannerState(SCANNER_STATE_CONTENT); 2827 } 2828 return XMLEvent.START_ELEMENT ; 2829 } 2830 2831 case SCANNER_STATE_CHARACTER_DATA: { 2832 if(DEBUG_COALESCE){ 2833 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2834 System.out.println("fIsCoalesce = " + fIsCoalesce); 2835 } 2836 //if last section was either entity reference or cdata or character data we should be using buffer 2837 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2838 2839 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2840 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2841 fLastSectionWasEntityReference = false; 2842 fLastSectionWasCData = false; 2843 fLastSectionWasCharacterData = true ; 2844 fUsebuffer = true; 2845 }else{ 2846 //clear the buffer 2847 fContentBuffer.clear(); 2848 } 2849 2850 //set the fTempString length to 0 before passing it on to scanContent 2851 //scanContent sets the correct co-ordinates as per the content read 2852 fTempString.length = 0; 2853 int c = fEntityScanner.scanContent(fTempString); 2854 if(DEBUG){ 2855 System.out.println("fTempString = " + fTempString); 2856 } 2857 if(fEntityScanner.skipChar('<')){ 2858 //check if we have reached end of element 2859 if(fEntityScanner.skipChar('/')){ 2860 //increase the mark up depth 2861 fMarkupDepth++; 2862 fLastSectionWasCharacterData = false; 2863 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2864 //check if its start of new element 2865 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2866 fMarkupDepth++; 2867 fLastSectionWasCharacterData = false; 2868 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2869 }else{ 2870 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2871 //there can be cdata ahead if coalesce is true we should call again 2872 if(fIsCoalesce){ 2873 fUsebuffer = true; 2874 fLastSectionWasCharacterData = true; 2875 fContentBuffer.append(fTempString); 2876 fTempString.length = 0; 2877 continue; 2878 } 2879 } 2880 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2881 if(fUsebuffer){ 2882 fContentBuffer.append(fTempString); 2883 fTempString.length = 0; 2884 } 2885 if(DEBUG){ 2886 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2887 } 2888 //check limit before returning event 2889 checkLimit(fContentBuffer); 2890 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2891 if(DEBUG)System.out.println("Return 
SPACE EVENT"); 2892 return XMLEvent.SPACE; 2893 }else 2894 return XMLEvent.CHARACTERS; 2895 2896 } else{ 2897 fUsebuffer = true ; 2898 if(DEBUG){ 2899 System.out.println("fContentBuffer = " + fContentBuffer); 2900 System.out.println("fTempString = " + fTempString); 2901 } 2902 fContentBuffer.append(fTempString); 2903 fTempString.length = 0; 2904 } 2905 if (c == '\r') { 2906 if(DEBUG){ 2907 System.out.println("'\r' character found"); 2908 } 2909 // happens when there is the character reference 2910 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2911 fEntityScanner.scanChar(); 2912 fUsebuffer = true; 2913 fContentBuffer.append((char)c); 2914 c = -1 ; 2915 } else if (c == ']') { 2916 //fStringBuffer.clear(); 2917 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2918 fUsebuffer = true; 2919 fContentBuffer.append((char)fEntityScanner.scanChar()); 2920 // remember where we are in case we get an endEntity before we 2921 // could flush the buffer out - this happens when we're parsing an 2922 // entity which ends with a ] 2923 fInScanContent = true; 2924 2925 // We work on a single character basis to handle cases such as: 2926 // ']]]>' which we might otherwise miss. 2927 // 2928 if (fEntityScanner.skipChar(']')) { 2929 fContentBuffer.append(']'); 2930 while (fEntityScanner.skipChar(']')) { 2931 fContentBuffer.append(']'); 2932 } 2933 if (fEntityScanner.skipChar('>')) { 2934 reportFatalError("CDEndInContent", null); 2935 } 2936 } 2937 c = -1 ; 2938 fInScanContent = false; 2939 } 2940 2941 do{ 2942 //xxx: we should be using only one buffer.. 2943 // we need not to grow the buffer only when isCoalesce() is not true; 2944 2945 if (c == '<') { 2946 fEntityScanner.scanChar(); 2947 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2948 break; 2949 }//xxx what should be the behavior if entity reference is present in the content ? 
2950 else if (c == '&') { 2951 fEntityScanner.scanChar(); 2952 setScannerState(SCANNER_STATE_REFERENCE); 2953 break; 2954 }///xxx since this part is also characters, it should be merged... 2955 else if (c != -1 && isInvalidLiteral(c)) { 2956 if (XMLChar.isHighSurrogate(c)) { 2957 // special case: surrogates 2958 scanSurrogates(fContentBuffer) ; 2959 setScannerState(SCANNER_STATE_CONTENT); 2960 } else { 2961 reportFatalError("InvalidCharInContent", 2962 new Object[] { 2963 Integer.toString(c, 16)}); 2964 fEntityScanner.scanChar(); 2965 } 2966 break; 2967 } 2968 //xxx: scanContent also gives character callback. 2969 c = scanContent(fContentBuffer) ; 2970 //we should not be iterating again if fIsCoalesce is not set to true 2971 2972 if(!fIsCoalesce){ 2973 setScannerState(SCANNER_STATE_CONTENT); 2974 break; 2975 } 2976 2977 }while(true); 2978 2979 //if (fDocumentHandler != null) { 2980 // fDocumentHandler.characters(fContentBuffer, null); 2981 //} 2982 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2983 //if fIsCoalesce is true there might be more data so call fDriver.next() 2984 if(fIsCoalesce){ 2985 fLastSectionWasCharacterData = true ; 2986 continue; 2987 }else{ 2988 //check limit before returning event 2989 checkLimit(fContentBuffer); 2990 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2991 if(DEBUG)System.out.println("Return SPACE EVENT"); 2992 return XMLEvent.SPACE; 2993 } else 2994 return XMLEvent.CHARACTERS ; 2995 } 2996 } 2997 2998 case SCANNER_STATE_END_ELEMENT_TAG :{ 2999 if(fEmptyElement){ 3000 //set it back to false. 3001 fEmptyElement = false; 3002 setScannerState(SCANNER_STATE_CONTENT); 3003 //check the case when there is comment after single element document 3004 //<foo/> and some comment after this 3005 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 3006 3007 } else if(scanEndElement() == 0) { 3008 //It is last element of the document 3009 if (elementDepthIsZeroHook()) { 3010 //if element depth is zero , it indicates the end of the document 3011 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 3012 //xxx understand this point once again.. 3013 return XMLEvent.END_ELEMENT ; 3014 } 3015 3016 } 3017 setScannerState(SCANNER_STATE_CONTENT); 3018 return XMLEvent.END_ELEMENT ; 3019 } 3020 3021 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 3022 scanComment(); 3023 setScannerState(SCANNER_STATE_CONTENT); 3024 return XMLEvent.COMMENT; 3025 //break; 3026 } 3027 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 3028 //clear the buffer first 3029 fContentBuffer.clear() ; 3030 //xxx: which buffer should be passed. Ideally we shouldn't have 3031 //more than two buffers -- 3032 //xxx: where should we add the switch for buffering. 3033 scanPI(fContentBuffer); 3034 setScannerState(SCANNER_STATE_CONTENT); 3035 return XMLEvent.PROCESSING_INSTRUCTION; 3036 //break; 3037 } 3038 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 3039 //xxx: What if CDATA is the first event 3040 //<foo><![CDATA[hello<><>]]>append</foo> 3041 3042 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 3043 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3044 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3045 fLastSectionWasCData = true ; 3046 fLastSectionWasEntityReference = false; 3047 fLastSectionWasCharacterData = false; 3048 }//if we dont need to coalesce clear the buffer 3049 else{ 3050 fContentBuffer.clear(); 3051 } 3052 fUsebuffer = true; 3053 //CDATA section is completely read in all the case. 3054 scanCDATASection(fContentBuffer , true); 3055 setScannerState(SCANNER_STATE_CONTENT); 3056 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3057 //and just call fDispatche.next(). Since we have set the scanner state to 3058 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3059 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3060 //2. Check if application has set for reporting CDATA event 3061 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3062 //return the cdata event as characters. 3063 if(fIsCoalesce){ 3064 fLastSectionWasCData = true ; 3065 //there might be more data to coalesce. 3066 continue; 3067 }else if(fReportCdataEvent){ 3068 return XMLEvent.CDATA; 3069 } else{ 3070 return XMLEvent.CHARACTERS; 3071 } 3072 } 3073 3074 case SCANNER_STATE_REFERENCE :{ 3075 fMarkupDepth++; 3076 foundBuiltInRefs = false; 3077 3078 //we should not clear the buffer only when the last state was either CDATA or 3079 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3080 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3081 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3082 //used when fIsCoalesce is set to true. 
3083 fLastSectionWasEntityReference = true ; 3084 fLastSectionWasCData = false; 3085 fLastSectionWasCharacterData = false; 3086 }//if we dont need to coalesce clear the buffer 3087 else{ 3088 fContentBuffer.clear(); 3089 } 3090 fUsebuffer = true ; 3091 //take care of character reference 3092 if (fEntityScanner.skipChar('#')) { 3093 scanCharReferenceValue(fContentBuffer, null); 3094 fMarkupDepth--; 3095 if(!fIsCoalesce){ 3096 setScannerState(SCANNER_STATE_CONTENT); 3097 return XMLEvent.CHARACTERS; 3098 } 3099 } else { 3100 // this function also starts new entity 3101 scanEntityReference(fContentBuffer); 3102 //if there was built-in entity reference & coalesce is not true 3103 //return CHARACTERS 3104 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3105 setScannerState(SCANNER_STATE_CONTENT); 3106 if (builtInRefCharacterHandled) { 3107 builtInRefCharacterHandled = false; 3108 return XMLEvent.ENTITY_REFERENCE; 3109 } else { 3110 return XMLEvent.CHARACTERS; 3111 } 3112 } 3113 3114 //if there was a text declaration, call next() it will be taken care. 3115 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3116 fLastSectionWasEntityReference = true ; 3117 continue; 3118 } 3119 3120 if(fScannerState == SCANNER_STATE_REFERENCE){ 3121 setScannerState(SCANNER_STATE_CONTENT); 3122 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3123 // Skip the entity reference, we don't care 3124 continue; 3125 } 3126 return XMLEvent.ENTITY_REFERENCE; 3127 } 3128 } 3129 //Wether it was character reference, entity reference or built-in entity 3130 //set the next possible state to SCANNER_STATE_CONTENT 3131 setScannerState(SCANNER_STATE_CONTENT); 3132 fLastSectionWasEntityReference = true ; 3133 continue; 3134 } 3135 3136 case SCANNER_STATE_TEXT_DECL: { 3137 // scan text decl 3138 if (fEntityScanner.skipString("<?xml")) { 3139 fMarkupDepth++; 3140 // NOTE: special case where entity starts with a PI 3141 // whose name starts with "xml" (e.g. 
"xmlfoo") 3142 if (isValidNameChar(fEntityScanner.peekChar())) { 3143 fStringBuffer.clear(); 3144 fStringBuffer.append("xml"); 3145 3146 if (fNamespaces) { 3147 while (isValidNCName(fEntityScanner.peekChar())) { 3148 fStringBuffer.append((char)fEntityScanner.scanChar()); 3149 } 3150 } else { 3151 while (isValidNameChar(fEntityScanner.peekChar())) { 3152 fStringBuffer.append((char)fEntityScanner.scanChar()); 3153 } 3154 } 3155 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3156 fContentBuffer.clear(); 3157 scanPIData(target, fContentBuffer); 3158 } 3159 3160 // standard text declaration 3161 else { 3162 //xxx: this function gives callback 3163 scanXMLDeclOrTextDecl(true); 3164 } 3165 } 3166 // now that we've straightened out the readers, we can read in chunks: 3167 fEntityManager.fCurrentEntity.mayReadChunks = true; 3168 setScannerState(SCANNER_STATE_CONTENT); 3169 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3170 //it seems we have to careful when to allow function issue a callback 3171 //and when to allow adapter issue a callback. 
continue;
                    }


                    // Root element: scanning is delegated to scanRootElementHook().
                    // Both outcomes report START_ELEMENT; when the hook returns true
                    // the element is flagged as empty and the scanner state is left
                    // as the hook set it.
                    case SCANNER_STATE_ROOT_ELEMENT: {
                        if (scanRootElementHook()) {
                            fEmptyElement = true;
                            //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
                            return XMLEvent.START_ELEMENT;
                        }
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.START_ELEMENT ;
                    }
                    // Character reference: the content buffer is reset, the reference
                    // value is scanned into it and reported as a CHARACTERS event.
                    case SCANNER_STATE_CHAR_REFERENCE : {
                        fContentBuffer.clear();
                        scanCharReferenceValue(fContentBuffer, null);
                        fMarkupDepth--;
                        setScannerState(SCANNER_STATE_CONTENT);
                        return XMLEvent.CHARACTERS;
                    }
                    // Any state not handled by a case above is treated as an error.
                    default:
                        throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                }//switch
            }
            // premature end of file: let endOfFileHook() decide whether this is an
            // error, then return -1 to the caller
            catch (EOFException e) {
                endOfFileHook(e);
                return -1;
            }
            } //while loop
        }//next

        /**
         * Adds the length of the content buffer to the value tracked for the
         * current entity and checks whether the accumulated values exceed the
         * configured entity size limits.
         *
         * <p>Nothing happens unless the limit analyzer is tracking
         * {@code fCurrentEntityName}. Otherwise {@code buffer.length} is added
         * under {@code Limit.GENERAL_ENTITY_SIZE_LIMIT}; then the general entity
         * size limit and the total entity size limit are checked in that order.
         * For each limit that is exceeded, the analyzer state is passed to
         * {@code debugPrint} and a fatal error ("MaxEntitySizeLimit" or
         * "TotalEntitySizeLimit" respectively) is reported.
         *
         * @param buffer content buffer whose length is added to the tracked value
         */
        protected void checkLimit(XMLStringBuffer buffer) {
            if (fLimitAnalyzer.isTracking(fCurrentEntityName)) {
                fLimitAnalyzer.addValue(Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntityName, buffer.length);
                if (fSecurityManager.isOverLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    // message arguments: entity name, accumulated value, limit, limit state literal
                    reportFatalError("MaxEntitySizeLimit", new Object[]{fCurrentEntityName,
                        fLimitAnalyzer.getValue(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.GENERAL_ENTITY_SIZE_LIMIT)});
                }
                // the total limit is checked even if the general limit was already reported
                if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                    // message arguments: accumulated total, limit, limit state literal
                    reportFatalError("TotalEntitySizeLimit",
                        new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT),
                        fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)});
                }
            }
        }

        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "&lt;!" has been scanned.
         *
         * <p>This base implementation scans nothing and always returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal. However, when scanning a full XML document, the
         * scanner must handle the trailing miscellaneous section of
         * the document after the end of the document's root element.
         *
         * <p>This base implementation always returns false.
         *
         * @return True if the caller should stop and return true which
         *          allows the scanner to switch to a new scanning
         *          driver. A return value of false indicates that
         *          the content driver should continue as normal.
         */
        protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {
            return false;
        } // elementDepthIsZeroHook():boolean

        /**
         * Scan for root element hook. This method is a hook for
         * subclasses to add code that handles scanning for the root
         * element. When scanning a document fragment, there is no
         * "root" element. However, when scanning a full XML document,
         * the scanner must handle the root element specially.
3276 * 3277 * @return True if the caller should stop and return true which 3278 * allows the scanner to switch to a new scanning 3279 * driver. A return value of false indicates that 3280 * the content driver should continue as normal. 3281 */ 3282 protected boolean scanRootElementHook() 3283 throws IOException, XNIException { 3284 return false; 3285 } // scanRootElementHook():boolean 3286 3287 /** 3288 * End of file hook. This method is a hook for subclasses to 3289 * add code that handles the end of file. The end of file in 3290 * a document fragment is OK if the markup depth is zero. 3291 * However, when scanning a full XML document, an end of file 3292 * is always premature. 3293 */ 3294 protected void endOfFileHook(EOFException e) 3295 throws IOException, XNIException { 3296 3297 // NOTE: An end of file is only only an error if we were 3298 // in the middle of scanning some markup. -Ac 3299 if (fMarkupDepth != 0) { 3300 reportFatalError("PrematureEOF", null); 3301 } 3302 3303 } // endOfFileHook() 3304 3305 } // class FragmentContentDriver 3306 3307 static void pr(String str) { 3308 System.out.println(str) ; 3309 } 3310 3311 protected boolean fUsebuffer ; 3312 3313 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3314 * maintained for attributes. 3315 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3316 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3317 * XMLString. 3318 * 3319 * @return XMLString XMLString used to store an attribute value. 
3320 */ 3321 3322 protected XMLString getString(){ 3323 if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ 3324 return (XMLString)attributeValueCache.get(fAttributeCacheUsedCount++); 3325 } else{ 3326 XMLString str = new XMLString(); 3327 fAttributeCacheUsedCount++; 3328 attributeValueCache.add(str); 3329 return str; 3330 } 3331 } 3332 3333 /** 3334 * Implements XMLBufferListener interface. 3335 */ 3336 3337 public void refresh(){ 3338 refresh(0); 3339 } 3340 3341 /** 3342 * receives callbacks from {@link XMLEntityReader } when buffer 3343 * is being changed. 3344 * @param refreshPosition 3345 */ 3346 public void refresh(int refreshPosition){ 3347 //If you are reading attributes and you got a callback 3348 //cache available attributes. 3349 if(fReadingAttributes){ 3350 fAttributes.refresh(); 3351 } 3352 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3353 //since fTempString directly matches to the underlying main buffer 3354 //store the data into buffer 3355 fContentBuffer.append(fTempString); 3356 //clear the XMLString so that data can't be added again. 3357 fTempString.length = 0; 3358 fUsebuffer = true; 3359 } 3360 } 3361 3362 } // class XMLDocumentFragmentScannerImpl