1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 25 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 26 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 27 import com.sun.org.apache.xerces.internal.util.XMLChar; 28 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.xni.Augmentations; 32 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 33 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34 import com.sun.org.apache.xerces.internal.xni.XMLString; 35 import com.sun.org.apache.xerces.internal.xni.XNIException; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 38 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 39 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 40 import com.sun.xml.internal.stream.Entity; 41 import com.sun.xml.internal.stream.StaxXMLInputSource; 42 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 43 import java.io.EOFException; 44 import java.io.IOException; 45 import javax.xml.stream.XMLInputFactory; 46 import javax.xml.stream.events.XMLEvent; 47 48 49 /** 50 * This class is responsible for scanning XML document structure 51 * and content. 52 * 53 * This class has been modified as per the new design which is more suited to 54 * efficiently build pull parser. Lot of improvements have been done and 55 * the code has been added to support stax functionality/features. 56 * 57 * @author Neeraj Bajaj, Sun Microsystems 58 * @author K.Venugopal, Sun Microsystems 59 * @author Glenn Marcy, IBM 60 * @author Andy Clark, IBM 61 * @author Arnaud Le Hors, IBM 62 * @author Eric Ye, IBM 63 * @author Sunitha Reddy, Sun Microsystems 64 * 65 * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes 66 * related to property SupportDTD. 67 * @author Joe Wang, Sun Microsystems 68 * @version $Id: XMLDocumentScannerImpl.java,v 1.17 2010-11-01 04:39:41 joehw Exp $ 69 */ 70 public class XMLDocumentScannerImpl 71 extends XMLDocumentFragmentScannerImpl{ 72 73 // 74 // Constants 75 // 76 77 // scanner states 78 79 /** Scanner state: XML declaration. */ 80 protected static final int SCANNER_STATE_XML_DECL = 42; 81 82 /** Scanner state: prolog. */ 83 protected static final int SCANNER_STATE_PROLOG = 43; 84 85 /** Scanner state: trailing misc. */ 86 protected static final int SCANNER_STATE_TRAILING_MISC = 44; 87 88 /** Scanner state: DTD internal declarations. */ 89 protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45; 90 91 /** Scanner state: open DTD external subset. */ 92 protected static final int SCANNER_STATE_DTD_EXTERNAL = 46; 93 94 /** Scanner state: DTD external declarations. */ 95 protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47; 96 97 /** Scanner state: NO MORE ELEMENTS. */ 98 protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48; 99 100 // feature identifiers 101 102 /** Property identifier document scanner: */ 103 protected static final String DOCUMENT_SCANNER = 104 Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY; 105 106 /** Feature identifier: load external DTD. */ 107 protected static final String LOAD_EXTERNAL_DTD = 108 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 109 110 /** Feature identifier: load external DTD. */ 111 protected static final String DISALLOW_DOCTYPE_DECL_FEATURE = 112 Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE; 113 114 // property identifiers 115 116 /** Property identifier: DTD scanner. */ 117 protected static final String DTD_SCANNER = 118 Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY; 119 120 // property identifier: ValidationManager 121 protected static final String VALIDATION_MANAGER = 122 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 123 124 /** property identifier: NamespaceContext */ 125 protected static final String NAMESPACE_CONTEXT = 126 Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY; 127 128 // recognized features and properties 129 130 /** Recognized features. */ 131 private static final String[] RECOGNIZED_FEATURES = { 132 LOAD_EXTERNAL_DTD, 133 DISALLOW_DOCTYPE_DECL_FEATURE, 134 }; 135 136 /** Feature defaults. */ 137 private static final Boolean[] FEATURE_DEFAULTS = { 138 Boolean.TRUE, 139 Boolean.FALSE, 140 }; 141 142 /** Recognized properties. */ 143 private static final String[] RECOGNIZED_PROPERTIES = { 144 DTD_SCANNER, 145 VALIDATION_MANAGER 146 }; 147 148 /** Property defaults. */ 149 private static final Object[] PROPERTY_DEFAULTS = { 150 null, 151 null 152 }; 153 154 // 155 // Data((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 156 // 157 158 // properties 159 160 /** DTD scanner. */ 161 protected XMLDTDScanner fDTDScanner = null; 162 163 /** Validation manager . */ 164 //xxx: fValidationManager code needs to be added yet! 165 protected ValidationManager fValidationManager; 166 167 protected XMLStringBuffer fDTDDecl = null; 168 protected boolean fReadingDTD = false; 169 protected boolean fAddedListener = false; 170 171 // protected data 172 173 // other info 174 175 /** Doctype name. */ 176 protected String fDoctypeName; 177 178 /** Doctype declaration public identifier. */ 179 protected String fDoctypePublicId; 180 181 /** Doctype declaration system identifier. */ 182 protected String fDoctypeSystemId; 183 184 /** Namespace support. */ 185 protected NamespaceContext fNamespaceContext = new NamespaceSupport(); 186 187 // features 188 189 /** Load external DTD. */ 190 protected boolean fLoadExternalDTD = true; 191 192 // state 193 194 /** Seen doctype declaration. */ 195 protected boolean fSeenDoctypeDecl; 196 197 protected boolean fScanEndElement; 198 199 //protected int fScannerLastState ; 200 201 // drivers 202 203 /** XML declaration driver. */ 204 protected Driver fXMLDeclDriver = new XMLDeclDriver(); 205 206 /** Prolog driver. */ 207 protected Driver fPrologDriver = new PrologDriver(); 208 209 /** DTD driver. */ 210 protected Driver fDTDDriver = null ; 211 212 /** Trailing miscellaneous section driver. */ 213 protected Driver fTrailingMiscDriver = new TrailingMiscDriver(); 214 protected int fStartPos = 0; 215 protected int fEndPos = 0; 216 protected boolean fSeenInternalSubset= false; 217 // temporary variables 218 219 /** Array of 3 strings. */ 220 private String[] fStrings = new String[3]; 221 222 /** External subset source. */ 223 private XMLInputSource fExternalSubsetSource = null; 224 225 /** A DTD Description. */ 226 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 227 228 /** String. */ 229 private XMLString fString = new XMLString(); 230 231 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 232 private static final char [] COMMENTSTRING = {'-','-'}; 233 234 // 235 // Constructors 236 // 237 238 /** Default constructor. */ 239 public XMLDocumentScannerImpl() {} // <init>() 240 241 242 // 243 // XMLDocumentScanner methods 244 // 245 246 247 /** 248 * Sets the input source. 249 * 250 * @param inputSource The input source. 251 * 252 * @throws IOException Thrown on i/o error. 253 */ 254 public void setInputSource(XMLInputSource inputSource) throws IOException { 255 fEntityManager.setEntityHandler(this); 256 //this starts a new entity and sets the current entity to the document entity. 257 fEntityManager.startDocumentEntity(inputSource); 258 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 259 setScannerState(XMLEvent.START_DOCUMENT); 260 } // setInputSource(XMLInputSource) 261 262 263 264 /**return the state of the scanner */ 265 public int getScannetState(){ 266 return fScannerState ; 267 } 268 269 270 271 272 public void reset(PropertyManager propertyManager) { 273 super.reset(propertyManager); 274 // other settings 275 fDoctypeName = null; 276 fDoctypePublicId = null; 277 fDoctypeSystemId = null; 278 fSeenDoctypeDecl = false; 279 fNamespaceContext.reset(); 280 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 281 282 // xerces features 283 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 284 setScannerState(XMLEvent.START_DOCUMENT); 285 setDriver(fXMLDeclDriver); 286 fSeenInternalSubset = false; 287 if(fDTDScanner != null){ 288 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 289 } 290 fEndPos = 0; 291 fStartPos = 0; 292 if(fDTDDecl != null){ 293 fDTDDecl.clear(); 294 } 295 296 } 297 298 /** 299 * Resets the component. The component can query the component manager 300 * about any features and properties that affect the operation of the 301 * component. 302 * 303 * @param componentManager The component manager. 304 * 305 * @throws SAXException Thrown by component on initialization error. 306 * For example, if a feature or property is 307 * required for the operation of the component, the 308 * component manager may throw a 309 * SAXNotRecognizedException or a 310 * SAXNotSupportedException. 311 */ 312 public void reset(XMLComponentManager componentManager) 313 throws XMLConfigurationException { 314 315 super.reset(componentManager); 316 317 // other settings 318 fDoctypeName = null; 319 fDoctypePublicId = null; 320 fDoctypeSystemId = null; 321 fSeenDoctypeDecl = false; 322 fExternalSubsetSource = null; 323 324 // xerces features 325 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 326 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 327 328 fNamespaces = componentManager.getFeature(NAMESPACES, true); 329 330 fSeenInternalSubset = false; 331 // xerces properties 332 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 333 334 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 335 336 try { 337 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 338 } 339 catch (XMLConfigurationException e) { } 340 if (fNamespaceContext == null) { 341 fNamespaceContext = new NamespaceSupport(); 342 } 343 fNamespaceContext.reset(); 344 345 fEndPos = 0; 346 fStartPos = 0; 347 if(fDTDDecl != null) 348 fDTDDecl.clear(); 349 350 351 //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 352 353 // setup driver 354 setScannerState(SCANNER_STATE_XML_DECL); 355 setDriver(fXMLDeclDriver); 356 357 } // reset(XMLComponentManager) 358 359 360 /** 361 * Returns a list of feature identifiers that are recognized by 362 * this component. This method may return null if no features 363 * are recognized by this component. 364 */ 365 public String[] getRecognizedFeatures() { 366 String[] featureIds = super.getRecognizedFeatures(); 367 int length = featureIds != null ? featureIds.length : 0; 368 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 369 if (featureIds != null) { 370 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 371 } 372 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 373 return combinedFeatureIds; 374 } // getRecognizedFeatures():String[] 375 376 /** 377 * Sets the state of a feature. This method is called by the component 378 * manager any time after reset when a feature changes state. 379 * <p> 380 * <strong>Note:</strong> Components should silently ignore features 381 * that do not affect the operation of the component. 382 * 383 * @param featureId The feature identifier. 384 * @param state The state of the feature. 385 * 386 * @throws SAXNotRecognizedException The component should not throw 387 * this exception. 388 * @throws SAXNotSupportedException The component should not throw 389 * this exception. 390 */ 391 public void setFeature(String featureId, boolean state) 392 throws XMLConfigurationException { 393 394 super.setFeature(featureId, state); 395 396 // Xerces properties 397 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 398 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 399 400 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 401 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 402 fLoadExternalDTD = state; 403 return; 404 } 405 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 406 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 407 fDisallowDoctype = state; 408 return; 409 } 410 } 411 412 } // setFeature(String,boolean) 413 414 /** 415 * Returns a list of property identifiers that are recognized by 416 * this component. This method may return null if no properties 417 * are recognized by this component. 418 */ 419 public String[] getRecognizedProperties() { 420 String[] propertyIds = super.getRecognizedProperties(); 421 int length = propertyIds != null ? propertyIds.length : 0; 422 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 423 if (propertyIds != null) { 424 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 425 } 426 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 427 return combinedPropertyIds; 428 } // getRecognizedProperties():String[] 429 430 /** 431 * Sets the value of a property. This method is called by the component 432 * manager any time after reset when a property changes value. 433 * <p> 434 * <strong>Note:</strong> Components should silently ignore properties 435 * that do not affect the operation of the component. 436 * 437 * @param propertyId The property identifier. 438 * @param value The value of the property. 439 * 440 * @throws SAXNotRecognizedException The component should not throw 441 * this exception. 442 * @throws SAXNotSupportedException The component should not throw 443 * this exception. 444 */ 445 public void setProperty(String propertyId, Object value) 446 throws XMLConfigurationException { 447 448 super.setProperty(propertyId, value); 449 450 // Xerces properties 451 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 452 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 453 454 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 455 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 456 fDTDScanner = (XMLDTDScanner)value; 457 } 458 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 459 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 460 if (value != null) { 461 fNamespaceContext = (NamespaceContext)value; 462 } 463 } 464 465 return; 466 } 467 468 } // setProperty(String,Object) 469 470 /** 471 * Returns the default state for a feature, or null if this 472 * component does not want to report a default value for this 473 * feature. 474 * 475 * @param featureId The feature identifier. 476 * 477 * @since Xerces 2.2.0 478 */ 479 public Boolean getFeatureDefault(String featureId) { 480 481 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 482 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 483 return FEATURE_DEFAULTS[i]; 484 } 485 } 486 return super.getFeatureDefault(featureId); 487 } // getFeatureDefault(String):Boolean 488 489 /** 490 * Returns the default state for a property, or null if this 491 * component does not want to report a default value for this 492 * property. 493 * 494 * @param propertyId The property identifier. 495 * 496 * @since Xerces 2.2.0 497 */ 498 public Object getPropertyDefault(String propertyId) { 499 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 500 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 501 return PROPERTY_DEFAULTS[i]; 502 } 503 } 504 return super.getPropertyDefault(propertyId); 505 } // getPropertyDefault(String):Object 506 507 // 508 // XMLEntityHandler methods 509 // 510 511 /** 512 * This method notifies of the start of an entity. The DTD has the 513 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 514 * general entities are just specified by their name. 515 * 516 * @param name The name of the entity. 517 * @param identifier The resource identifier. 518 * @param encoding The auto-detected IANA encoding name of the entity 519 * stream. This value will be null in those situations 520 * where the entity encoding is not auto-detected (e.g. 521 * internal entities or a document entity that is 522 * parsed from a java.io.Reader). 523 * 524 * @throws XNIException Thrown by handler to signal an error. 525 */ 526 public void startEntity(String name, 527 XMLResourceIdentifier identifier, 528 String encoding, Augmentations augs) throws XNIException { 529 530 super.startEntity(name, identifier, encoding,augs); 531 532 //register current document scanner as a listener for XMLEntityScanner 533 fEntityScanner.registerListener(this); 534 535 // prepare to look for a TextDecl if external general entity 536 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 537 // Don't do this if we're skipping the entity! 538 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 539 setScannerState(SCANNER_STATE_TEXT_DECL); 540 } 541 } 542 543 // call handler 544 /** comment this part.. LOCATOR problem.. */ 545 if (fDocumentHandler != null && name.equals("[xml]")) { 546 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 547 } 548 549 } // startEntity(String,identifier,String) 550 551 552 /** 553 * This method notifies the end of an entity. The DTD has the pseudo-name 554 * of "[dtd]" parameter entity names start with '%'; and general entities 555 * are just specified by their name. 556 * 557 * @param name The name of the entity. 558 * 559 * @throws XNIException Thrown by handler to signal an error. 560 */ 561 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 562 563 super.endEntity(name, augs); 564 565 if(name.equals("[xml]")){ 566 //if fMarkupDepth has reached 0. 567 //and driver is fTrailingMiscDriver (which 568 //handles end of document in normal case) 569 //set the scanner state of SCANNER_STATE_TERMINATED 570 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 571 //set the scanner set to SCANNER_STATE_TERMINATED 572 setScannerState(SCANNER_STATE_TERMINATED) ; 573 } else{ 574 //else we have reached the end of document prematurely 575 //so throw EOFException. 576 throw new java.io.EOFException(); 577 } 578 579 //this is taken care in wrapper which generates XNI callbacks, There are no next events 580 581 //if (fDocumentHandler != null) { 582 //fDocumentHandler.endDocument(null); 583 //} 584 } 585 } // endEntity(String) 586 587 588 public XMLStringBuffer getDTDDecl(){ 589 Entity entity = fEntityScanner.getCurrentEntity(); 590 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 591 if(fSeenInternalSubset) 592 fDTDDecl.append("]>"); 593 return fDTDDecl; 594 } 595 596 public String getCharacterEncodingScheme(){ 597 return fDeclaredEncoding; 598 } 599 600 /** return the next state on the input 601 * 602 * @return int 603 */ 604 605 public int next() throws IOException, XNIException { 606 return fDriver.next(); 607 } 608 609 //getNamespaceContext 610 public NamespaceContext getNamespaceContext(){ 611 return fNamespaceContext ; 612 } 613 614 615 616 // 617 // Protected methods 618 // 619 620 // driver factory methods 621 622 /** Creates a content driver. */ 623 protected Driver createContentDriver() { 624 return new ContentDriver(); 625 } // createContentDriver():Driver 626 627 // scanning methods 628 629 /** Scans a doctype declaration. */ 630 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 631 632 // spaces 633 if (!fEntityScanner.skipSpaces()) { 634 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 635 null); 636 } 637 638 // root element name 639 fDoctypeName = fEntityScanner.scanName(); 640 if (fDoctypeName == null) { 641 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 642 } 643 644 // external id 645 if (fEntityScanner.skipSpaces()) { 646 scanExternalID(fStrings, false); 647 fDoctypeSystemId = fStrings[0]; 648 fDoctypePublicId = fStrings[1]; 649 fEntityScanner.skipSpaces(); 650 } 651 652 fHasExternalDTD = fDoctypeSystemId != null; 653 654 // Attempt to locate an external subset with an external subset resolver. 655 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 656 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 657 fDTDDescription.setRootName(fDoctypeName); 658 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 659 fHasExternalDTD = fExternalSubsetSource != null; 660 } 661 662 // call handler 663 if (supportDTD && fDocumentHandler != null) { 664 // NOTE: I don't like calling the doctypeDecl callback until 665 // end of the *full* doctype line (including internal 666 // subset) is parsed correctly but SAX2 requires that 667 // it knows the root element name and public and system 668 // identifier for the startDTD call. -Ac 669 if (fExternalSubsetSource == null) { 670 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 671 } 672 else { 673 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 674 } 675 } 676 677 // is there an internal subset? 678 boolean internalSubset = true; 679 if (!fEntityScanner.skipChar('[')) { 680 internalSubset = false; 681 fEntityScanner.skipSpaces(); 682 if (!fEntityScanner.skipChar('>')) { 683 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 684 } 685 fMarkupDepth--; 686 } 687 return internalSubset; 688 689 } // scanDoctypeDecl():boolean 690 691 // 692 // Private methods 693 // 694 /** Set the scanner state after scanning DTD */ 695 protected void setEndDTDScanState() { 696 setScannerState(SCANNER_STATE_PROLOG); 697 setDriver(fPrologDriver); 698 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 699 fReadingDTD=false; 700 } 701 702 /** Returns the scanner state name. */ 703 protected String getScannerStateName(int state) { 704 705 switch (state) { 706 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 707 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 708 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 709 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 710 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 711 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 712 } 713 return super.getScannerStateName(state); 714 715 } // getScannerStateName(int):String 716 717 // 718 // Classes 719 // 720 721 /** 722 * Driver to handle XMLDecl scanning. 723 * 724 * This class has been modified as per the new design which is more suited to 725 * efficiently build pull parser. Lots of performance improvements have been done and 726 * the code has been added to support stax functionality/features. 727 * 728 * @author Neeraj Bajaj, Sun Microsystems. 729 * 730 * @author Andy Clark, IBM 731 */ 732 protected final class XMLDeclDriver 733 implements Driver { 734 735 // 736 // Driver methods 737 // 738 739 740 public int next() throws IOException, XNIException { 741 if(DEBUG_NEXT){ 742 System.out.println("NOW IN XMLDeclDriver"); 743 } 744 745 // next driver is prolog regardless of whether there 746 // is an XMLDecl in this document 747 setScannerState(SCANNER_STATE_PROLOG); 748 setDriver(fPrologDriver); 749 750 //System.out.println("fEntityScanner = " + fEntityScanner); 751 // scan XMLDecl 752 try { 753 if (fEntityScanner.skipString(xmlDecl)) { 754 fMarkupDepth++; 755 // NOTE: special case where document starts with a PI 756 // whose name starts with "xml" (e.g. "xmlfoo") 757 if (XMLChar.isName(fEntityScanner.peekChar())) { 758 fStringBuffer.clear(); 759 fStringBuffer.append("xml"); 760 while (XMLChar.isName(fEntityScanner.peekChar())) { 761 fStringBuffer.append((char)fEntityScanner.scanChar()); 762 } 763 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 764 //this function should fill the data.. and set the fEvent object to this event. 765 fContentBuffer.clear() ; 766 scanPIData(target, fContentBuffer); 767 //REVISIT:where else we can set this value to 'true' 768 fEntityManager.fCurrentEntity.mayReadChunks = true; 769 //return PI event since PI was encountered 770 return XMLEvent.PROCESSING_INSTRUCTION ; 771 } 772 // standard XML declaration 773 else { 774 scanXMLDeclOrTextDecl(false); 775 //REVISIT:where else we can set this value to 'true' 776 fEntityManager.fCurrentEntity.mayReadChunks = true; 777 return XMLEvent.START_DOCUMENT; 778 } 779 } else{ 780 //REVISIT:where else we can set this value to 'true' 781 fEntityManager.fCurrentEntity.mayReadChunks = true; 782 //In both case return the START_DOCUMENT. ony difference is that first block will 783 //cosume the XML declaration if any. 784 return XMLEvent.START_DOCUMENT; 785 } 786 787 788 //START_OF_THE_DOCUMENT 789 790 791 } 792 793 // premature end of file 794 catch (EOFException e) { 795 reportFatalError("PrematureEOF", null); 796 return -1; 797 //throw e; 798 } 799 800 } 801 } // class XMLDeclDriver 802 803 /** 804 * Driver to handle prolog scanning. 805 * 806 * @author Andy Clark, IBM 807 */ 808 protected final class PrologDriver 809 implements Driver { 810 811 /** 812 * Drives the parser to the next state/event on the input. Parser is guaranteed 813 * to stop at the next state/event. 814 * 815 * Internally XML document is divided into several states. Each state represents 816 * a sections of XML document. When this functions returns normally, it has read 817 * the section of XML document and returns the state corresponding to section of 818 * document which has been read. For optimizations, a particular driver 819 * can read ahead of the section of document (state returned) just read and 820 * can maintain a different internal state. 821 * 822 * @return state representing the section of document just read. 823 * 824 * @throws IOException Thrown on i/o error. 825 * @throws XNIException Thrown on parse error. 826 */ 827 828 public int next() throws IOException, XNIException { 829 //System.out.println("here in next"); 830 831 if(DEBUG_NEXT){ 832 System.out.println("NOW IN PrologDriver"); 833 } 834 try { 835 do { 836 switch (fScannerState) { 837 case SCANNER_STATE_PROLOG: { 838 fEntityScanner.skipSpaces(); 839 if (fEntityScanner.skipChar('<')) { 840 setScannerState(SCANNER_STATE_START_OF_MARKUP); 841 } else if (fEntityScanner.skipChar('&')) { 842 setScannerState(SCANNER_STATE_REFERENCE); 843 } else { 844 setScannerState(SCANNER_STATE_CONTENT); 845 } 846 break; 847 } 848 849 case SCANNER_STATE_START_OF_MARKUP: { 850 fMarkupDepth++; 851 852 if (fEntityScanner.skipChar('?')) { 853 setScannerState(SCANNER_STATE_PI); 854 } else if (fEntityScanner.skipChar('!')) { 855 if (fEntityScanner.skipChar('-')) { 856 if (!fEntityScanner.skipChar('-')) { 857 reportFatalError("InvalidCommentStart", 858 null); 859 } 860 setScannerState(SCANNER_STATE_COMMENT); 861 } else if (fEntityScanner.skipString(DOCTYPE)) { 862 setScannerState(SCANNER_STATE_DOCTYPE); 863 Entity entity = fEntityScanner.getCurrentEntity(); 864 if(entity instanceof Entity.ScannedEntity){ 865 fStartPos=((Entity.ScannedEntity)entity).position; 866 } 867 fReadingDTD=true; 868 if(fDTDDecl == null) 869 fDTDDecl = new XMLStringBuffer(); 870 fDTDDecl.append("<!DOCTYPE"); 871 872 } else { 873 reportFatalError("MarkupNotRecognizedInProlog", 874 null); 875 } 876 } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { 877 setScannerState(SCANNER_STATE_ROOT_ELEMENT); 878 setDriver(fContentDriver); 879 //from now onwards this would be handled by fContentDriver,in the same next() call 880 return fContentDriver.next(); 881 882 } else { 883 reportFatalError("MarkupNotRecognizedInProlog", 884 null); 885 } 886 break; 887 } 888 } 889 } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP ); 890 891 switch(fScannerState){ 892 /** 893 //this part is handled by FragmentContentHandler 894 case SCANNER_STATE_ROOT_ELEMENT: { 895 //we have read '<' and beginning of reading the start element tag 896 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 897 setDriver(fContentDriver); 898 //from now onwards this would be handled by fContentDriver,in the same next() call 899 return fContentDriver.next(); 900 } 901 */ 902 case SCANNER_STATE_COMMENT: { 903 //this function fills the data.. 904 scanComment(); 905 setScannerState(SCANNER_STATE_PROLOG); 906 return XMLEvent.COMMENT; 907 //setScannerState(SCANNER_STATE_PROLOG); 908 //break; 909 } 910 case SCANNER_STATE_PI: { 911 fContentBuffer.clear() ; 912 scanPI(fContentBuffer); 913 setScannerState(SCANNER_STATE_PROLOG); 914 return XMLEvent.PROCESSING_INSTRUCTION; 915 } 916 917 case SCANNER_STATE_DOCTYPE: { 918 if (fDisallowDoctype) { 919 reportFatalError("DoctypeNotAllowed", null); 920 } 921 922 if (fSeenDoctypeDecl) { 923 reportFatalError("AlreadySeenDoctype", null); 924 } 925 fSeenDoctypeDecl = true; 926 927 // scanDoctypeDecl() sends XNI doctypeDecl event that 928 // in SAX is converted to startDTD() event. 929 if (scanDoctypeDecl(fSupportDTD)) { 930 //allow parsing of entity decls to continue in order to stay well-formed 931 setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS); 932 fSeenInternalSubset = true; 933 if(fDTDDriver == null){ 934 fDTDDriver = new DTDDriver(); 935 } 936 setDriver(fContentDriver); 937 //always return DTD event, the event however, will not contain any entities 938 return fDTDDriver.next(); 939 } 940 941 if(fSeenDoctypeDecl){ 942 Entity entity = fEntityScanner.getCurrentEntity(); 943 if(entity instanceof Entity.ScannedEntity){ 944 fEndPos = ((Entity.ScannedEntity)entity).position; 945 } 946 fReadingDTD = false; 947 } 948 949 // handle external subset 950 if (fDoctypeSystemId != null) { 951 if (((fValidation || fLoadExternalDTD) 952 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) { 953 if (fSupportDTD) { 954 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 955 } else { 956 setScannerState(SCANNER_STATE_PROLOG); 957 } 958 959 setDriver(fContentDriver); 960 if(fDTDDriver == null) { 961 fDTDDriver = new DTDDriver(); 962 } 963 964 return fDTDDriver.next(); 965 } 966 } 967 else if (fExternalSubsetSource != null) { 968 if (((fValidation || fLoadExternalDTD) 969 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) { 970 // This handles the case of a DOCTYPE that had neither an internal subset or an external subset. 971 fDTDScanner.setInputSource(fExternalSubsetSource); 972 fExternalSubsetSource = null; 973 if (fSupportDTD) 974 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 975 else 976 setScannerState(SCANNER_STATE_PROLOG); 977 setDriver(fContentDriver); 978 if(fDTDDriver == null) 979 fDTDDriver = new DTDDriver(); 980 return fDTDDriver.next(); 981 } 982 } 983 984 // Send endDTD() call if: 985 // a) systemId is null or if an external subset resolver could not locate an external subset. 986 // b) "load-external-dtd" and validation are false 987 // c) DTD grammar is cached 988 989 // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD 990 // in SAX this results in 2 events: startDTD, endDTD 991 if (fDTDScanner != null) { 992 fDTDScanner.setInputSource(null); 993 } 994 setScannerState(SCANNER_STATE_PROLOG); 995 return XMLEvent.DTD; 996 } 997 998 case SCANNER_STATE_CONTENT: { 999 reportFatalError("ContentIllegalInProlog", null); 1000 fEntityScanner.scanChar(); 1001 } 1002 case SCANNER_STATE_REFERENCE: { 1003 reportFatalError("ReferenceIllegalInProlog", null); 1004 } 1005 1006 /** 1007 * if (complete) { 1008 * if (fEntityScanner.scanChar() != '<') { 1009 * reportFatalError("RootElementRequired", null); 1010 * } 1011 * setScannerState(SCANNER_STATE_ROOT_ELEMENT); 1012 * setDriver(fContentDriver); 1013 * } 1014 */ 1015 } 1016 } 1017 // premature end of file 1018 catch (EOFException e) { 1019 reportFatalError("PrematureEOF", null); 1020 //xxx what should be returned here.... ??? 1021 return -1 ; 1022 //throw e; 1023 } 1024 //xxx what should be returned here.... ??? 1025 return -1; 1026 1027 } 1028 1029 1030 } // class PrologDriver 1031 1032 /** 1033 * Driver to handle the internal and external DTD subsets. 1034 * 1035 * @author Andy Clark, IBM 1036 */ 1037 protected final class DTDDriver 1038 implements Driver { 1039 1040 // 1041 // Driver methods 1042 // 1043 1044 public int next() throws IOException, XNIException{ 1045 // throw new XNIException("DTD Parsing is currently not supported"); 1046 if(DEBUG_NEXT){ 1047 System.out.println("Now in DTD Driver"); 1048 } 1049 1050 dispatch(true); 1051 1052 if(DEBUG_NEXT){ 1053 System.out.println("After calling dispatch(true) -- At this point whole DTD is read."); 1054 } 1055 1056 //xxx: remove this hack and align this with reusing DTD components 1057 //currently this routine will only be executed from Stax 1058 if(fPropertyManager != null){ 1059 dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext); 1060 } 1061 1062 return XMLEvent.DTD ; 1063 } 1064 1065 /** 1066 * Dispatch an XML "event". 1067 * 1068 * @param complete True if this driver is intended to scan 1069 * and dispatch as much as possible. 1070 * 1071 * @return True if there is more to dispatch either from this 1072 * or a another driver. 1073 * 1074 * @throws IOException Thrown on i/o error. 1075 * @throws XNIException Thrown on parse error. 1076 */ 1077 public boolean dispatch(boolean complete) 1078 throws IOException, XNIException { 1079 fEntityManager.setEntityHandler(null); 1080 try { 1081 boolean again; 1082 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1083 if( fDTDScanner == null){ 1084 1085 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1086 fDTDScanner = new XML11DTDScannerImpl(); 1087 } else 1088 1089 fDTDScanner = new XMLDTDScannerImpl(); 1090 1091 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1092 } 1093 do { 1094 again = false; 1095 switch (fScannerState) { 1096 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1097 // REVISIT: Should there be a feature for 1098 // the "complete" parameter? 1099 boolean completeDTD = true; 1100 1101 boolean moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1102 Entity entity = fEntityScanner.getCurrentEntity(); 1103 if(entity instanceof Entity.ScannedEntity){ 1104 fEndPos=((Entity.ScannedEntity)entity).position; 1105 } 1106 fReadingDTD=false; 1107 if (!moreToScan) { 1108 // end doctype declaration 1109 if (!fEntityScanner.skipChar(']')) { 1110 reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET", 1111 null); 1112 } 1113 fEntityScanner.skipSpaces(); 1114 if (!fEntityScanner.skipChar('>')) { 1115 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1116 } 1117 fMarkupDepth--; 1118 1119 if (!fSupportDTD) { 1120 //simply reset the entity store without having to mess around 1121 //with the DTD Scanner code 1122 fEntityStore = fEntityManager.getEntityStore(); 1123 fEntityStore.reset(); 1124 } else { 1125 // scan external subset next unless we are ignoring DTDs 1126 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1127 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1128 break; 1129 } 1130 } 1131 1132 setEndDTDScanState(); 1133 return true; 1134 1135 } 1136 break; 1137 } 1138 case SCANNER_STATE_DTD_EXTERNAL: { 1139 /** 1140 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1141 fDTDDescription.setRootName(fDoctypeName); 1142 XMLInputSource xmlInputSource = 1143 fEntityManager.resolveEntity(fDTDDescription); 1144 fDTDScanner.setInputSource(xmlInputSource); 1145 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1146 again = true; 1147 break; 1148 */ 1149 1150 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1151 XMLInputSource xmlInputSource = null ; 1152 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1153 1154 // Check access permission. If the source is resolved by a resolver, the check is skipped. 1155 if (!staxInputSource.hasResolver()) { 1156 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1157 if (accessError != null) { 1158 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1159 } 1160 } 1161 xmlInputSource = staxInputSource.getXMLInputSource(); 1162 fDTDScanner.setInputSource(xmlInputSource); 1163 if (fEntityScanner.fCurrentEntity != null) { 1164 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1165 } else { 1166 setScannerState(SCANNER_STATE_PROLOG); 1167 } 1168 again = true; 1169 break; 1170 } 1171 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1172 // REVISIT: Should there be a feature for 1173 // the "complete" parameter? 1174 boolean completeDTD = true; 1175 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1176 if (!moreToScan) { 1177 setEndDTDScanState(); 1178 return true; 1179 } 1180 break; 1181 } 1182 case SCANNER_STATE_PROLOG : { 1183 // skip entity decls 1184 setEndDTDScanState(); 1185 return true; 1186 } 1187 default: { 1188 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1189 } 1190 } 1191 } while (complete || again); 1192 } 1193 1194 // premature end of file 1195 catch (EOFException e) { 1196 e.printStackTrace(); 1197 reportFatalError("PrematureEOF", null); 1198 return false; 1199 //throw e; 1200 } 1201 1202 // cleanup 1203 finally { 1204 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1205 } 1206 1207 return true; 1208 1209 } 1210 1211 // dispatch(boolean):boolean 1212 1213 } // class DTDDriver 1214 1215 /** 1216 * Driver to handle content scanning. 1217 * 1218 * @author Andy Clark, IBM 1219 * @author Eric Ye, IBM 1220 */ 1221 protected class ContentDriver 1222 extends FragmentContentDriver { 1223 1224 // 1225 // Protected methods 1226 // 1227 1228 // hooks 1229 1230 // NOTE: These hook methods are added so that the full document 1231 // scanner can share the majority of code with this class. 1232 1233 /** 1234 * Scan for DOCTYPE hook. This method is a hook for subclasses 1235 * to add code to handle scanning for a the "DOCTYPE" string 1236 * after the string "<!" has been scanned. 1237 * 1238 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1239 * was not scanned. 1240 */ 1241 protected boolean scanForDoctypeHook() 1242 throws IOException, XNIException { 1243 1244 if (fEntityScanner.skipString(DOCTYPE)) { 1245 setScannerState(SCANNER_STATE_DOCTYPE); 1246 // fEntityScanner.markStartOfDTD(); 1247 return true; 1248 } 1249 return false; 1250 1251 } // scanForDoctypeHook():boolean 1252 1253 /** 1254 * Element depth iz zero. This methos is a hook for subclasses 1255 * to add code to handle when the element depth hits zero. When 1256 * scanning a document fragment, an element depth of zero is 1257 * normal. However, when scanning a full XML document, the 1258 * scanner must handle the trailing miscellanous section of 1259 * the document after the end of the document's root element. 1260 * 1261 * @return True if the caller should stop and return true which 1262 * allows the scanner to switch to a new scanning 1263 * driver. A return value of false indicates that 1264 * the content driver should continue as normal. 1265 */ 1266 protected boolean elementDepthIsZeroHook() 1267 throws IOException, XNIException { 1268 1269 setScannerState(SCANNER_STATE_TRAILING_MISC); 1270 setDriver(fTrailingMiscDriver); 1271 return true; 1272 1273 } // elementDepthIsZeroHook():boolean 1274 1275 /** 1276 * Scan for root element hook. This method is a hook for 1277 * subclasses to add code that handles scanning for the root 1278 * element. When scanning a document fragment, there is no 1279 * "root" element. However, when scanning a full XML document, 1280 * the scanner must handle the root element specially. 1281 * 1282 * @return True if the caller should stop and return true which 1283 * allows the scanner to switch to a new scanning 1284 * driver. A return value of false indicates that 1285 * the content driver should continue as normal. 1286 */ 1287 protected boolean scanRootElementHook() 1288 throws IOException, XNIException { 1289 1290 if (scanStartElement()) { 1291 setScannerState(SCANNER_STATE_TRAILING_MISC); 1292 setDriver(fTrailingMiscDriver); 1293 return true; 1294 } 1295 return false; 1296 1297 } // scanRootElementHook():boolean 1298 1299 /** 1300 * End of file hook. This method is a hook for subclasses to 1301 * add code that handles the end of file. The end of file in 1302 * a document fragment is OK if the markup depth is zero. 1303 * However, when scanning a full XML document, an end of file 1304 * is always premature. 1305 */ 1306 protected void endOfFileHook(EOFException e) 1307 throws IOException, XNIException { 1308 1309 reportFatalError("PrematureEOF", null); 1310 // in case continue-after-fatal-error set, should not do this... 1311 //throw e; 1312 1313 } // endOfFileHook() 1314 1315 protected void resolveExternalSubsetAndRead() 1316 throws IOException, XNIException { 1317 1318 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1319 fDTDDescription.setRootName(fElementQName.rawname); 1320 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1321 1322 if (src != null) { 1323 fDoctypeName = fElementQName.rawname; 1324 fDoctypePublicId = src.getPublicId(); 1325 fDoctypeSystemId = src.getSystemId(); 1326 // call document handler 1327 if (fDocumentHandler != null) { 1328 // This inserts a doctypeDecl event into the stream though no 1329 // DOCTYPE existed in the instance document. 1330 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1331 } 1332 try { 1333 fDTDScanner.setInputSource(src); 1334 while (fDTDScanner.scanDTDExternalSubset(true)); 1335 } finally { 1336 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1337 } 1338 } 1339 } // resolveExternalSubsetAndRead() 1340 1341 1342 1343 } // class ContentDriver 1344 1345 /** 1346 * Driver to handle trailing miscellaneous section scanning. 1347 * 1348 * @author Andy Clark, IBM 1349 * @author Eric Ye, IBM 1350 */ 1351 protected final class TrailingMiscDriver 1352 implements Driver { 1353 1354 // 1355 // Driver methods 1356 // 1357 public int next() throws IOException, XNIException{ 1358 //this could for cases like <foo/> 1359 //look at scanRootElementHook 1360 if(fEmptyElement){ 1361 fEmptyElement = false; 1362 return XMLEvent.END_ELEMENT; 1363 } 1364 1365 try { 1366 if(fScannerState == SCANNER_STATE_TERMINATED){ 1367 return XMLEvent.END_DOCUMENT ;} 1368 do { 1369 switch (fScannerState) { 1370 case SCANNER_STATE_TRAILING_MISC: { 1371 1372 fEntityScanner.skipSpaces(); 1373 //we should have reached the end of the document in 1374 //most cases. 1375 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1376 return XMLEvent.END_DOCUMENT ; 1377 } 1378 if (fEntityScanner.skipChar('<')) { 1379 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1380 } else { 1381 setScannerState(SCANNER_STATE_CONTENT); 1382 } 1383 break; 1384 } 1385 case SCANNER_STATE_START_OF_MARKUP: { 1386 fMarkupDepth++; 1387 if (fEntityScanner.skipChar('?')) { 1388 setScannerState(SCANNER_STATE_PI); 1389 } else if (fEntityScanner.skipChar('!')) { 1390 setScannerState(SCANNER_STATE_COMMENT); 1391 } else if (fEntityScanner.skipChar('/')) { 1392 reportFatalError("MarkupNotRecognizedInMisc", 1393 null); 1394 } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { 1395 reportFatalError("MarkupNotRecognizedInMisc", 1396 null); 1397 scanStartElement(); 1398 setScannerState(SCANNER_STATE_CONTENT); 1399 } else { 1400 reportFatalError("MarkupNotRecognizedInMisc", 1401 null); 1402 } 1403 break; 1404 } 1405 } 1406 }while(fScannerState == SCANNER_STATE_START_OF_MARKUP || fScannerState == SCANNER_STATE_TRAILING_MISC); 1407 if(DEBUG_NEXT){ 1408 System.out.println("State set by deciding while loop [TrailingMiscellaneous] is = " + getScannerStateName(fScannerState)); 1409 } 1410 switch (fScannerState){ 1411 case SCANNER_STATE_PI: { 1412 fContentBuffer.clear(); 1413 scanPI(fContentBuffer); 1414 setScannerState(SCANNER_STATE_TRAILING_MISC); 1415 return XMLEvent.PROCESSING_INSTRUCTION ; 1416 } 1417 case SCANNER_STATE_COMMENT: { 1418 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1419 reportFatalError("InvalidCommentStart", null); 1420 } 1421 scanComment(); 1422 setScannerState(SCANNER_STATE_TRAILING_MISC); 1423 return XMLEvent.COMMENT; 1424 } 1425 case SCANNER_STATE_CONTENT: { 1426 int ch = fEntityScanner.peekChar(); 1427 if (ch == -1) { 1428 setScannerState(SCANNER_STATE_TERMINATED); 1429 return XMLEvent.END_DOCUMENT ; 1430 } else{ 1431 reportFatalError("ContentIllegalInTrailingMisc", 1432 null); 1433 fEntityScanner.scanChar(); 1434 setScannerState(SCANNER_STATE_TRAILING_MISC); 1435 return XMLEvent.CHARACTERS; 1436 } 1437 1438 } 1439 case SCANNER_STATE_REFERENCE: { 1440 reportFatalError("ReferenceIllegalInTrailingMisc", 1441 null); 1442 setScannerState(SCANNER_STATE_TRAILING_MISC); 1443 return XMLEvent.ENTITY_REFERENCE ; 1444 } 1445 case SCANNER_STATE_TERMINATED: { 1446 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1447 //has reached the end of document 1448 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1449 //xxx what to do when the scanner has reached the terminating state. 1450 return XMLEvent.END_DOCUMENT ; 1451 } 1452 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1453 throw new java.util.NoSuchElementException("No more events to be parsed"); 1454 } 1455 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1456 }//switch 1457 1458 } catch (EOFException e) { 1459 // NOTE: This is the only place we're allowed to reach 1460 // the real end of the document stream. Unless the 1461 // end of file was reached prematurely. 1462 if (fMarkupDepth != 0) { 1463 reportFatalError("PrematureEOF", null); 1464 return -1; 1465 //throw e; 1466 } 1467 //System.out.println("EOFException thrown") ; 1468 setScannerState(SCANNER_STATE_TERMINATED); 1469 } 1470 1471 return XMLEvent.END_DOCUMENT; 1472 1473 }//next 1474 1475 } // class TrailingMiscDriver 1476 1477 /** 1478 * Implements XMLBufferListener interface. 1479 */ 1480 1481 1482 /** 1483 * receives callbacks from {@link XMLEntityReader } when buffer 1484 * is being changed. 1485 * @param refreshPosition 1486 */ 1487 public void refresh(int refreshPosition){ 1488 super.refresh(refreshPosition); 1489 if(fReadingDTD){ 1490 Entity entity = fEntityScanner.getCurrentEntity(); 1491 if(entity instanceof Entity.ScannedEntity){ 1492 fEndPos=((Entity.ScannedEntity)entity).position; 1493 } 1494 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1495 fStartPos = refreshPosition; 1496 } 1497 } 1498 1499 } // class XMLDocumentScannerImpl