1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 25 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 26 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 27 import com.sun.org.apache.xerces.internal.util.XMLChar; 28 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.xni.Augmentations; 32 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 33 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34 import com.sun.org.apache.xerces.internal.xni.XNIException; 35 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 38 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 39 import com.sun.xml.internal.stream.Entity; 40 import com.sun.xml.internal.stream.StaxXMLInputSource; 41 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 42 import java.io.EOFException; 43 import java.io.IOException; 44 import javax.xml.stream.XMLInputFactory; 45 import javax.xml.stream.events.XMLEvent; 46 47 48 /** 49 * This class is responsible for scanning XML document structure 50 * and content. 51 * 52 * This class has been modified as per the new design which is more suited to 53 * efficiently build pull parser. Lot of improvements have been done and 54 * the code has been added to support stax functionality/features. 55 * 56 * @author Neeraj Bajaj, Sun Microsystems 57 * @author K.Venugopal, Sun Microsystems 58 * @author Glenn Marcy, IBM 59 * @author Andy Clark, IBM 60 * @author Arnaud Le Hors, IBM 61 * @author Eric Ye, IBM 62 * @author Sunitha Reddy, Sun Microsystems 63 * 64 * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes 65 * related to property SupportDTD. 66 * @author Joe Wang, Sun Microsystems 67 */ 68 public class XMLDocumentScannerImpl 69 extends XMLDocumentFragmentScannerImpl{ 70 71 // 72 // Constants 73 // 74 75 // scanner states 76 77 /** Scanner state: XML declaration. */ 78 protected static final int SCANNER_STATE_XML_DECL = 42; 79 80 /** Scanner state: prolog. */ 81 protected static final int SCANNER_STATE_PROLOG = 43; 82 83 /** Scanner state: trailing misc. */ 84 protected static final int SCANNER_STATE_TRAILING_MISC = 44; 85 86 /** Scanner state: DTD internal declarations. */ 87 protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45; 88 89 /** Scanner state: open DTD external subset. */ 90 protected static final int SCANNER_STATE_DTD_EXTERNAL = 46; 91 92 /** Scanner state: DTD external declarations. */ 93 protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47; 94 95 /** Scanner state: NO MORE ELEMENTS. 
*/ 96 protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48; 97 98 // feature identifiers 99 100 /** Property identifier document scanner: */ 101 protected static final String DOCUMENT_SCANNER = 102 Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY; 103 104 /** Feature identifier: load external DTD. */ 105 protected static final String LOAD_EXTERNAL_DTD = 106 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 107 108 /** Feature identifier: load external DTD. */ 109 protected static final String DISALLOW_DOCTYPE_DECL_FEATURE = 110 Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE; 111 112 // property identifiers 113 114 /** Property identifier: DTD scanner. */ 115 protected static final String DTD_SCANNER = 116 Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY; 117 118 // property identifier: ValidationManager 119 protected static final String VALIDATION_MANAGER = 120 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 121 122 /** property identifier: NamespaceContext */ 123 protected static final String NAMESPACE_CONTEXT = 124 Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY; 125 126 // recognized features and properties 127 128 /** Recognized features. */ 129 private static final String[] RECOGNIZED_FEATURES = { 130 LOAD_EXTERNAL_DTD, 131 DISALLOW_DOCTYPE_DECL_FEATURE, 132 }; 133 134 /** Feature defaults. */ 135 private static final Boolean[] FEATURE_DEFAULTS = { 136 Boolean.TRUE, 137 Boolean.FALSE, 138 }; 139 140 /** Recognized properties. */ 141 private static final String[] RECOGNIZED_PROPERTIES = { 142 DTD_SCANNER, 143 VALIDATION_MANAGER 144 }; 145 146 /** Property defaults. 
*/ 147 private static final Object[] PROPERTY_DEFAULTS = { 148 null, 149 null 150 }; 151 152 // 153 // Data((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 154 // 155 156 // properties 157 158 /** DTD scanner. */ 159 protected XMLDTDScanner fDTDScanner = null; 160 161 /** Validation manager . */ 162 //xxx: fValidationManager code needs to be added yet! 163 protected ValidationManager fValidationManager; 164 165 protected XMLStringBuffer fDTDDecl = null; 166 protected boolean fReadingDTD = false; 167 protected boolean fAddedListener = false; 168 169 // protected data 170 171 // other info 172 173 /** Doctype name. */ 174 protected String fDoctypeName; 175 176 /** Doctype declaration public identifier. */ 177 protected String fDoctypePublicId; 178 179 /** Doctype declaration system identifier. */ 180 protected String fDoctypeSystemId; 181 182 /** Namespace support. */ 183 protected NamespaceContext fNamespaceContext = new NamespaceSupport(); 184 185 // features 186 187 /** Load external DTD. */ 188 protected boolean fLoadExternalDTD = true; 189 190 // state 191 192 /** Seen doctype declaration. */ 193 protected boolean fSeenDoctypeDecl; 194 195 protected boolean fScanEndElement; 196 197 //protected int fScannerLastState ; 198 199 // drivers 200 201 /** XML declaration driver. */ 202 protected Driver fXMLDeclDriver = new XMLDeclDriver(); 203 204 /** Prolog driver. */ 205 protected Driver fPrologDriver = new PrologDriver(); 206 207 /** DTD driver. */ 208 protected Driver fDTDDriver = null ; 209 210 /** Trailing miscellaneous section driver. */ 211 protected Driver fTrailingMiscDriver = new TrailingMiscDriver(); 212 protected int fStartPos = 0; 213 protected int fEndPos = 0; 214 protected boolean fSeenInternalSubset= false; 215 // temporary variables 216 217 /** Array of 3 strings. */ 218 private String[] fStrings = new String[3]; 219 220 /** External subset source. 
*/ 221 private XMLInputSource fExternalSubsetSource = null; 222 223 /** A DTD Description. */ 224 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 225 226 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 227 private static final char [] COMMENTSTRING = {'-','-'}; 228 229 // 230 // Constructors 231 // 232 233 /** Default constructor. */ 234 public XMLDocumentScannerImpl() {} // <init>() 235 236 237 // 238 // XMLDocumentScanner methods 239 // 240 241 242 /** 243 * Sets the input source. 244 * 245 * @param inputSource The input source. 246 * 247 * @throws IOException Thrown on i/o error. 248 */ 249 public void setInputSource(XMLInputSource inputSource) throws IOException { 250 fEntityManager.setEntityHandler(this); 251 //this starts a new entity and sets the current entity to the document entity. 252 fEntityManager.startDocumentEntity(inputSource); 253 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 254 setScannerState(XMLEvent.START_DOCUMENT); 255 } // setInputSource(XMLInputSource) 256 257 258 259 /**return the state of the scanner */ 260 public int getScannetState(){ 261 return fScannerState ; 262 } 263 264 265 266 267 public void reset(PropertyManager propertyManager) { 268 super.reset(propertyManager); 269 // other settings 270 fDoctypeName = null; 271 fDoctypePublicId = null; 272 fDoctypeSystemId = null; 273 fSeenDoctypeDecl = false; 274 fNamespaceContext.reset(); 275 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 276 277 // xerces features 278 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 279 setScannerState(XMLEvent.START_DOCUMENT); 280 setDriver(fXMLDeclDriver); 281 fSeenInternalSubset = false; 282 if(fDTDScanner != null){ 283 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 284 } 285 fEndPos = 0; 286 
fStartPos = 0; 287 if(fDTDDecl != null){ 288 fDTDDecl.clear(); 289 } 290 291 } 292 293 /** 294 * Resets the component. The component can query the component manager 295 * about any features and properties that affect the operation of the 296 * component. 297 * 298 * @param componentManager The component manager. 299 * 300 * @throws SAXException Thrown by component on initialization error. 301 * For example, if a feature or property is 302 * required for the operation of the component, the 303 * component manager may throw a 304 * SAXNotRecognizedException or a 305 * SAXNotSupportedException. 306 */ 307 public void reset(XMLComponentManager componentManager) 308 throws XMLConfigurationException { 309 310 super.reset(componentManager); 311 312 // other settings 313 fDoctypeName = null; 314 fDoctypePublicId = null; 315 fDoctypeSystemId = null; 316 fSeenDoctypeDecl = false; 317 fExternalSubsetSource = null; 318 319 // xerces features 320 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 321 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 322 323 fNamespaces = componentManager.getFeature(NAMESPACES, true); 324 325 fSeenInternalSubset = false; 326 // xerces properties 327 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 328 329 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 330 331 try { 332 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 333 } 334 catch (XMLConfigurationException e) { } 335 if (fNamespaceContext == null) { 336 fNamespaceContext = new NamespaceSupport(); 337 } 338 fNamespaceContext.reset(); 339 340 fEndPos = 0; 341 fStartPos = 0; 342 if(fDTDDecl != null) 343 fDTDDecl.clear(); 344 345 346 //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 347 348 // setup driver 349 setScannerState(SCANNER_STATE_XML_DECL); 350 setDriver(fXMLDeclDriver); 
351 352 } // reset(XMLComponentManager) 353 354 355 /** 356 * Returns a list of feature identifiers that are recognized by 357 * this component. This method may return null if no features 358 * are recognized by this component. 359 */ 360 public String[] getRecognizedFeatures() { 361 String[] featureIds = super.getRecognizedFeatures(); 362 int length = featureIds != null ? featureIds.length : 0; 363 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 364 if (featureIds != null) { 365 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 366 } 367 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 368 return combinedFeatureIds; 369 } // getRecognizedFeatures():String[] 370 371 /** 372 * Sets the state of a feature. This method is called by the component 373 * manager any time after reset when a feature changes state. 374 * <p> 375 * <strong>Note:</strong> Components should silently ignore features 376 * that do not affect the operation of the component. 377 * 378 * @param featureId The feature identifier. 379 * @param state The state of the feature. 380 * 381 * @throws SAXNotRecognizedException The component should not throw 382 * this exception. 383 * @throws SAXNotSupportedException The component should not throw 384 * this exception. 
385 */ 386 public void setFeature(String featureId, boolean state) 387 throws XMLConfigurationException { 388 389 super.setFeature(featureId, state); 390 391 // Xerces properties 392 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 393 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 394 395 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 396 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 397 fLoadExternalDTD = state; 398 return; 399 } 400 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 401 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 402 fDisallowDoctype = state; 403 return; 404 } 405 } 406 407 } // setFeature(String,boolean) 408 409 /** 410 * Returns a list of property identifiers that are recognized by 411 * this component. This method may return null if no properties 412 * are recognized by this component. 413 */ 414 public String[] getRecognizedProperties() { 415 String[] propertyIds = super.getRecognizedProperties(); 416 int length = propertyIds != null ? propertyIds.length : 0; 417 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 418 if (propertyIds != null) { 419 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 420 } 421 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 422 return combinedPropertyIds; 423 } // getRecognizedProperties():String[] 424 425 /** 426 * Sets the value of a property. This method is called by the component 427 * manager any time after reset when a property changes value. 428 * <p> 429 * <strong>Note:</strong> Components should silently ignore properties 430 * that do not affect the operation of the component. 431 * 432 * @param propertyId The property identifier. 433 * @param value The value of the property. 
434 * 435 * @throws SAXNotRecognizedException The component should not throw 436 * this exception. 437 * @throws SAXNotSupportedException The component should not throw 438 * this exception. 439 */ 440 public void setProperty(String propertyId, Object value) 441 throws XMLConfigurationException { 442 443 super.setProperty(propertyId, value); 444 445 // Xerces properties 446 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 447 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 448 449 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 450 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 451 fDTDScanner = (XMLDTDScanner)value; 452 } 453 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 454 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 455 if (value != null) { 456 fNamespaceContext = (NamespaceContext)value; 457 } 458 } 459 460 return; 461 } 462 463 } // setProperty(String,Object) 464 465 /** 466 * Returns the default state for a feature, or null if this 467 * component does not want to report a default value for this 468 * feature. 469 * 470 * @param featureId The feature identifier. 471 * 472 * @since Xerces 2.2.0 473 */ 474 public Boolean getFeatureDefault(String featureId) { 475 476 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 477 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 478 return FEATURE_DEFAULTS[i]; 479 } 480 } 481 return super.getFeatureDefault(featureId); 482 } // getFeatureDefault(String):Boolean 483 484 /** 485 * Returns the default state for a property, or null if this 486 * component does not want to report a default value for this 487 * property. 488 * 489 * @param propertyId The property identifier. 
490 * 491 * @since Xerces 2.2.0 492 */ 493 public Object getPropertyDefault(String propertyId) { 494 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 495 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 496 return PROPERTY_DEFAULTS[i]; 497 } 498 } 499 return super.getPropertyDefault(propertyId); 500 } // getPropertyDefault(String):Object 501 502 // 503 // XMLEntityHandler methods 504 // 505 506 /** 507 * This method notifies of the start of an entity. The DTD has the 508 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 509 * general entities are just specified by their name. 510 * 511 * @param name The name of the entity. 512 * @param identifier The resource identifier. 513 * @param encoding The auto-detected IANA encoding name of the entity 514 * stream. This value will be null in those situations 515 * where the entity encoding is not auto-detected (e.g. 516 * internal entities or a document entity that is 517 * parsed from a java.io.Reader). 518 * 519 * @throws XNIException Thrown by handler to signal an error. 520 */ 521 public void startEntity(String name, 522 XMLResourceIdentifier identifier, 523 String encoding, Augmentations augs) throws XNIException { 524 525 super.startEntity(name, identifier, encoding,augs); 526 527 //register current document scanner as a listener for XMLEntityScanner 528 fEntityScanner.registerListener(this); 529 530 // prepare to look for a TextDecl if external general entity 531 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 532 // Don't do this if we're skipping the entity! 533 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 534 setScannerState(SCANNER_STATE_TEXT_DECL); 535 } 536 } 537 538 // call handler 539 /** comment this part.. LOCATOR problem.. 
*/ 540 if (fDocumentHandler != null && name.equals("[xml]")) { 541 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 542 } 543 544 } // startEntity(String,identifier,String) 545 546 547 /** 548 * This method notifies the end of an entity. The DTD has the pseudo-name 549 * of "[dtd]" parameter entity names start with '%'; and general entities 550 * are just specified by their name. 551 * 552 * @param name The name of the entity. 553 * 554 * @throws XNIException Thrown by handler to signal an error. 555 */ 556 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 557 558 super.endEntity(name, augs); 559 560 if(name.equals("[xml]")){ 561 //if fMarkupDepth has reached 0. 562 //and driver is fTrailingMiscDriver (which 563 //handles end of document in normal case) 564 //set the scanner state of SCANNER_STATE_TERMINATED 565 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 566 //set the scanner set to SCANNER_STATE_TERMINATED 567 setScannerState(SCANNER_STATE_TERMINATED) ; 568 } else{ 569 //else we have reached the end of document prematurely 570 //so throw EOFException. 
571 throw new java.io.EOFException(); 572 } 573 574 //this is taken care in wrapper which generates XNI callbacks, There are no next events 575 576 //if (fDocumentHandler != null) { 577 //fDocumentHandler.endDocument(null); 578 //} 579 } 580 } // endEntity(String) 581 582 583 public XMLStringBuffer getDTDDecl(){ 584 Entity entity = fEntityScanner.getCurrentEntity(); 585 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 586 if(fSeenInternalSubset) 587 fDTDDecl.append("]>"); 588 return fDTDDecl; 589 } 590 591 public String getCharacterEncodingScheme(){ 592 return fDeclaredEncoding; 593 } 594 595 /** return the next state on the input 596 * 597 * @return int 598 */ 599 600 public int next() throws IOException, XNIException { 601 return fDriver.next(); 602 } 603 604 //getNamespaceContext 605 public NamespaceContext getNamespaceContext(){ 606 return fNamespaceContext ; 607 } 608 609 610 611 // 612 // Protected methods 613 // 614 615 // driver factory methods 616 617 /** Creates a content driver. */ 618 protected Driver createContentDriver() { 619 return new ContentDriver(); 620 } // createContentDriver():Driver 621 622 // scanning methods 623 624 /** Scans a doctype declaration. 
*/ 625 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 626 627 // spaces 628 if (!fEntityScanner.skipSpaces()) { 629 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 630 null); 631 } 632 633 // root element name 634 fDoctypeName = fEntityScanner.scanName(NameType.DOCTYPE); 635 if (fDoctypeName == null) { 636 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 637 } 638 639 // external id 640 if (fEntityScanner.skipSpaces()) { 641 scanExternalID(fStrings, false); 642 fDoctypeSystemId = fStrings[0]; 643 fDoctypePublicId = fStrings[1]; 644 fEntityScanner.skipSpaces(); 645 } 646 647 fHasExternalDTD = fDoctypeSystemId != null; 648 649 // Attempt to locate an external subset with an external subset resolver. 650 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 651 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 652 fDTDDescription.setRootName(fDoctypeName); 653 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 654 fHasExternalDTD = fExternalSubsetSource != null; 655 } 656 657 // call handler 658 if (supportDTD && fDocumentHandler != null) { 659 // NOTE: I don't like calling the doctypeDecl callback until 660 // end of the *full* doctype line (including internal 661 // subset) is parsed correctly but SAX2 requires that 662 // it knows the root element name and public and system 663 // identifier for the startDTD call. -Ac 664 if (fExternalSubsetSource == null) { 665 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 666 } 667 else { 668 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 669 } 670 } 671 672 // is there an internal subset? 
673 boolean internalSubset = true; 674 if (!fEntityScanner.skipChar('[', null)) { 675 internalSubset = false; 676 fEntityScanner.skipSpaces(); 677 if (!fEntityScanner.skipChar('>', null)) { 678 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 679 } 680 fMarkupDepth--; 681 } 682 return internalSubset; 683 684 } // scanDoctypeDecl():boolean 685 686 // 687 // Private methods 688 // 689 /** Set the scanner state after scanning DTD */ 690 protected void setEndDTDScanState() { 691 setScannerState(SCANNER_STATE_PROLOG); 692 setDriver(fPrologDriver); 693 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 694 fReadingDTD=false; 695 } 696 697 /** Returns the scanner state name. */ 698 protected String getScannerStateName(int state) { 699 700 switch (state) { 701 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 702 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 703 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 704 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 705 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 706 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 707 } 708 return super.getScannerStateName(state); 709 710 } // getScannerStateName(int):String 711 712 // 713 // Classes 714 // 715 716 /** 717 * Driver to handle XMLDecl scanning. 718 * 719 * This class has been modified as per the new design which is more suited to 720 * efficiently build pull parser. Lots of performance improvements have been done and 721 * the code has been added to support stax functionality/features. 722 * 723 * @author Neeraj Bajaj, Sun Microsystems. 
    /**
     * Driver to handle XMLDecl scanning.
     * <p>
     * Runs once at the start of the document: it installs the prolog driver
     * as the next driver and then either consumes an XML declaration or a
     * processing instruction whose target merely starts with "xml".
     *
     * @author Neeraj Bajaj, Sun Microsystems.
     * @author Andy Clark, IBM
     */
    protected final class XMLDeclDriver
            implements Driver {

        //
        // Driver methods
        //

        /**
         * Scans the optional XML declaration.
         *
         * @return {@code XMLEvent.PROCESSING_INSTRUCTION} when the document
         *         starts with a PI whose target begins with "xml",
         *         {@code XMLEvent.START_DOCUMENT} otherwise, or -1 when
         *         end of input was hit and the fatal error report returned.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException {
            if(DEBUG_NEXT){
                System.out.println("NOW IN XMLDeclDriver");
            }

            // next driver is prolog regardless of whether there
            // is an XMLDecl in this document
            setScannerState(SCANNER_STATE_PROLOG);
            setDriver(fPrologDriver);

            // scan XMLDecl
            try {
                if (fEntityScanner.skipString(xmlDecl)) {
                    fMarkupDepth++;
                    // NOTE: special case where document starts with a PI
                    //       whose name starts with "xml" (e.g. "xmlfoo")
                    if (XMLChar.isName(fEntityScanner.peekChar())) {
                        // rebuild the full target: "xml" plus the remaining name characters
                        fStringBuffer.clear();
                        fStringBuffer.append("xml");
                        while (XMLChar.isName(fEntityScanner.peekChar())) {
                            fStringBuffer.append((char)fEntityScanner.scanChar(null));
                        }
                        String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
                        // the PI data is collected into fContentBuffer
                        fContentBuffer.clear() ;
                        scanPIData(target, fContentBuffer);
                        //REVISIT:where else we can set this value to 'true'
                        fEntityManager.fCurrentEntity.mayReadChunks = true;
                        //return PI event since PI was encountered
                        return XMLEvent.PROCESSING_INSTRUCTION ;
                    }
                    // standard XML declaration
                    else {
                        scanXMLDeclOrTextDecl(false);
                        //REVISIT:where else we can set this value to 'true'
                        fEntityManager.fCurrentEntity.mayReadChunks = true;
                        return XMLEvent.START_DOCUMENT;
                    }
                } else{
                    //REVISIT:where else we can set this value to 'true'
                    fEntityManager.fCurrentEntity.mayReadChunks = true;
                    // In both cases START_DOCUMENT is returned; the only difference is
                    // that the branch above consumes the XML declaration, if any.
                    return XMLEvent.START_DOCUMENT;
                }

            }

            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                // only reached if reportFatalError returns instead of throwing
                return -1;
            }

        }
    } // class XMLDeclDriver

    /**
     * Driver to handle prolog scanning.
     * <p>
     * Recognizes comments, processing instructions and the DOCTYPE
     * declaration, and hands over to the content driver once the start of
     * the root element is seen.
     *
     * @author Andy Clark, IBM
     */
    protected final class PrologDriver
            implements Driver {

        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event.
         *
         * Internally XML document is divided into several states. Each state represents
         * a sections of XML document. When this functions returns normally, it has read
         * the section of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * @return state representing the section of document just read, or -1
         *         when an error was reported and the error report returned.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException {

            if(DEBUG_NEXT){
                System.out.println("NOW IN PrologDriver");
            }
            try {
                // Phase 1: loop until the kind of the next construct is known,
                // i.e. until the state is neither PROLOG nor START_OF_MARKUP.
                do {
                    switch (fScannerState) {
                        case SCANNER_STATE_PROLOG: {
                            fEntityScanner.skipSpaces();
                            if (fEntityScanner.skipChar('<', null)) {
                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
                                setScannerState(SCANNER_STATE_REFERENCE);
                            } else {
                                setScannerState(SCANNER_STATE_CONTENT);
                            }
                            break;
                        }

                        case SCANNER_STATE_START_OF_MARKUP: {
                            fMarkupDepth++;
                            if (isValidNameStartChar(fEntityScanner.peekChar()) ||
                                    isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                // a name start character after '<' begins the root element
                                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                                setDriver(fContentDriver);
                                //from now onwards this would be handled by fContentDriver,in the same next() call
                                return fContentDriver.next();
                            } else if (fEntityScanner.skipChar('!', null)) {
                                if (fEntityScanner.skipChar('-', null)) {
                                    if (!fEntityScanner.skipChar('-', null)) {
                                        reportFatalError("InvalidCommentStart",
                                                null);
                                    }
                                    setScannerState(SCANNER_STATE_COMMENT);
                                } else if (fEntityScanner.skipString(DOCTYPE)) {
                                    setScannerState(SCANNER_STATE_DOCTYPE);
                                    // remember where the declaration text starts in the entity buffer
                                    Entity entity = fEntityScanner.getCurrentEntity();
                                    if(entity instanceof Entity.ScannedEntity){
                                        fStartPos=((Entity.ScannedEntity)entity).position;
                                    }
                                    fReadingDTD=true;
                                    // the keyword has already been consumed, so seed the buffer with it
                                    if(fDTDDecl == null)
                                        fDTDDecl = new XMLStringBuffer();
                                    fDTDDecl.append("<!DOCTYPE");

                                } else {
                                    reportFatalError("MarkupNotRecognizedInProlog",
                                            null);
                                }
                            } else if (fEntityScanner.skipChar('?', null)) {
                                setScannerState(SCANNER_STATE_PI);
                            } else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                        null);
                            }
                            break;
                        }
                    }
                } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP );

                // Phase 2: scan the construct identified above.
                switch(fScannerState){
                    /*
                    //this part is handled by FragmentContentHandler
                    case SCANNER_STATE_ROOT_ELEMENT: {
                        //we have read '<' and beginning of reading the start element tag
                        setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
                        setDriver(fContentDriver);
                        //from now onwards this would be handled by fContentDriver,in the same next() call
                        return fContentDriver.next();
                    }
                     */
                    case SCANNER_STATE_COMMENT: {
                        //this function fills the data..
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.COMMENT;
                    }
                    case SCANNER_STATE_PI: {
                        fContentBuffer.clear() ;
                        scanPI(fContentBuffer);
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.PROCESSING_INSTRUCTION;
                    }

                    case SCANNER_STATE_DOCTYPE: {
                        if (fDisallowDoctype) {
                            reportFatalError("DoctypeNotAllowed", null);
                        }

                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // scanDoctypeDecl() sends XNI doctypeDecl event that
                        // in SAX is converted to startDTD() event.
                        if (scanDoctypeDecl(fSupportDTD)) {
                            // an internal subset follows
                            //allow parsing of entity decls to continue in order to stay well-formed
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            fSeenInternalSubset = true;
                            if(fDTDDriver == null){
                                fDTDDriver = new DTDDriver();
                            }
                            // the content driver becomes current; the DTD driver is invoked directly
                            setDriver(fContentDriver);
                            //always return DTD event, the event however, will not contain any entities
                            return fDTDDriver.next();
                        }

                        // No internal subset: the declaration has been closed, so record
                        // its end position.
                        // NOTE(review): fSeenDoctypeDecl was assigned true a few lines
                        // above, so this condition looks always true here - confirm.
                        if(fSeenDoctypeDecl){
                            Entity entity = fEntityScanner.getCurrentEntity();
                            if(entity instanceof Entity.ScannedEntity){
                                fEndPos = ((Entity.ScannedEntity)entity).position;
                            }
                            fReadingDTD = false;
                        }

                        // handle external subset
                        if (fDoctypeSystemId != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                if (fSupportDTD) {
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                } else {
                                    setScannerState(SCANNER_STATE_PROLOG);
                                }

                                setDriver(fContentDriver);
                                if(fDTDDriver == null) {
                                    fDTDDriver = new DTDDriver();
                                }

                                return fDTDDriver.next();
                            }
                        }
                        else if (fExternalSubsetSource != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
                                // NOTE(review): fDTDScanner is dereferenced without a null
                                // check here, unlike the guarded call further below - confirm
                                // it is always set when an external subset source exists.
                                fDTDScanner.setInputSource(fExternalSubsetSource);
                                fExternalSubsetSource = null;
                                if (fSupportDTD)
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                else
                                    setScannerState(SCANNER_STATE_PROLOG);
                                setDriver(fContentDriver);
                                if(fDTDDriver == null)
                                    fDTDDriver = new DTDDriver();
                                return fDTDDriver.next();
                            }
                        }

                        // Send endDTD() call if:
                        // a) systemId is null or if an external subset resolver could not locate an external subset.
                        // b) "load-external-dtd" and validation are false
                        // c) DTD grammar is cached

                        // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD
                        // in SAX this results in 2 events: startDTD, endDTD
                        if (fDTDScanner != null) {
                            fDTDScanner.setInputSource(null);
                        }
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.DTD;
                    }

                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        fEntityScanner.scanChar(null);
                        // NOTE(review): there is no break here, so if reportFatalError
                        // returns, control falls through and "ReferenceIllegalInProlog"
                        // is reported as well - confirm whether that is intended.
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                    }

                    /*
                     * if (complete) {
                     *     if (fEntityScanner.scanChar() != '<') {
                     *         reportFatalError("RootElementRequired", null);
                     *     }
                     *     setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                     *     setDriver(fContentDriver);
                     * }
                     */
                }
            }
            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                //xxx what should be returned here.... ???
                // only reached if reportFatalError returns instead of throwing
                return -1 ;
            }
            //xxx what should be returned here.... ???
            // reached after the content/reference error cases, or when phase 2 matched no case
            return -1;

        }


    } // class PrologDriver
1028 * 1029 * @author Andy Clark, IBM 1030 */ 1031 protected final class DTDDriver 1032 implements Driver { 1033 1034 // 1035 // Driver methods 1036 // 1037 1038 public int next() throws IOException, XNIException{ 1039 // throw new XNIException("DTD Parsing is currently not supported"); 1040 if(DEBUG_NEXT){ 1041 System.out.println("Now in DTD Driver"); 1042 } 1043 1044 dispatch(true); 1045 1046 if(DEBUG_NEXT){ 1047 System.out.println("After calling dispatch(true) -- At this point whole DTD is read."); 1048 } 1049 1050 //xxx: remove this hack and align this with reusing DTD components 1051 //currently this routine will only be executed from Stax 1052 if(fPropertyManager != null){ 1053 dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext); 1054 } 1055 1056 return XMLEvent.DTD ; 1057 } 1058 1059 /** 1060 * Dispatch an XML "event". 1061 * 1062 * @param complete True if this driver is intended to scan 1063 * and dispatch as much as possible. 1064 * 1065 * @return True if there is more to dispatch either from this 1066 * or a another driver. 1067 * 1068 * @throws IOException Thrown on i/o error. 1069 * @throws XNIException Thrown on parse error. 
1070 */ 1071 public boolean dispatch(boolean complete) 1072 throws IOException, XNIException { 1073 fEntityManager.setEntityHandler(null); 1074 try { 1075 boolean again; 1076 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1077 if( fDTDScanner == null){ 1078 1079 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1080 fDTDScanner = new XML11DTDScannerImpl(); 1081 } else 1082 1083 fDTDScanner = new XMLDTDScannerImpl(); 1084 1085 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1086 } 1087 1088 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer); 1089 do { 1090 again = false; 1091 switch (fScannerState) { 1092 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1093 boolean moreToScan = false; 1094 if (!fDTDScanner.skipDTD(fSupportDTD)) { 1095 // REVISIT: Should there be a feature for 1096 // the "complete" parameter? 1097 boolean completeDTD = true; 1098 1099 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1100 } 1101 Entity entity = fEntityScanner.getCurrentEntity(); 1102 if(entity instanceof Entity.ScannedEntity){ 1103 fEndPos=((Entity.ScannedEntity)entity).position; 1104 } 1105 fReadingDTD=false; 1106 if (!moreToScan) { 1107 // end doctype declaration 1108 if (!fEntityScanner.skipChar(']', null)) { 1109 reportFatalError("DoctypedeclNotClosed", new Object[]{fDoctypeName}); 1110 } 1111 fEntityScanner.skipSpaces(); 1112 if (!fEntityScanner.skipChar('>', null)) { 1113 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1114 } 1115 fMarkupDepth--; 1116 1117 if (!fSupportDTD) { 1118 //simply reset the entity store without having to mess around 1119 //with the DTD Scanner code 1120 fEntityStore = fEntityManager.getEntityStore(); 1121 fEntityStore.reset(); 1122 } else { 1123 // scan external subset next unless we are ignoring DTDs 1124 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1125 
setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1126 break; 1127 } 1128 } 1129 1130 setEndDTDScanState(); 1131 return true; 1132 1133 } 1134 break; 1135 } 1136 case SCANNER_STATE_DTD_EXTERNAL: { 1137 /** 1138 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1139 fDTDDescription.setRootName(fDoctypeName); 1140 XMLInputSource xmlInputSource = 1141 fEntityManager.resolveEntity(fDTDDescription); 1142 fDTDScanner.setInputSource(xmlInputSource); 1143 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1144 again = true; 1145 break; 1146 */ 1147 1148 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1149 XMLInputSource xmlInputSource = null ; 1150 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1151 1152 // Check access permission. If the source is resolved by a resolver, the check is skipped. 1153 if (!staxInputSource.hasResolver()) { 1154 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1155 if (accessError != null) { 1156 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1157 } 1158 } 1159 xmlInputSource = staxInputSource.getXMLInputSource(); 1160 fDTDScanner.setInputSource(xmlInputSource); 1161 if (fEntityScanner.fCurrentEntity != null) { 1162 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1163 } else { 1164 setScannerState(SCANNER_STATE_PROLOG); 1165 } 1166 again = true; 1167 break; 1168 } 1169 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1170 // REVISIT: Should there be a feature for 1171 // the "complete" parameter? 
1172 boolean completeDTD = true; 1173 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1174 if (!moreToScan) { 1175 setEndDTDScanState(); 1176 return true; 1177 } 1178 break; 1179 } 1180 case SCANNER_STATE_PROLOG : { 1181 // skip entity decls 1182 setEndDTDScanState(); 1183 return true; 1184 } 1185 default: { 1186 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1187 } 1188 } 1189 } while (complete || again); 1190 } 1191 1192 // premature end of file 1193 catch (EOFException e) { 1194 e.printStackTrace(); 1195 reportFatalError("PrematureEOF", null); 1196 return false; 1197 //throw e; 1198 } 1199 1200 // cleanup 1201 finally { 1202 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1203 } 1204 1205 return true; 1206 1207 } 1208 1209 // dispatch(boolean):boolean 1210 1211 } // class DTDDriver 1212 1213 /** 1214 * Driver to handle content scanning. 1215 * 1216 * @author Andy Clark, IBM 1217 * @author Eric Ye, IBM 1218 */ 1219 protected class ContentDriver 1220 extends FragmentContentDriver { 1221 1222 // 1223 // Protected methods 1224 // 1225 1226 // hooks 1227 1228 // NOTE: These hook methods are added so that the full document 1229 // scanner can share the majority of code with this class. 1230 1231 /** 1232 * Scan for DOCTYPE hook. This method is a hook for subclasses 1233 * to add code to handle scanning for a the "DOCTYPE" string 1234 * after the string "<!" has been scanned. 1235 * 1236 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1237 * was not scanned. 1238 */ 1239 protected boolean scanForDoctypeHook() 1240 throws IOException, XNIException { 1241 1242 if (fEntityScanner.skipString(DOCTYPE)) { 1243 setScannerState(SCANNER_STATE_DOCTYPE); 1244 // fEntityScanner.markStartOfDTD(); 1245 return true; 1246 } 1247 return false; 1248 1249 } // scanForDoctypeHook():boolean 1250 1251 /** 1252 * Element depth iz zero. 
This methos is a hook for subclasses 1253 * to add code to handle when the element depth hits zero. When 1254 * scanning a document fragment, an element depth of zero is 1255 * normal. However, when scanning a full XML document, the 1256 * scanner must handle the trailing miscellanous section of 1257 * the document after the end of the document's root element. 1258 * 1259 * @return True if the caller should stop and return true which 1260 * allows the scanner to switch to a new scanning 1261 * driver. A return value of false indicates that 1262 * the content driver should continue as normal. 1263 */ 1264 protected boolean elementDepthIsZeroHook() 1265 throws IOException, XNIException { 1266 1267 setScannerState(SCANNER_STATE_TRAILING_MISC); 1268 setDriver(fTrailingMiscDriver); 1269 return true; 1270 1271 } // elementDepthIsZeroHook():boolean 1272 1273 /** 1274 * Scan for root element hook. This method is a hook for 1275 * subclasses to add code that handles scanning for the root 1276 * element. When scanning a document fragment, there is no 1277 * "root" element. However, when scanning a full XML document, 1278 * the scanner must handle the root element specially. 1279 * 1280 * @return True if the caller should stop and return true which 1281 * allows the scanner to switch to a new scanning 1282 * driver. A return value of false indicates that 1283 * the content driver should continue as normal. 1284 */ 1285 protected boolean scanRootElementHook() 1286 throws IOException, XNIException { 1287 1288 if (scanStartElement()) { 1289 setScannerState(SCANNER_STATE_TRAILING_MISC); 1290 setDriver(fTrailingMiscDriver); 1291 return true; 1292 } 1293 return false; 1294 1295 } // scanRootElementHook():boolean 1296 1297 /** 1298 * End of file hook. This method is a hook for subclasses to 1299 * add code that handles the end of file. The end of file in 1300 * a document fragment is OK if the markup depth is zero. 
1301 * However, when scanning a full XML document, an end of file 1302 * is always premature. 1303 */ 1304 protected void endOfFileHook(EOFException e) 1305 throws IOException, XNIException { 1306 1307 reportFatalError("PrematureEOF", null); 1308 // in case continue-after-fatal-error set, should not do this... 1309 //throw e; 1310 1311 } // endOfFileHook() 1312 1313 protected void resolveExternalSubsetAndRead() 1314 throws IOException, XNIException { 1315 1316 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1317 fDTDDescription.setRootName(fElementQName.rawname); 1318 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1319 1320 if (src != null) { 1321 fDoctypeName = fElementQName.rawname; 1322 fDoctypePublicId = src.getPublicId(); 1323 fDoctypeSystemId = src.getSystemId(); 1324 // call document handler 1325 if (fDocumentHandler != null) { 1326 // This inserts a doctypeDecl event into the stream though no 1327 // DOCTYPE existed in the instance document. 1328 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1329 } 1330 try { 1331 fDTDScanner.setInputSource(src); 1332 while (fDTDScanner.scanDTDExternalSubset(true)); 1333 } finally { 1334 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1335 } 1336 } 1337 } // resolveExternalSubsetAndRead() 1338 1339 1340 1341 } // class ContentDriver 1342 1343 /** 1344 * Driver to handle trailing miscellaneous section scanning. 
1345 * 1346 * @author Andy Clark, IBM 1347 * @author Eric Ye, IBM 1348 */ 1349 protected final class TrailingMiscDriver 1350 implements Driver { 1351 1352 // 1353 // Driver methods 1354 // 1355 public int next() throws IOException, XNIException{ 1356 //this could for cases like <foo/> 1357 //look at scanRootElementHook 1358 if(fEmptyElement){ 1359 fEmptyElement = false; 1360 return XMLEvent.END_ELEMENT; 1361 } 1362 1363 try { 1364 if(fScannerState == SCANNER_STATE_TERMINATED){ 1365 return XMLEvent.END_DOCUMENT ;} 1366 do { 1367 switch (fScannerState) { 1368 case SCANNER_STATE_TRAILING_MISC: { 1369 1370 fEntityScanner.skipSpaces(); 1371 //we should have reached the end of the document in 1372 //most cases. 1373 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1374 return XMLEvent.END_DOCUMENT ; 1375 } 1376 if (fEntityScanner.skipChar('<', null)) { 1377 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1378 } else { 1379 setScannerState(SCANNER_STATE_CONTENT); 1380 } 1381 break; 1382 } 1383 case SCANNER_STATE_START_OF_MARKUP: { 1384 fMarkupDepth++; 1385 if (fEntityScanner.skipChar('?', null)) { 1386 setScannerState(SCANNER_STATE_PI); 1387 } else if (fEntityScanner.skipChar('!', null)) { 1388 setScannerState(SCANNER_STATE_COMMENT); 1389 } else if (fEntityScanner.skipChar('/', null)) { 1390 reportFatalError("MarkupNotRecognizedInMisc", 1391 null); 1392 } else if (isValidNameStartChar(fEntityScanner.peekChar()) || 1393 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 1394 reportFatalError("MarkupNotRecognizedInMisc", 1395 null); 1396 scanStartElement(); 1397 setScannerState(SCANNER_STATE_CONTENT); 1398 } else { 1399 reportFatalError("MarkupNotRecognizedInMisc", 1400 null); 1401 } 1402 break; 1403 } 1404 } 1405 }while(fScannerState == SCANNER_STATE_START_OF_MARKUP || fScannerState == SCANNER_STATE_TRAILING_MISC); 1406 if(DEBUG_NEXT){ 1407 System.out.println("State set by deciding while loop [TrailingMiscellaneous] is = " + getScannerStateName(fScannerState)); 
1408 } 1409 switch (fScannerState){ 1410 case SCANNER_STATE_PI: { 1411 fContentBuffer.clear(); 1412 scanPI(fContentBuffer); 1413 setScannerState(SCANNER_STATE_TRAILING_MISC); 1414 return XMLEvent.PROCESSING_INSTRUCTION ; 1415 } 1416 case SCANNER_STATE_COMMENT: { 1417 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1418 reportFatalError("InvalidCommentStart", null); 1419 } 1420 scanComment(); 1421 setScannerState(SCANNER_STATE_TRAILING_MISC); 1422 return XMLEvent.COMMENT; 1423 } 1424 case SCANNER_STATE_CONTENT: { 1425 int ch = fEntityScanner.peekChar(); 1426 if (ch == -1) { 1427 setScannerState(SCANNER_STATE_TERMINATED); 1428 return XMLEvent.END_DOCUMENT ; 1429 } else{ 1430 reportFatalError("ContentIllegalInTrailingMisc", 1431 null); 1432 fEntityScanner.scanChar(null); 1433 setScannerState(SCANNER_STATE_TRAILING_MISC); 1434 return XMLEvent.CHARACTERS; 1435 } 1436 1437 } 1438 case SCANNER_STATE_REFERENCE: { 1439 reportFatalError("ReferenceIllegalInTrailingMisc", 1440 null); 1441 setScannerState(SCANNER_STATE_TRAILING_MISC); 1442 return XMLEvent.ENTITY_REFERENCE ; 1443 } 1444 case SCANNER_STATE_TERMINATED: { 1445 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1446 //has reached the end of document 1447 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1448 //xxx what to do when the scanner has reached the terminating state. 1449 return XMLEvent.END_DOCUMENT ; 1450 } 1451 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1452 throw new java.util.NoSuchElementException("No more events to be parsed"); 1453 } 1454 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1455 }//switch 1456 1457 } catch (EOFException e) { 1458 // NOTE: This is the only place we're allowed to reach 1459 // the real end of the document stream. Unless the 1460 // end of file was reached prematurely. 
1461 if (fMarkupDepth != 0) { 1462 reportFatalError("PrematureEOF", null); 1463 return -1; 1464 //throw e; 1465 } 1466 //System.out.println("EOFException thrown") ; 1467 setScannerState(SCANNER_STATE_TERMINATED); 1468 } 1469 1470 return XMLEvent.END_DOCUMENT; 1471 1472 }//next 1473 1474 } // class TrailingMiscDriver 1475 1476 /** 1477 * Implements XMLBufferListener interface. 1478 */ 1479 1480 1481 /** 1482 * receives callbacks from {@link XMLEntityReader } when buffer 1483 * is being changed. 1484 * @param refreshPosition 1485 */ 1486 public void refresh(int refreshPosition){ 1487 super.refresh(refreshPosition); 1488 if(fReadingDTD){ 1489 Entity entity = fEntityScanner.getCurrentEntity(); 1490 if(entity instanceof Entity.ScannedEntity){ 1491 fEndPos=((Entity.ScannedEntity)entity).position; 1492 } 1493 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1494 fStartPos = refreshPosition; 1495 } 1496 } 1497 1498 } // class XMLDocumentScannerImpl