1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 25 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 26 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 27 import com.sun.org.apache.xerces.internal.util.XMLChar; 28 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.xni.Augmentations; 32 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 33 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34 import com.sun.org.apache.xerces.internal.xni.XMLString; 35 import com.sun.org.apache.xerces.internal.xni.XNIException; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 38 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 39 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 40 import com.sun.xml.internal.stream.Entity; 41 import com.sun.xml.internal.stream.StaxXMLInputSource; 42 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 43 import java.io.EOFException; 44 import java.io.IOException; 45 import javax.xml.stream.XMLInputFactory; 46 import javax.xml.stream.events.XMLEvent; 47 48 49 /** 50 * This class is responsible for scanning XML document structure 51 * and content. 52 * 53 * This class has been modified as per the new design which is more suited to 54 * efficiently build pull parser. Lot of improvements have been done and 55 * the code has been added to support stax functionality/features. 56 * 57 * @author Neeraj Bajaj, Sun Microsystems 58 * @author K.Venugopal, Sun Microsystems 59 * @author Glenn Marcy, IBM 60 * @author Andy Clark, IBM 61 * @author Arnaud Le Hors, IBM 62 * @author Eric Ye, IBM 63 * @author Sunitha Reddy, Sun Microsystems 64 * 65 * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes 66 * related to property SupportDTD. 67 * @author Joe Wang, Sun Microsystems 68 */ 69 public class XMLDocumentScannerImpl 70 extends XMLDocumentFragmentScannerImpl{ 71 72 // 73 // Constants 74 // 75 76 // scanner states 77 78 /** Scanner state: XML declaration. */ 79 protected static final int SCANNER_STATE_XML_DECL = 42; 80 81 /** Scanner state: prolog. */ 82 protected static final int SCANNER_STATE_PROLOG = 43; 83 84 /** Scanner state: trailing misc. */ 85 protected static final int SCANNER_STATE_TRAILING_MISC = 44; 86 87 /** Scanner state: DTD internal declarations. */ 88 protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45; 89 90 /** Scanner state: open DTD external subset. */ 91 protected static final int SCANNER_STATE_DTD_EXTERNAL = 46; 92 93 /** Scanner state: DTD external declarations. */ 94 protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47; 95 96 /** Scanner state: NO MORE ELEMENTS. */ 97 protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48; 98 99 // feature identifiers 100 101 /** Property identifier document scanner: */ 102 protected static final String DOCUMENT_SCANNER = 103 Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY; 104 105 /** Feature identifier: load external DTD. */ 106 protected static final String LOAD_EXTERNAL_DTD = 107 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 108 109 /** Feature identifier: load external DTD. */ 110 protected static final String DISALLOW_DOCTYPE_DECL_FEATURE = 111 Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE; 112 113 // property identifiers 114 115 /** Property identifier: DTD scanner. */ 116 protected static final String DTD_SCANNER = 117 Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY; 118 119 // property identifier: ValidationManager 120 protected static final String VALIDATION_MANAGER = 121 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 122 123 /** property identifier: NamespaceContext */ 124 protected static final String NAMESPACE_CONTEXT = 125 Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY; 126 127 // recognized features and properties 128 129 /** Recognized features. */ 130 private static final String[] RECOGNIZED_FEATURES = { 131 LOAD_EXTERNAL_DTD, 132 DISALLOW_DOCTYPE_DECL_FEATURE, 133 }; 134 135 /** Feature defaults. */ 136 private static final Boolean[] FEATURE_DEFAULTS = { 137 Boolean.TRUE, 138 Boolean.FALSE, 139 }; 140 141 /** Recognized properties. */ 142 private static final String[] RECOGNIZED_PROPERTIES = { 143 DTD_SCANNER, 144 VALIDATION_MANAGER 145 }; 146 147 /** Property defaults. */ 148 private static final Object[] PROPERTY_DEFAULTS = { 149 null, 150 null 151 }; 152 153 // 154 // Data((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 155 // 156 157 // properties 158 159 /** DTD scanner. */ 160 protected XMLDTDScanner fDTDScanner = null; 161 162 /** Validation manager . */ 163 //xxx: fValidationManager code needs to be added yet! 164 protected ValidationManager fValidationManager; 165 166 protected XMLStringBuffer fDTDDecl = null; 167 protected boolean fReadingDTD = false; 168 protected boolean fAddedListener = false; 169 170 // protected data 171 172 // other info 173 174 /** Doctype name. */ 175 protected String fDoctypeName; 176 177 /** Doctype declaration public identifier. */ 178 protected String fDoctypePublicId; 179 180 /** Doctype declaration system identifier. */ 181 protected String fDoctypeSystemId; 182 183 /** Namespace support. */ 184 protected NamespaceContext fNamespaceContext = new NamespaceSupport(); 185 186 // features 187 188 /** Load external DTD. */ 189 protected boolean fLoadExternalDTD = true; 190 191 // state 192 193 /** Seen doctype declaration. */ 194 protected boolean fSeenDoctypeDecl; 195 196 protected boolean fScanEndElement; 197 198 //protected int fScannerLastState ; 199 200 // drivers 201 202 /** XML declaration driver. */ 203 protected Driver fXMLDeclDriver = new XMLDeclDriver(); 204 205 /** Prolog driver. */ 206 protected Driver fPrologDriver = new PrologDriver(); 207 208 /** DTD driver. */ 209 protected Driver fDTDDriver = null ; 210 211 /** Trailing miscellaneous section driver. */ 212 protected Driver fTrailingMiscDriver = new TrailingMiscDriver(); 213 protected int fStartPos = 0; 214 protected int fEndPos = 0; 215 protected boolean fSeenInternalSubset= false; 216 // temporary variables 217 218 /** Array of 3 strings. */ 219 private String[] fStrings = new String[3]; 220 221 /** External subset source. */ 222 private XMLInputSource fExternalSubsetSource = null; 223 224 /** A DTD Description. */ 225 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 226 227 /** String. */ 228 private XMLString fString = new XMLString(); 229 230 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 231 private static final char [] COMMENTSTRING = {'-','-'}; 232 233 // 234 // Constructors 235 // 236 237 /** Default constructor. */ 238 public XMLDocumentScannerImpl() {} // <init>() 239 240 241 // 242 // XMLDocumentScanner methods 243 // 244 245 246 /** 247 * Sets the input source. 248 * 249 * @param inputSource The input source. 250 * 251 * @throws IOException Thrown on i/o error. 252 */ 253 public void setInputSource(XMLInputSource inputSource) throws IOException { 254 fEntityManager.setEntityHandler(this); 255 //this starts a new entity and sets the current entity to the document entity. 256 fEntityManager.startDocumentEntity(inputSource); 257 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 258 setScannerState(XMLEvent.START_DOCUMENT); 259 } // setInputSource(XMLInputSource) 260 261 262 263 /**return the state of the scanner */ 264 public int getScannetState(){ 265 return fScannerState ; 266 } 267 268 269 270 271 public void reset(PropertyManager propertyManager) { 272 super.reset(propertyManager); 273 // other settings 274 fDoctypeName = null; 275 fDoctypePublicId = null; 276 fDoctypeSystemId = null; 277 fSeenDoctypeDecl = false; 278 fNamespaceContext.reset(); 279 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 280 281 // xerces features 282 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 283 setScannerState(XMLEvent.START_DOCUMENT); 284 setDriver(fXMLDeclDriver); 285 fSeenInternalSubset = false; 286 if(fDTDScanner != null){ 287 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 288 } 289 fEndPos = 0; 290 fStartPos = 0; 291 if(fDTDDecl != null){ 292 fDTDDecl.clear(); 293 } 294 295 } 296 297 /** 298 * Resets the component. The component can query the component manager 299 * about any features and properties that affect the operation of the 300 * component. 301 * 302 * @param componentManager The component manager. 303 * 304 * @throws SAXException Thrown by component on initialization error. 305 * For example, if a feature or property is 306 * required for the operation of the component, the 307 * component manager may throw a 308 * SAXNotRecognizedException or a 309 * SAXNotSupportedException. 310 */ 311 public void reset(XMLComponentManager componentManager) 312 throws XMLConfigurationException { 313 314 super.reset(componentManager); 315 316 // other settings 317 fDoctypeName = null; 318 fDoctypePublicId = null; 319 fDoctypeSystemId = null; 320 fSeenDoctypeDecl = false; 321 fExternalSubsetSource = null; 322 323 // xerces features 324 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 325 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 326 327 fNamespaces = componentManager.getFeature(NAMESPACES, true); 328 329 fSeenInternalSubset = false; 330 // xerces properties 331 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 332 333 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 334 335 try { 336 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 337 } 338 catch (XMLConfigurationException e) { } 339 if (fNamespaceContext == null) { 340 fNamespaceContext = new NamespaceSupport(); 341 } 342 fNamespaceContext.reset(); 343 344 fEndPos = 0; 345 fStartPos = 0; 346 if(fDTDDecl != null) 347 fDTDDecl.clear(); 348 349 350 //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 351 352 // setup driver 353 setScannerState(SCANNER_STATE_XML_DECL); 354 setDriver(fXMLDeclDriver); 355 356 } // reset(XMLComponentManager) 357 358 359 /** 360 * Returns a list of feature identifiers that are recognized by 361 * this component. This method may return null if no features 362 * are recognized by this component. 363 */ 364 public String[] getRecognizedFeatures() { 365 String[] featureIds = super.getRecognizedFeatures(); 366 int length = featureIds != null ? featureIds.length : 0; 367 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 368 if (featureIds != null) { 369 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 370 } 371 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 372 return combinedFeatureIds; 373 } // getRecognizedFeatures():String[] 374 375 /** 376 * Sets the state of a feature. This method is called by the component 377 * manager any time after reset when a feature changes state. 378 * <p> 379 * <strong>Note:</strong> Components should silently ignore features 380 * that do not affect the operation of the component. 381 * 382 * @param featureId The feature identifier. 383 * @param state The state of the feature. 384 * 385 * @throws SAXNotRecognizedException The component should not throw 386 * this exception. 387 * @throws SAXNotSupportedException The component should not throw 388 * this exception. 389 */ 390 public void setFeature(String featureId, boolean state) 391 throws XMLConfigurationException { 392 393 super.setFeature(featureId, state); 394 395 // Xerces properties 396 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 397 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 398 399 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 400 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 401 fLoadExternalDTD = state; 402 return; 403 } 404 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 405 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 406 fDisallowDoctype = state; 407 return; 408 } 409 } 410 411 } // setFeature(String,boolean) 412 413 /** 414 * Returns a list of property identifiers that are recognized by 415 * this component. This method may return null if no properties 416 * are recognized by this component. 417 */ 418 public String[] getRecognizedProperties() { 419 String[] propertyIds = super.getRecognizedProperties(); 420 int length = propertyIds != null ? propertyIds.length : 0; 421 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 422 if (propertyIds != null) { 423 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 424 } 425 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 426 return combinedPropertyIds; 427 } // getRecognizedProperties():String[] 428 429 /** 430 * Sets the value of a property. This method is called by the component 431 * manager any time after reset when a property changes value. 432 * <p> 433 * <strong>Note:</strong> Components should silently ignore properties 434 * that do not affect the operation of the component. 435 * 436 * @param propertyId The property identifier. 437 * @param value The value of the property. 438 * 439 * @throws SAXNotRecognizedException The component should not throw 440 * this exception. 441 * @throws SAXNotSupportedException The component should not throw 442 * this exception. 443 */ 444 public void setProperty(String propertyId, Object value) 445 throws XMLConfigurationException { 446 447 super.setProperty(propertyId, value); 448 449 // Xerces properties 450 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 451 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 452 453 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 454 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 455 fDTDScanner = (XMLDTDScanner)value; 456 } 457 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 458 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 459 if (value != null) { 460 fNamespaceContext = (NamespaceContext)value; 461 } 462 } 463 464 return; 465 } 466 467 } // setProperty(String,Object) 468 469 /** 470 * Returns the default state for a feature, or null if this 471 * component does not want to report a default value for this 472 * feature. 473 * 474 * @param featureId The feature identifier. 475 * 476 * @since Xerces 2.2.0 477 */ 478 public Boolean getFeatureDefault(String featureId) { 479 480 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 481 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 482 return FEATURE_DEFAULTS[i]; 483 } 484 } 485 return super.getFeatureDefault(featureId); 486 } // getFeatureDefault(String):Boolean 487 488 /** 489 * Returns the default state for a property, or null if this 490 * component does not want to report a default value for this 491 * property. 492 * 493 * @param propertyId The property identifier. 494 * 495 * @since Xerces 2.2.0 496 */ 497 public Object getPropertyDefault(String propertyId) { 498 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 499 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 500 return PROPERTY_DEFAULTS[i]; 501 } 502 } 503 return super.getPropertyDefault(propertyId); 504 } // getPropertyDefault(String):Object 505 506 // 507 // XMLEntityHandler methods 508 // 509 510 /** 511 * This method notifies of the start of an entity. The DTD has the 512 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 513 * general entities are just specified by their name. 514 * 515 * @param name The name of the entity. 516 * @param identifier The resource identifier. 517 * @param encoding The auto-detected IANA encoding name of the entity 518 * stream. This value will be null in those situations 519 * where the entity encoding is not auto-detected (e.g. 520 * internal entities or a document entity that is 521 * parsed from a java.io.Reader). 522 * 523 * @throws XNIException Thrown by handler to signal an error. 524 */ 525 public void startEntity(String name, 526 XMLResourceIdentifier identifier, 527 String encoding, Augmentations augs) throws XNIException { 528 529 super.startEntity(name, identifier, encoding,augs); 530 531 //register current document scanner as a listener for XMLEntityScanner 532 fEntityScanner.registerListener(this); 533 534 // prepare to look for a TextDecl if external general entity 535 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 536 // Don't do this if we're skipping the entity! 537 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 538 setScannerState(SCANNER_STATE_TEXT_DECL); 539 } 540 } 541 542 // call handler 543 /** comment this part.. LOCATOR problem.. */ 544 if (fDocumentHandler != null && name.equals("[xml]")) { 545 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 546 } 547 548 } // startEntity(String,identifier,String) 549 550 551 /** 552 * This method notifies the end of an entity. The DTD has the pseudo-name 553 * of "[dtd]" parameter entity names start with '%'; and general entities 554 * are just specified by their name. 555 * 556 * @param name The name of the entity. 557 * 558 * @throws XNIException Thrown by handler to signal an error. 559 */ 560 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 561 562 super.endEntity(name, augs); 563 564 if(name.equals("[xml]")){ 565 //if fMarkupDepth has reached 0. 566 //and driver is fTrailingMiscDriver (which 567 //handles end of document in normal case) 568 //set the scanner state of SCANNER_STATE_TERMINATED 569 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 570 //set the scanner set to SCANNER_STATE_TERMINATED 571 setScannerState(SCANNER_STATE_TERMINATED) ; 572 } else{ 573 //else we have reached the end of document prematurely 574 //so throw EOFException. 575 throw new java.io.EOFException(); 576 } 577 578 //this is taken care in wrapper which generates XNI callbacks, There are no next events 579 580 //if (fDocumentHandler != null) { 581 //fDocumentHandler.endDocument(null); 582 //} 583 } 584 } // endEntity(String) 585 586 587 public XMLStringBuffer getDTDDecl(){ 588 Entity entity = fEntityScanner.getCurrentEntity(); 589 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 590 if(fSeenInternalSubset) 591 fDTDDecl.append("]>"); 592 return fDTDDecl; 593 } 594 595 public String getCharacterEncodingScheme(){ 596 return fDeclaredEncoding; 597 } 598 599 /** return the next state on the input 600 * 601 * @return int 602 */ 603 604 public int next() throws IOException, XNIException { 605 return fDriver.next(); 606 } 607 608 //getNamespaceContext 609 public NamespaceContext getNamespaceContext(){ 610 return fNamespaceContext ; 611 } 612 613 614 615 // 616 // Protected methods 617 // 618 619 // driver factory methods 620 621 /** Creates a content driver. */ 622 protected Driver createContentDriver() { 623 return new ContentDriver(); 624 } // createContentDriver():Driver 625 626 // scanning methods 627 628 /** Scans a doctype declaration. */ 629 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 630 631 // spaces 632 if (!fEntityScanner.skipSpaces()) { 633 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 634 null); 635 } 636 637 // root element name 638 fDoctypeName = fEntityScanner.scanName(); 639 if (fDoctypeName == null) { 640 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 641 } 642 643 // external id 644 if (fEntityScanner.skipSpaces()) { 645 scanExternalID(fStrings, false); 646 fDoctypeSystemId = fStrings[0]; 647 fDoctypePublicId = fStrings[1]; 648 fEntityScanner.skipSpaces(); 649 } 650 651 fHasExternalDTD = fDoctypeSystemId != null; 652 653 // Attempt to locate an external subset with an external subset resolver. 654 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 655 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 656 fDTDDescription.setRootName(fDoctypeName); 657 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 658 fHasExternalDTD = fExternalSubsetSource != null; 659 } 660 661 // call handler 662 if (supportDTD && fDocumentHandler != null) { 663 // NOTE: I don't like calling the doctypeDecl callback until 664 // end of the *full* doctype line (including internal 665 // subset) is parsed correctly but SAX2 requires that 666 // it knows the root element name and public and system 667 // identifier for the startDTD call. -Ac 668 if (fExternalSubsetSource == null) { 669 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 670 } 671 else { 672 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 673 } 674 } 675 676 // is there an internal subset? 677 boolean internalSubset = true; 678 if (!fEntityScanner.skipChar('[')) { 679 internalSubset = false; 680 fEntityScanner.skipSpaces(); 681 if (!fEntityScanner.skipChar('>')) { 682 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 683 } 684 fMarkupDepth--; 685 } 686 return internalSubset; 687 688 } // scanDoctypeDecl():boolean 689 690 // 691 // Private methods 692 // 693 /** Set the scanner state after scanning DTD */ 694 protected void setEndDTDScanState() { 695 setScannerState(SCANNER_STATE_PROLOG); 696 setDriver(fPrologDriver); 697 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 698 fReadingDTD=false; 699 } 700 701 /** Returns the scanner state name. */ 702 protected String getScannerStateName(int state) { 703 704 switch (state) { 705 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 706 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 707 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 708 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 709 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 710 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 711 } 712 return super.getScannerStateName(state); 713 714 } // getScannerStateName(int):String 715 716 // 717 // Classes 718 // 719 720 /** 721 * Driver to handle XMLDecl scanning. 722 * 723 * This class has been modified as per the new design which is more suited to 724 * efficiently build pull parser. Lots of performance improvements have been done and 725 * the code has been added to support stax functionality/features. 726 * 727 * @author Neeraj Bajaj, Sun Microsystems. 728 * 729 * @author Andy Clark, IBM 730 */ 731 protected final class XMLDeclDriver 732 implements Driver { 733 734 // 735 // Driver methods 736 // 737 738 739 public int next() throws IOException, XNIException { 740 if(DEBUG_NEXT){ 741 System.out.println("NOW IN XMLDeclDriver"); 742 } 743 744 // next driver is prolog regardless of whether there 745 // is an XMLDecl in this document 746 setScannerState(SCANNER_STATE_PROLOG); 747 setDriver(fPrologDriver); 748 749 //System.out.println("fEntityScanner = " + fEntityScanner); 750 // scan XMLDecl 751 try { 752 if (fEntityScanner.skipString(xmlDecl)) { 753 fMarkupDepth++; 754 // NOTE: special case where document starts with a PI 755 // whose name starts with "xml" (e.g. "xmlfoo") 756 if (XMLChar.isName(fEntityScanner.peekChar())) { 757 fStringBuffer.clear(); 758 fStringBuffer.append("xml"); 759 while (XMLChar.isName(fEntityScanner.peekChar())) { 760 fStringBuffer.append((char)fEntityScanner.scanChar()); 761 } 762 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 763 //this function should fill the data.. and set the fEvent object to this event. 764 fContentBuffer.clear() ; 765 scanPIData(target, fContentBuffer); 766 //REVISIT:where else we can set this value to 'true' 767 fEntityManager.fCurrentEntity.mayReadChunks = true; 768 //return PI event since PI was encountered 769 return XMLEvent.PROCESSING_INSTRUCTION ; 770 } 771 // standard XML declaration 772 else { 773 scanXMLDeclOrTextDecl(false); 774 //REVISIT:where else we can set this value to 'true' 775 fEntityManager.fCurrentEntity.mayReadChunks = true; 776 return XMLEvent.START_DOCUMENT; 777 } 778 } else{ 779 //REVISIT:where else we can set this value to 'true' 780 fEntityManager.fCurrentEntity.mayReadChunks = true; 781 //In both case return the START_DOCUMENT. ony difference is that first block will 782 //cosume the XML declaration if any. 783 return XMLEvent.START_DOCUMENT; 784 } 785 786 787 //START_OF_THE_DOCUMENT 788 789 790 } 791 792 // premature end of file 793 catch (EOFException e) { 794 reportFatalError("PrematureEOF", null); 795 return -1; 796 //throw e; 797 } 798 799 } 800 } // class XMLDeclDriver 801 802 /** 803 * Driver to handle prolog scanning. 804 * 805 * @author Andy Clark, IBM 806 */ 807 protected final class PrologDriver 808 implements Driver { 809 810 /** 811 * Drives the parser to the next state/event on the input. Parser is guaranteed 812 * to stop at the next state/event. 813 * 814 * Internally XML document is divided into several states. Each state represents 815 * a sections of XML document. When this functions returns normally, it has read 816 * the section of XML document and returns the state corresponding to section of 817 * document which has been read. For optimizations, a particular driver 818 * can read ahead of the section of document (state returned) just read and 819 * can maintain a different internal state. 820 * 821 * @return state representing the section of document just read. 822 * 823 * @throws IOException Thrown on i/o error. 824 * @throws XNIException Thrown on parse error. 825 */ 826 827 public int next() throws IOException, XNIException { 828 //System.out.println("here in next"); 829 830 if(DEBUG_NEXT){ 831 System.out.println("NOW IN PrologDriver"); 832 } 833 try { 834 do { 835 switch (fScannerState) { 836 case SCANNER_STATE_PROLOG: { 837 fEntityScanner.skipSpaces(); 838 if (fEntityScanner.skipChar('<')) { 839 setScannerState(SCANNER_STATE_START_OF_MARKUP); 840 } else if (fEntityScanner.skipChar('&')) { 841 setScannerState(SCANNER_STATE_REFERENCE); 842 } else { 843 setScannerState(SCANNER_STATE_CONTENT); 844 } 845 break; 846 } 847 848 case SCANNER_STATE_START_OF_MARKUP: { 849 fMarkupDepth++; 850 if (isValidNameStartChar(fEntityScanner.peekChar()) || 851 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 852 setScannerState(SCANNER_STATE_ROOT_ELEMENT); 853 setDriver(fContentDriver); 854 //from now onwards this would be handled by fContentDriver,in the same next() call 855 return fContentDriver.next(); 856 } else if (fEntityScanner.skipChar('!')) { 857 if (fEntityScanner.skipChar('-')) { 858 if (!fEntityScanner.skipChar('-')) { 859 reportFatalError("InvalidCommentStart", 860 null); 861 } 862 setScannerState(SCANNER_STATE_COMMENT); 863 } else if (fEntityScanner.skipString(DOCTYPE)) { 864 setScannerState(SCANNER_STATE_DOCTYPE); 865 Entity entity = fEntityScanner.getCurrentEntity(); 866 if(entity instanceof Entity.ScannedEntity){ 867 fStartPos=((Entity.ScannedEntity)entity).position; 868 } 869 fReadingDTD=true; 870 if(fDTDDecl == null) 871 fDTDDecl = new XMLStringBuffer(); 872 fDTDDecl.append("<!DOCTYPE"); 873 874 } else { 875 reportFatalError("MarkupNotRecognizedInProlog", 876 null); 877 } 878 } else if (fEntityScanner.skipChar('?')) { 879 setScannerState(SCANNER_STATE_PI); 880 } else { 881 reportFatalError("MarkupNotRecognizedInProlog", 882 null); 883 } 884 break; 885 } 886 } 887 } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP ); 888 889 switch(fScannerState){ 890 /** 891 //this part is handled by FragmentContentHandler 892 case SCANNER_STATE_ROOT_ELEMENT: { 893 //we have read '<' and beginning of reading the start element tag 894 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 895 setDriver(fContentDriver); 896 //from now onwards this would be handled by fContentDriver,in the same next() call 897 return fContentDriver.next(); 898 } 899 */ 900 case SCANNER_STATE_COMMENT: { 901 //this function fills the data.. 902 scanComment(); 903 setScannerState(SCANNER_STATE_PROLOG); 904 return XMLEvent.COMMENT; 905 //setScannerState(SCANNER_STATE_PROLOG); 906 //break; 907 } 908 case SCANNER_STATE_PI: { 909 fContentBuffer.clear() ; 910 scanPI(fContentBuffer); 911 setScannerState(SCANNER_STATE_PROLOG); 912 return XMLEvent.PROCESSING_INSTRUCTION; 913 } 914 915 case SCANNER_STATE_DOCTYPE: { 916 if (fDisallowDoctype) { 917 reportFatalError("DoctypeNotAllowed", null); 918 } 919 920 if (fSeenDoctypeDecl) { 921 reportFatalError("AlreadySeenDoctype", null); 922 } 923 fSeenDoctypeDecl = true; 924 925 // scanDoctypeDecl() sends XNI doctypeDecl event that 926 // in SAX is converted to startDTD() event. 927 if (scanDoctypeDecl(fSupportDTD)) { 928 //allow parsing of entity decls to continue in order to stay well-formed 929 setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS); 930 fSeenInternalSubset = true; 931 if(fDTDDriver == null){ 932 fDTDDriver = new DTDDriver(); 933 } 934 setDriver(fContentDriver); 935 //always return DTD event, the event however, will not contain any entities 936 return fDTDDriver.next(); 937 } 938 939 if(fSeenDoctypeDecl){ 940 Entity entity = fEntityScanner.getCurrentEntity(); 941 if(entity instanceof Entity.ScannedEntity){ 942 fEndPos = ((Entity.ScannedEntity)entity).position; 943 } 944 fReadingDTD = false; 945 } 946 947 // handle external subset 948 if (fDoctypeSystemId != null) { 949 if (((fValidation || fLoadExternalDTD) 950 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) { 951 if (fSupportDTD) { 952 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 953 } else { 954 setScannerState(SCANNER_STATE_PROLOG); 955 } 956 957 setDriver(fContentDriver); 958 if(fDTDDriver == null) { 959 fDTDDriver = new DTDDriver(); 960 } 961 962 return fDTDDriver.next(); 963 } 964 } 965 else if (fExternalSubsetSource != null) { 966 if (((fValidation || fLoadExternalDTD) 967 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) { 968 // This handles the case of a DOCTYPE that had neither an internal subset or an external subset. 969 fDTDScanner.setInputSource(fExternalSubsetSource); 970 fExternalSubsetSource = null; 971 if (fSupportDTD) 972 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 973 else 974 setScannerState(SCANNER_STATE_PROLOG); 975 setDriver(fContentDriver); 976 if(fDTDDriver == null) 977 fDTDDriver = new DTDDriver(); 978 return fDTDDriver.next(); 979 } 980 } 981 982 // Send endDTD() call if: 983 // a) systemId is null or if an external subset resolver could not locate an external subset. 984 // b) "load-external-dtd" and validation are false 985 // c) DTD grammar is cached 986 987 // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD 988 // in SAX this results in 2 events: startDTD, endDTD 989 if (fDTDScanner != null) { 990 fDTDScanner.setInputSource(null); 991 } 992 setScannerState(SCANNER_STATE_PROLOG); 993 return XMLEvent.DTD; 994 } 995 996 case SCANNER_STATE_CONTENT: { 997 reportFatalError("ContentIllegalInProlog", null); 998 fEntityScanner.scanChar(); 999 } 1000 case SCANNER_STATE_REFERENCE: { 1001 reportFatalError("ReferenceIllegalInProlog", null); 1002 } 1003 1004 /** 1005 * if (complete) { 1006 * if (fEntityScanner.scanChar() != '<') { 1007 * reportFatalError("RootElementRequired", null); 1008 * } 1009 * setScannerState(SCANNER_STATE_ROOT_ELEMENT); 1010 * setDriver(fContentDriver); 1011 * } 1012 */ 1013 } 1014 } 1015 // premature end of file 1016 catch (EOFException e) { 1017 reportFatalError("PrematureEOF", null); 1018 //xxx what should be returned here.... ??? 1019 return -1 ; 1020 //throw e; 1021 } 1022 //xxx what should be returned here.... ??? 1023 return -1; 1024 1025 } 1026 1027 1028 } // class PrologDriver 1029 1030 /** 1031 * Driver to handle the internal and external DTD subsets. 1032 * 1033 * @author Andy Clark, IBM 1034 */ 1035 protected final class DTDDriver 1036 implements Driver { 1037 1038 // 1039 // Driver methods 1040 // 1041 1042 public int next() throws IOException, XNIException{ 1043 // throw new XNIException("DTD Parsing is currently not supported"); 1044 if(DEBUG_NEXT){ 1045 System.out.println("Now in DTD Driver"); 1046 } 1047 1048 dispatch(true); 1049 1050 if(DEBUG_NEXT){ 1051 System.out.println("After calling dispatch(true) -- At this point whole DTD is read."); 1052 } 1053 1054 //xxx: remove this hack and align this with reusing DTD components 1055 //currently this routine will only be executed from Stax 1056 if(fPropertyManager != null){ 1057 dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext); 1058 } 1059 1060 return XMLEvent.DTD ; 1061 } 1062 1063 /** 1064 * Dispatch an XML "event". 1065 * 1066 * @param complete True if this driver is intended to scan 1067 * and dispatch as much as possible. 1068 * 1069 * @return True if there is more to dispatch either from this 1070 * or a another driver. 1071 * 1072 * @throws IOException Thrown on i/o error. 1073 * @throws XNIException Thrown on parse error. 1074 */ 1075 public boolean dispatch(boolean complete) 1076 throws IOException, XNIException { 1077 fEntityManager.setEntityHandler(null); 1078 try { 1079 boolean again; 1080 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1081 if( fDTDScanner == null){ 1082 1083 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1084 fDTDScanner = new XML11DTDScannerImpl(); 1085 } else 1086 1087 fDTDScanner = new XMLDTDScannerImpl(); 1088 1089 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1090 } 1091 1092 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer); 1093 do { 1094 again = false; 1095 switch (fScannerState) { 1096 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1097 boolean moreToScan = false; 1098 if (!fDTDScanner.skipDTD(fSupportDTD)) { 1099 // REVISIT: Should there be a feature for 1100 // the "complete" parameter? 1101 boolean completeDTD = true; 1102 1103 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1104 } 1105 Entity entity = fEntityScanner.getCurrentEntity(); 1106 if(entity instanceof Entity.ScannedEntity){ 1107 fEndPos=((Entity.ScannedEntity)entity).position; 1108 } 1109 fReadingDTD=false; 1110 if (!moreToScan) { 1111 // end doctype declaration 1112 if (!fEntityScanner.skipChar(']')) { 1113 reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET", 1114 null); 1115 } 1116 fEntityScanner.skipSpaces(); 1117 if (!fEntityScanner.skipChar('>')) { 1118 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1119 } 1120 fMarkupDepth--; 1121 1122 if (!fSupportDTD) { 1123 //simply reset the entity store without having to mess around 1124 //with the DTD Scanner code 1125 fEntityStore = fEntityManager.getEntityStore(); 1126 fEntityStore.reset(); 1127 } else { 1128 // scan external subset next unless we are ignoring DTDs 1129 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1130 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1131 break; 1132 } 1133 } 1134 1135 setEndDTDScanState(); 1136 return true; 1137 1138 } 1139 break; 1140 } 1141 case SCANNER_STATE_DTD_EXTERNAL: { 1142 /** 1143 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1144 fDTDDescription.setRootName(fDoctypeName); 1145 XMLInputSource xmlInputSource = 1146 fEntityManager.resolveEntity(fDTDDescription); 1147 fDTDScanner.setInputSource(xmlInputSource); 1148 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1149 again = true; 1150 break; 1151 */ 1152 1153 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1154 XMLInputSource xmlInputSource = null ; 1155 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1156 1157 // Check access permission. If the source is resolved by a resolver, the check is skipped. 1158 if (!staxInputSource.hasResolver()) { 1159 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1160 if (accessError != null) { 1161 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1162 } 1163 } 1164 xmlInputSource = staxInputSource.getXMLInputSource(); 1165 fDTDScanner.setInputSource(xmlInputSource); 1166 if (fEntityScanner.fCurrentEntity != null) { 1167 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1168 } else { 1169 setScannerState(SCANNER_STATE_PROLOG); 1170 } 1171 again = true; 1172 break; 1173 } 1174 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1175 // REVISIT: Should there be a feature for 1176 // the "complete" parameter? 1177 boolean completeDTD = true; 1178 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1179 if (!moreToScan) { 1180 setEndDTDScanState(); 1181 return true; 1182 } 1183 break; 1184 } 1185 case SCANNER_STATE_PROLOG : { 1186 // skip entity decls 1187 setEndDTDScanState(); 1188 return true; 1189 } 1190 default: { 1191 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1192 } 1193 } 1194 } while (complete || again); 1195 } 1196 1197 // premature end of file 1198 catch (EOFException e) { 1199 e.printStackTrace(); 1200 reportFatalError("PrematureEOF", null); 1201 return false; 1202 //throw e; 1203 } 1204 1205 // cleanup 1206 finally { 1207 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1208 } 1209 1210 return true; 1211 1212 } 1213 1214 // dispatch(boolean):boolean 1215 1216 } // class DTDDriver 1217 1218 /** 1219 * Driver to handle content scanning. 1220 * 1221 * @author Andy Clark, IBM 1222 * @author Eric Ye, IBM 1223 */ 1224 protected class ContentDriver 1225 extends FragmentContentDriver { 1226 1227 // 1228 // Protected methods 1229 // 1230 1231 // hooks 1232 1233 // NOTE: These hook methods are added so that the full document 1234 // scanner can share the majority of code with this class. 1235 1236 /** 1237 * Scan for DOCTYPE hook. This method is a hook for subclasses 1238 * to add code to handle scanning for a the "DOCTYPE" string 1239 * after the string "<!" has been scanned. 1240 * 1241 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1242 * was not scanned. 1243 */ 1244 protected boolean scanForDoctypeHook() 1245 throws IOException, XNIException { 1246 1247 if (fEntityScanner.skipString(DOCTYPE)) { 1248 setScannerState(SCANNER_STATE_DOCTYPE); 1249 // fEntityScanner.markStartOfDTD(); 1250 return true; 1251 } 1252 return false; 1253 1254 } // scanForDoctypeHook():boolean 1255 1256 /** 1257 * Element depth iz zero. This methos is a hook for subclasses 1258 * to add code to handle when the element depth hits zero. When 1259 * scanning a document fragment, an element depth of zero is 1260 * normal. However, when scanning a full XML document, the 1261 * scanner must handle the trailing miscellanous section of 1262 * the document after the end of the document's root element. 1263 * 1264 * @return True if the caller should stop and return true which 1265 * allows the scanner to switch to a new scanning 1266 * driver. A return value of false indicates that 1267 * the content driver should continue as normal. 1268 */ 1269 protected boolean elementDepthIsZeroHook() 1270 throws IOException, XNIException { 1271 1272 setScannerState(SCANNER_STATE_TRAILING_MISC); 1273 setDriver(fTrailingMiscDriver); 1274 return true; 1275 1276 } // elementDepthIsZeroHook():boolean 1277 1278 /** 1279 * Scan for root element hook. This method is a hook for 1280 * subclasses to add code that handles scanning for the root 1281 * element. When scanning a document fragment, there is no 1282 * "root" element. However, when scanning a full XML document, 1283 * the scanner must handle the root element specially. 1284 * 1285 * @return True if the caller should stop and return true which 1286 * allows the scanner to switch to a new scanning 1287 * driver. A return value of false indicates that 1288 * the content driver should continue as normal. 1289 */ 1290 protected boolean scanRootElementHook() 1291 throws IOException, XNIException { 1292 1293 if (scanStartElement()) { 1294 setScannerState(SCANNER_STATE_TRAILING_MISC); 1295 setDriver(fTrailingMiscDriver); 1296 return true; 1297 } 1298 return false; 1299 1300 } // scanRootElementHook():boolean 1301 1302 /** 1303 * End of file hook. This method is a hook for subclasses to 1304 * add code that handles the end of file. The end of file in 1305 * a document fragment is OK if the markup depth is zero. 1306 * However, when scanning a full XML document, an end of file 1307 * is always premature. 1308 */ 1309 protected void endOfFileHook(EOFException e) 1310 throws IOException, XNIException { 1311 1312 reportFatalError("PrematureEOF", null); 1313 // in case continue-after-fatal-error set, should not do this... 1314 //throw e; 1315 1316 } // endOfFileHook() 1317 1318 protected void resolveExternalSubsetAndRead() 1319 throws IOException, XNIException { 1320 1321 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1322 fDTDDescription.setRootName(fElementQName.rawname); 1323 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1324 1325 if (src != null) { 1326 fDoctypeName = fElementQName.rawname; 1327 fDoctypePublicId = src.getPublicId(); 1328 fDoctypeSystemId = src.getSystemId(); 1329 // call document handler 1330 if (fDocumentHandler != null) { 1331 // This inserts a doctypeDecl event into the stream though no 1332 // DOCTYPE existed in the instance document. 1333 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1334 } 1335 try { 1336 fDTDScanner.setInputSource(src); 1337 while (fDTDScanner.scanDTDExternalSubset(true)); 1338 } finally { 1339 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1340 } 1341 } 1342 } // resolveExternalSubsetAndRead() 1343 1344 1345 1346 } // class ContentDriver 1347 1348 /** 1349 * Driver to handle trailing miscellaneous section scanning. 1350 * 1351 * @author Andy Clark, IBM 1352 * @author Eric Ye, IBM 1353 */ 1354 protected final class TrailingMiscDriver 1355 implements Driver { 1356 1357 // 1358 // Driver methods 1359 // 1360 public int next() throws IOException, XNIException{ 1361 //this could for cases like <foo/> 1362 //look at scanRootElementHook 1363 if(fEmptyElement){ 1364 fEmptyElement = false; 1365 return XMLEvent.END_ELEMENT; 1366 } 1367 1368 try { 1369 if(fScannerState == SCANNER_STATE_TERMINATED){ 1370 return XMLEvent.END_DOCUMENT ;} 1371 do { 1372 switch (fScannerState) { 1373 case SCANNER_STATE_TRAILING_MISC: { 1374 1375 fEntityScanner.skipSpaces(); 1376 //we should have reached the end of the document in 1377 //most cases. 1378 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1379 return XMLEvent.END_DOCUMENT ; 1380 } 1381 if (fEntityScanner.skipChar('<')) { 1382 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1383 } else { 1384 setScannerState(SCANNER_STATE_CONTENT); 1385 } 1386 break; 1387 } 1388 case SCANNER_STATE_START_OF_MARKUP: { 1389 fMarkupDepth++; 1390 if (fEntityScanner.skipChar('?')) { 1391 setScannerState(SCANNER_STATE_PI); 1392 } else if (fEntityScanner.skipChar('!')) { 1393 setScannerState(SCANNER_STATE_COMMENT); 1394 } else if (fEntityScanner.skipChar('/')) { 1395 reportFatalError("MarkupNotRecognizedInMisc", 1396 null); 1397 } else if (isValidNameStartChar(fEntityScanner.peekChar()) || 1398 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 1399 reportFatalError("MarkupNotRecognizedInMisc", 1400 null); 1401 scanStartElement(); 1402 setScannerState(SCANNER_STATE_CONTENT); 1403 } else { 1404 reportFatalError("MarkupNotRecognizedInMisc", 1405 null); 1406 } 1407 break; 1408 } 1409 } 1410 }while(fScannerState == SCANNER_STATE_START_OF_MARKUP || fScannerState == SCANNER_STATE_TRAILING_MISC); 1411 if(DEBUG_NEXT){ 1412 System.out.println("State set by deciding while loop [TrailingMiscellaneous] is = " + getScannerStateName(fScannerState)); 1413 } 1414 switch (fScannerState){ 1415 case SCANNER_STATE_PI: { 1416 fContentBuffer.clear(); 1417 scanPI(fContentBuffer); 1418 setScannerState(SCANNER_STATE_TRAILING_MISC); 1419 return XMLEvent.PROCESSING_INSTRUCTION ; 1420 } 1421 case SCANNER_STATE_COMMENT: { 1422 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1423 reportFatalError("InvalidCommentStart", null); 1424 } 1425 scanComment(); 1426 setScannerState(SCANNER_STATE_TRAILING_MISC); 1427 return XMLEvent.COMMENT; 1428 } 1429 case SCANNER_STATE_CONTENT: { 1430 int ch = fEntityScanner.peekChar(); 1431 if (ch == -1) { 1432 setScannerState(SCANNER_STATE_TERMINATED); 1433 return XMLEvent.END_DOCUMENT ; 1434 } else{ 1435 reportFatalError("ContentIllegalInTrailingMisc", 1436 null); 1437 fEntityScanner.scanChar(); 1438 setScannerState(SCANNER_STATE_TRAILING_MISC); 1439 return XMLEvent.CHARACTERS; 1440 } 1441 1442 } 1443 case SCANNER_STATE_REFERENCE: { 1444 reportFatalError("ReferenceIllegalInTrailingMisc", 1445 null); 1446 setScannerState(SCANNER_STATE_TRAILING_MISC); 1447 return XMLEvent.ENTITY_REFERENCE ; 1448 } 1449 case SCANNER_STATE_TERMINATED: { 1450 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1451 //has reached the end of document 1452 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1453 //xxx what to do when the scanner has reached the terminating state. 1454 return XMLEvent.END_DOCUMENT ; 1455 } 1456 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1457 throw new java.util.NoSuchElementException("No more events to be parsed"); 1458 } 1459 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1460 }//switch 1461 1462 } catch (EOFException e) { 1463 // NOTE: This is the only place we're allowed to reach 1464 // the real end of the document stream. Unless the 1465 // end of file was reached prematurely. 1466 if (fMarkupDepth != 0) { 1467 reportFatalError("PrematureEOF", null); 1468 return -1; 1469 //throw e; 1470 } 1471 //System.out.println("EOFException thrown") ; 1472 setScannerState(SCANNER_STATE_TERMINATED); 1473 } 1474 1475 return XMLEvent.END_DOCUMENT; 1476 1477 }//next 1478 1479 } // class TrailingMiscDriver 1480 1481 /** 1482 * Implements XMLBufferListener interface. 1483 */ 1484 1485 1486 /** 1487 * receives callbacks from {@link XMLEntityReader } when buffer 1488 * is being changed. 1489 * @param refreshPosition 1490 */ 1491 public void refresh(int refreshPosition){ 1492 super.refresh(refreshPosition); 1493 if(fReadingDTD){ 1494 Entity entity = fEntityScanner.getCurrentEntity(); 1495 if(entity instanceof Entity.ScannedEntity){ 1496 fEndPos=((Entity.ScannedEntity)entity).position; 1497 } 1498 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1499 fStartPos = refreshPosition; 1500 } 1501 } 1502 1503 } // class XMLDocumentScannerImpl