1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 25 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 26 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 27 import com.sun.org.apache.xerces.internal.util.XMLChar; 28 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.xni.Augmentations; 32 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 33 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34 import com.sun.org.apache.xerces.internal.xni.XNIException; 35 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 38 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
import com.sun.xml.internal.stream.Entity;
import com.sun.xml.internal.stream.StaxXMLInputSource;
import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;
import java.io.EOFException;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.XMLEvent;


/**
 * This class is responsible for scanning XML document structure
 * and content.
 *
 * <p>Scanning is delegated to a set of {@code Driver} objects (XML
 * declaration, prolog, DTD, content and trailing misc) that are switched
 * with {@code setDriver} as the document is consumed; {@link #next()}
 * simply forwards to the driver that is currently installed.
 *
 * <p>This class has been modified as per the new design, which is better
 * suited to efficiently building a pull parser. A lot of improvements have
 * been made and code has been added to support StAX functionality/features.
 *
 * <p>Refer to the table in unit-test
 * javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes
 * related to property SupportDTD.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author K.Venugopal, Sun Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, Sun Microsystems
 * @author Joe Wang, Sun Microsystems
 */
public class XMLDocumentScannerImpl
        extends XMLDocumentFragmentScannerImpl{

    //
    // Constants
    //

    // scanner states (values continue the numbering used by the superclass;
    // each one can be turned into text with getScannerStateName(int))

    /** Scanner state: XML declaration. Installed by reset(XMLComponentManager). */
    protected static final int SCANNER_STATE_XML_DECL = 42;

    /**
     * Scanner state: prolog. Entered by the XML declaration driver, after each
     * comment or processing instruction in the prolog, and when DTD scanning ends.
     */
    protected static final int SCANNER_STATE_PROLOG = 43;

    /** Scanner state: trailing misc. */
    protected static final int SCANNER_STATE_TRAILING_MISC = 44;

    /** Scanner state: DTD internal declarations (DOCTYPE with an internal subset). */
    protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45;

    /** Scanner state: open DTD external subset. */
    protected static final int SCANNER_STATE_DTD_EXTERNAL = 46;

    /** Scanner state: DTD external declarations. */
    protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47;

    /** Scanner state: NO MORE ELEMENTS.
*/
    protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48;

    // feature identifiers

    /** Property identifier: document scanner. */
    protected static final String DOCUMENT_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY;

    /** Feature identifier: load external DTD. */
    protected static final String LOAD_EXTERNAL_DTD =
            Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

    /**
     * Feature identifier: disallow DOCTYPE declaration. When the feature is
     * set, the prolog driver reports the fatal error "DoctypeNotAllowed" on
     * encountering a DOCTYPE.
     */
    protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
            Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;

    // property identifiers

    /** Property identifier: DTD scanner. */
    protected static final String DTD_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;

    /** Property identifier: ValidationManager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /**
     * Property identifier: NamespaceContext. Not listed in
     * {@link #RECOGNIZED_PROPERTIES}; reset(XMLComponentManager) looks it up
     * inside a try/catch for that reason.
     */
    protected static final String NAMESPACE_CONTEXT =
            Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;

    // recognized features and properties

    /** Recognized features; indexed in parallel with {@link #FEATURE_DEFAULTS}. */
    private static final String[] RECOGNIZED_FEATURES = {
        LOAD_EXTERNAL_DTD,
        DISALLOW_DOCTYPE_DECL_FEATURE,
    };

    /** Feature defaults: load-external-dtd is on, disallow-doctype-decl is off. */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        Boolean.FALSE,
    };

    /** Recognized properties; indexed in parallel with the property defaults. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        DTD_SCANNER,
        VALIDATION_MANAGER
    };

    /** Property defaults.
*/
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null
    };

    //
    // Data
    //

    // properties

    /** DTD scanner. May be null until a component manager supplies one or the DTD driver creates one. */
    protected XMLDTDScanner fDTDScanner = null;

    /** Validation manager. */
    //xxx: fValidationManager code needs to be added yet!
    protected ValidationManager fValidationManager;

    /**
     * Text of the DOCTYPE declaration. Created by the prolog driver when
     * "&lt;!DOCTYPE" is seen and completed by getDTDDecl().
     */
    protected XMLStringBuffer fDTDDecl = null;

    /** True from the moment "&lt;!DOCTYPE" is recognised until the declaration has been scanned. */
    protected boolean fReadingDTD = false;

    // NOTE(review): not read or written by the code visible in this part of the file.
    protected boolean fAddedListener = false;

    // protected data

    // other info

    /** Doctype name. */
    protected String fDoctypeName;

    /** Doctype declaration public identifier. */
    protected String fDoctypePublicId;

    /** Doctype declaration system identifier. */
    protected String fDoctypeSystemId;

    /** Namespace support. */
    protected NamespaceContext fNamespaceContext = new NamespaceSupport();

    // features

    /** Load external DTD. */
    protected boolean fLoadExternalDTD = true;

    // state

    /** Seen doctype declaration. */
    protected boolean fSeenDoctypeDecl;

    // NOTE(review): not referenced by the code visible in this part of the file.
    protected boolean fScanEndElement;

    //protected int fScannerLastState ;

    // drivers

    /** XML declaration driver. */
    protected Driver fXMLDeclDriver = new XMLDeclDriver();

    /** Prolog driver. */
    protected Driver fPrologDriver = new PrologDriver();

    /** DTD driver. Created on demand by the prolog driver. */
    protected Driver fDTDDriver = null ;

    /** Trailing miscellaneous section driver. */
    protected Driver fTrailingMiscDriver = new TrailingMiscDriver();

    /** Position in the current entity's buffer recorded just after "&lt;!DOCTYPE". */
    protected int fStartPos = 0;

    /** Position in the current entity's buffer recorded when the DOCTYPE scan finished. */
    protected int fEndPos = 0;

    /** True once scanDoctypeDecl has reported an internal subset for this document. */
    protected boolean fSeenInternalSubset= false;
    // temporary variables

    /** Array of 3 strings. scanDoctypeDecl reads the system id from [0] and the public id from [1]. */
    private String[] fStrings = new String[3];

    /** External subset source.
*/ 221 private XMLInputSource fExternalSubsetSource = null; 222 223 /** A DTD Description. */ 224 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 225 226 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 227 private static final char [] COMMENTSTRING = {'-','-'}; 228 229 // 230 // Constructors 231 // 232 233 /** Default constructor. */ 234 public XMLDocumentScannerImpl() {} // <init>() 235 236 237 // 238 // XMLDocumentScanner methods 239 // 240 241 242 /** 243 * Sets the input source. 244 * 245 * @param inputSource The input source. 246 * 247 * @throws IOException Thrown on i/o error. 248 */ 249 public void setInputSource(XMLInputSource inputSource) throws IOException { 250 fEntityManager.setEntityHandler(this); 251 //this starts a new entity and sets the current entity to the document entity. 252 fEntityManager.startDocumentEntity(inputSource); 253 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 254 setScannerState(XMLEvent.START_DOCUMENT); 255 } // setInputSource(XMLInputSource) 256 257 258 259 /**return the state of the scanner */ 260 public int getScannetState(){ 261 return fScannerState ; 262 } 263 264 265 266 267 public void reset(PropertyManager propertyManager) { 268 super.reset(propertyManager); 269 // other settings 270 fDoctypeName = null; 271 fDoctypePublicId = null; 272 fDoctypeSystemId = null; 273 fSeenDoctypeDecl = false; 274 fNamespaceContext.reset(); 275 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 276 277 // xerces features 278 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 279 setScannerState(XMLEvent.START_DOCUMENT); 280 setDriver(fXMLDeclDriver); 281 fSeenInternalSubset = false; 282 if(fDTDScanner != null){ 283 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 284 } 285 fEndPos = 0; 286 
fStartPos = 0; 287 if(fDTDDecl != null){ 288 fDTDDecl.clear(); 289 } 290 291 } 292 293 /** 294 * Resets the component. The component can query the component manager 295 * about any features and properties that affect the operation of the 296 * component. 297 * 298 * @param componentManager The component manager. 299 * 300 * @throws SAXException Thrown by component on initialization error. 301 * For example, if a feature or property is 302 * required for the operation of the component, the 303 * component manager may throw a 304 * SAXNotRecognizedException or a 305 * SAXNotSupportedException. 306 */ 307 public void reset(XMLComponentManager componentManager) 308 throws XMLConfigurationException { 309 310 super.reset(componentManager); 311 312 // other settings 313 fDoctypeName = null; 314 fDoctypePublicId = null; 315 fDoctypeSystemId = null; 316 fSeenDoctypeDecl = false; 317 fExternalSubsetSource = null; 318 319 // xerces features 320 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 321 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 322 323 fNamespaces = componentManager.getFeature(NAMESPACES, true); 324 325 fSeenInternalSubset = false; 326 // xerces properties 327 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 328 329 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 330 331 try { 332 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 333 } 334 catch (XMLConfigurationException e) { } 335 if (fNamespaceContext == null) { 336 fNamespaceContext = new NamespaceSupport(); 337 } 338 fNamespaceContext.reset(); 339 340 fEndPos = 0; 341 fStartPos = 0; 342 if(fDTDDecl != null) 343 fDTDDecl.clear(); 344 345 346 //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 347 348 // setup driver 349 setScannerState(SCANNER_STATE_XML_DECL); 350 setDriver(fXMLDeclDriver); 
351 352 } // reset(XMLComponentManager) 353 354 355 /** 356 * Returns a list of feature identifiers that are recognized by 357 * this component. This method may return null if no features 358 * are recognized by this component. 359 */ 360 public String[] getRecognizedFeatures() { 361 String[] featureIds = super.getRecognizedFeatures(); 362 int length = featureIds != null ? featureIds.length : 0; 363 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 364 if (featureIds != null) { 365 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 366 } 367 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 368 return combinedFeatureIds; 369 } // getRecognizedFeatures():String[] 370 371 /** 372 * Sets the state of a feature. This method is called by the component 373 * manager any time after reset when a feature changes state. 374 * <p> 375 * <strong>Note:</strong> Components should silently ignore features 376 * that do not affect the operation of the component. 377 * 378 * @param featureId The feature identifier. 379 * @param state The state of the feature. 380 * 381 * @throws SAXNotRecognizedException The component should not throw 382 * this exception. 383 * @throws SAXNotSupportedException The component should not throw 384 * this exception. 
385 */ 386 public void setFeature(String featureId, boolean state) 387 throws XMLConfigurationException { 388 389 super.setFeature(featureId, state); 390 391 // Xerces properties 392 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 393 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 394 395 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 396 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 397 fLoadExternalDTD = state; 398 return; 399 } 400 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 401 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 402 fDisallowDoctype = state; 403 return; 404 } 405 } 406 407 } // setFeature(String,boolean) 408 409 /** 410 * Returns a list of property identifiers that are recognized by 411 * this component. This method may return null if no properties 412 * are recognized by this component. 413 */ 414 public String[] getRecognizedProperties() { 415 String[] propertyIds = super.getRecognizedProperties(); 416 int length = propertyIds != null ? propertyIds.length : 0; 417 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 418 if (propertyIds != null) { 419 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 420 } 421 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 422 return combinedPropertyIds; 423 } // getRecognizedProperties():String[] 424 425 /** 426 * Sets the value of a property. This method is called by the component 427 * manager any time after reset when a property changes value. 428 * <p> 429 * <strong>Note:</strong> Components should silently ignore properties 430 * that do not affect the operation of the component. 431 * 432 * @param propertyId The property identifier. 433 * @param value The value of the property. 
434 * 435 * @throws SAXNotRecognizedException The component should not throw 436 * this exception. 437 * @throws SAXNotSupportedException The component should not throw 438 * this exception. 439 */ 440 public void setProperty(String propertyId, Object value) 441 throws XMLConfigurationException { 442 443 super.setProperty(propertyId, value); 444 445 // Xerces properties 446 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 447 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 448 449 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 450 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 451 fDTDScanner = (XMLDTDScanner)value; 452 } 453 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 454 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 455 if (value != null) { 456 fNamespaceContext = (NamespaceContext)value; 457 } 458 } 459 460 return; 461 } 462 463 } // setProperty(String,Object) 464 465 /** 466 * Returns the default state for a feature, or null if this 467 * component does not want to report a default value for this 468 * feature. 469 * 470 * @param featureId The feature identifier. 471 * 472 * @since Xerces 2.2.0 473 */ 474 public Boolean getFeatureDefault(String featureId) { 475 476 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 477 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 478 return FEATURE_DEFAULTS[i]; 479 } 480 } 481 return super.getFeatureDefault(featureId); 482 } // getFeatureDefault(String):Boolean 483 484 /** 485 * Returns the default state for a property, or null if this 486 * component does not want to report a default value for this 487 * property. 488 * 489 * @param propertyId The property identifier. 
490 * 491 * @since Xerces 2.2.0 492 */ 493 public Object getPropertyDefault(String propertyId) { 494 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 495 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 496 return PROPERTY_DEFAULTS[i]; 497 } 498 } 499 return super.getPropertyDefault(propertyId); 500 } // getPropertyDefault(String):Object 501 502 // 503 // XMLEntityHandler methods 504 // 505 506 /** 507 * This method notifies of the start of an entity. The DTD has the 508 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 509 * general entities are just specified by their name. 510 * 511 * @param name The name of the entity. 512 * @param identifier The resource identifier. 513 * @param encoding The auto-detected IANA encoding name of the entity 514 * stream. This value will be null in those situations 515 * where the entity encoding is not auto-detected (e.g. 516 * internal entities or a document entity that is 517 * parsed from a java.io.Reader). 518 * 519 * @throws XNIException Thrown by handler to signal an error. 520 */ 521 public void startEntity(String name, 522 XMLResourceIdentifier identifier, 523 String encoding, Augmentations augs) throws XNIException { 524 525 super.startEntity(name, identifier, encoding,augs); 526 527 //register current document scanner as a listener for XMLEntityScanner 528 fEntityScanner.registerListener(this); 529 530 // prepare to look for a TextDecl if external general entity 531 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 532 // Don't do this if we're skipping the entity! 533 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 534 setScannerState(SCANNER_STATE_TEXT_DECL); 535 } 536 } 537 538 // call handler 539 /** comment this part.. LOCATOR problem.. 
*/ 540 if (fDocumentHandler != null && name.equals("[xml]")) { 541 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 542 } 543 544 } // startEntity(String,identifier,String) 545 546 547 /** 548 * This method notifies the end of an entity. The DTD has the pseudo-name 549 * of "[dtd]" parameter entity names start with '%'; and general entities 550 * are just specified by their name. 551 * 552 * @param name The name of the entity. 553 * 554 * @throws XNIException Thrown by handler to signal an error. 555 */ 556 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 557 558 super.endEntity(name, augs); 559 560 if(name.equals("[xml]")){ 561 //if fMarkupDepth has reached 0. 562 //and driver is fTrailingMiscDriver (which 563 //handles end of document in normal case) 564 //set the scanner state of SCANNER_STATE_TERMINATED 565 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 566 //set the scanner set to SCANNER_STATE_TERMINATED 567 setScannerState(SCANNER_STATE_TERMINATED) ; 568 } else{ 569 //else we have reached the end of document prematurely 570 //so throw EOFException. 
571 throw new java.io.EOFException(); 572 } 573 574 //this is taken care in wrapper which generates XNI callbacks, There are no next events 575 576 //if (fDocumentHandler != null) { 577 //fDocumentHandler.endDocument(null); 578 //} 579 } 580 } // endEntity(String) 581 582 583 public XMLStringBuffer getDTDDecl(){ 584 Entity entity = fEntityScanner.getCurrentEntity(); 585 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 586 if(fSeenInternalSubset) 587 fDTDDecl.append("]>"); 588 return fDTDDecl; 589 } 590 591 public String getCharacterEncodingScheme(){ 592 return fDeclaredEncoding; 593 } 594 595 /** return the next state on the input 596 * 597 * @return int 598 */ 599 600 public int next() throws IOException, XNIException { 601 return fDriver.next(); 602 } 603 604 //getNamespaceContext 605 public NamespaceContext getNamespaceContext(){ 606 return fNamespaceContext ; 607 } 608 609 610 611 // 612 // Protected methods 613 // 614 615 // driver factory methods 616 617 /** Creates a content driver. */ 618 protected Driver createContentDriver() { 619 return new ContentDriver(); 620 } // createContentDriver():Driver 621 622 // scanning methods 623 624 /** Scans a doctype declaration. 
*/ 625 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 626 627 // spaces 628 if (!fEntityScanner.skipSpaces()) { 629 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 630 null); 631 } 632 633 // root element name 634 fDoctypeName = fEntityScanner.scanName(NameType.DOCTYPE); 635 if (fDoctypeName == null) { 636 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 637 } 638 639 // external id 640 if (fEntityScanner.skipSpaces()) { 641 scanExternalID(fStrings, false); 642 fDoctypeSystemId = fStrings[0]; 643 fDoctypePublicId = fStrings[1]; 644 fEntityScanner.skipSpaces(); 645 } 646 647 fHasExternalDTD = fDoctypeSystemId != null; 648 649 // Attempt to locate an external subset with an external subset resolver. 650 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 651 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 652 fDTDDescription.setRootName(fDoctypeName); 653 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 654 fHasExternalDTD = fExternalSubsetSource != null; 655 } 656 657 // call handler 658 if (supportDTD && fDocumentHandler != null) { 659 // NOTE: I don't like calling the doctypeDecl callback until 660 // end of the *full* doctype line (including internal 661 // subset) is parsed correctly but SAX2 requires that 662 // it knows the root element name and public and system 663 // identifier for the startDTD call. -Ac 664 if (fExternalSubsetSource == null) { 665 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 666 } 667 else { 668 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 669 } 670 } 671 672 // is there an internal subset? 
673 boolean internalSubset = true; 674 if (!fEntityScanner.skipChar('[', null)) { 675 internalSubset = false; 676 fEntityScanner.skipSpaces(); 677 if (!fEntityScanner.skipChar('>', null)) { 678 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 679 } 680 fMarkupDepth--; 681 } 682 return internalSubset; 683 684 } // scanDoctypeDecl():boolean 685 686 // 687 // Private methods 688 // 689 /** Set the scanner state after scanning DTD */ 690 protected void setEndDTDScanState() { 691 setScannerState(SCANNER_STATE_PROLOG); 692 setDriver(fPrologDriver); 693 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 694 fReadingDTD=false; 695 } 696 697 /** Returns the scanner state name. */ 698 protected String getScannerStateName(int state) { 699 700 switch (state) { 701 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 702 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 703 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 704 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 705 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 706 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 707 } 708 return super.getScannerStateName(state); 709 710 } // getScannerStateName(int):String 711 712 // 713 // Classes 714 // 715 716 /** 717 * Driver to handle XMLDecl scanning. 718 * 719 * This class has been modified as per the new design which is more suited to 720 * efficiently build pull parser. Lots of performance improvements have been done and 721 * the code has been added to support stax functionality/features. 722 * 723 * @author Neeraj Bajaj, Sun Microsystems. 
724 * 725 * @author Andy Clark, IBM 726 */ 727 protected final class XMLDeclDriver 728 implements Driver { 729 730 // 731 // Driver methods 732 // 733 734 735 public int next() throws IOException, XNIException { 736 737 // next driver is prolog regardless of whether there 738 // is an XMLDecl in this document 739 setScannerState(SCANNER_STATE_PROLOG); 740 setDriver(fPrologDriver); 741 742 //System.out.println("fEntityScanner = " + fEntityScanner); 743 // scan XMLDecl 744 try { 745 if (fEntityScanner.skipString(XMLDECL)) { 746 if (fEntityScanner.peekChar() == ' ') { 747 fMarkupDepth++; 748 scanXMLDeclOrTextDecl(false); 749 } else { 750 // PI, reset position 751 fEntityManager.fCurrentEntity.position = 0; 752 } 753 } 754 755 //START_OF_THE_DOCUMENT 756 fEntityManager.fCurrentEntity.mayReadChunks = true; 757 return XMLEvent.START_DOCUMENT; 758 759 } 760 761 // premature end of file 762 catch (EOFException e) { 763 reportFatalError("PrematureEOF", null); 764 return -1; 765 //throw e; 766 } 767 768 } 769 } // class XMLDeclDriver 770 771 /** 772 * Driver to handle prolog scanning. 773 * 774 * @author Andy Clark, IBM 775 */ 776 protected final class PrologDriver 777 implements Driver { 778 779 /** 780 * Drives the parser to the next state/event on the input. Parser is guaranteed 781 * to stop at the next state/event. 782 * 783 * Internally XML document is divided into several states. Each state represents 784 * a sections of XML document. When this functions returns normally, it has read 785 * the section of XML document and returns the state corresponding to section of 786 * document which has been read. For optimizations, a particular driver 787 * can read ahead of the section of document (state returned) just read and 788 * can maintain a different internal state. 789 * 790 * @return state representing the section of document just read. 791 * 792 * @throws IOException Thrown on i/o error. 793 * @throws XNIException Thrown on parse error. 
*/

        public int next() throws IOException, XNIException {

            try {
                // Phase 1: consume white space and the start of the next piece
                // of markup until the scanner state says what kind of
                // construct follows (or the root element is reached).
                do {
                    switch (fScannerState) {
                        case SCANNER_STATE_PROLOG: {
                            fEntityScanner.skipSpaces();
                            if (fEntityScanner.skipChar('<', null)) {
                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
                                setScannerState(SCANNER_STATE_REFERENCE);
                            } else {
                                setScannerState(SCANNER_STATE_CONTENT);
                            }
                            break;
                        }

                        case SCANNER_STATE_START_OF_MARKUP: {
                            fMarkupDepth++;
                            // A name start character after '<' means the root
                            // element begins here.
                            if (isValidNameStartChar(fEntityScanner.peekChar()) ||
                                    isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                                setDriver(fContentDriver);
                                //from now onwards this would be handled by fContentDriver,in the same next() call
                                return fContentDriver.next();
                            } else if (fEntityScanner.skipChar('!', null)) {
                                if (fEntityScanner.skipChar('-', null)) {
                                    if (!fEntityScanner.skipChar('-', null)) {
                                        reportFatalError("InvalidCommentStart",
                                                null);
                                    }
                                    setScannerState(SCANNER_STATE_COMMENT);
                                } else if (fEntityScanner.skipString(DOCTYPE)) {
                                    setScannerState(SCANNER_STATE_DOCTYPE);
                                    // Remember where the declaration text starts
                                    // so that getDTDDecl() can copy it later.
                                    Entity entity = fEntityScanner.getCurrentEntity();
                                    if(entity instanceof Entity.ScannedEntity){
                                        fStartPos=((Entity.ScannedEntity)entity).position;
                                    }
                                    fReadingDTD=true;
                                    if(fDTDDecl == null)
                                        fDTDDecl = new XMLStringBuffer();
                                    fDTDDecl.append("<!DOCTYPE");

                                } else {
                                    reportFatalError("MarkupNotRecognizedInProlog",
                                            null);
                                }
                            } else if (fEntityScanner.skipChar('?', null)) {
                                setScannerState(SCANNER_STATE_PI);
                            } else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                        null);
                            }
                            break;
                        }
                    }
                } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP );

                // Phase 2: scan the construct that phase 1 identified and
                // return the matching event.
                switch(fScannerState){
                    /**
                    //this part is handled by FragmentContentHandler
                    case SCANNER_STATE_ROOT_ELEMENT: {
                        //we have read '<' and beginning of reading the start element tag
                        setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
                        setDriver(fContentDriver);
                        //from now onwards this would be handled by fContentDriver,in the same next() call
                        return fContentDriver.next();
                    }
                     */
                    case SCANNER_STATE_COMMENT: {
                        //this function fills the data..
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.COMMENT;
                        //setScannerState(SCANNER_STATE_PROLOG);
                        //break;
                    }
                    case SCANNER_STATE_PI: {
                        fContentBuffer.clear() ;
                        scanPI(fContentBuffer);
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.PROCESSING_INSTRUCTION;
                    }

                    case SCANNER_STATE_DOCTYPE: {
                        if (fDisallowDoctype) {
                            reportFatalError("DoctypeNotAllowed", null);
                        }

                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // scanDoctypeDecl() sends XNI doctypeDecl event that
                        // in SAX is converted to startDTD() event.
                        if (scanDoctypeDecl(fSupportDTD)) {
                            //allow parsing of entity decls to continue in order to stay well-formed
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            fSeenInternalSubset = true;
                            if(fDTDDriver == null){
                                fDTDDriver = new DTDDriver();
                            }
                            // NOTE(review): the content driver is installed here, but the
                            // DTD driver is invoked directly for this call; the visible
                            // part of dispatch() ends via setEndDTDScanState(), which
                            // re-installs the prolog driver - confirm for all paths.
                            setDriver(fContentDriver);
                            //always return DTD event, the event however, will not contain any entities
                            return fDTDDriver.next();
                        }

                        // No internal subset: the declaration has been read up to
                        // its closing '>', so record where its text ends.
                        // (fSeenDoctypeDecl was set to true just above.)
                        if(fSeenDoctypeDecl){
                            Entity entity = fEntityScanner.getCurrentEntity();
                            if(entity instanceof Entity.ScannedEntity){
                                fEndPos = ((Entity.ScannedEntity)entity).position;
                            }
                            fReadingDTD = false;
                        }

                        // handle external subset
                        if (fDoctypeSystemId != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                if (fSupportDTD) {
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                } else {
                                    setScannerState(SCANNER_STATE_PROLOG);
                                }

                                setDriver(fContentDriver);
                                if(fDTDDriver == null) {
                                    fDTDDriver = new DTDDriver();
                                }

                                return fDTDDriver.next();
                            }
                        }
                        else if (fExternalSubsetSource != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
                                // NOTE(review): fDTDScanner is dereferenced without a null
                                // check here, unlike the guarded call further below.
                                fDTDScanner.setInputSource(fExternalSubsetSource);
                                fExternalSubsetSource = null;
                                if (fSupportDTD)
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                else
                                    setScannerState(SCANNER_STATE_PROLOG);
                                setDriver(fContentDriver);
                                if(fDTDDriver == null)
                                    fDTDDriver = new DTDDriver();
                                return fDTDDriver.next();
                            }
                        }

                        // Send endDTD() call if:
                        // a) systemId is null or if an external subset resolver could not locate an external subset.
                        // b) "load-external-dtd" and validation are false
                        // c) DTD grammar is cached

                        // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD
                        // in SAX this results in 2 events: startDTD, endDTD
                        if (fDTDScanner != null) {
                            fDTDScanner.setInputSource(null);
                        }
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.DTD;
                    }

                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        // consume the offending character
                        fEntityScanner.scanChar(null);
                        return -1;
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                        return -1;
                    }

                    /**
                     * if (complete) {
                     *     if (fEntityScanner.scanChar() != '<') {
                     *         reportFatalError("RootElementRequired", null);
                     *     }
                     *     setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                     *     setDriver(fContentDriver);
                     * }
                     */
                }
            }
            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                //xxx  what should be returned here.... ???
                return -1 ;
                //throw e;
            }
            // Reached only when the state after phase 1 matches none of the
            // cases above.
            //xxx  what should be returned here.... ???
            return -1;

        }


    } // class PrologDriver

    /**
     * Driver to handle the internal and external DTD subsets.
     *
     * @author Andy Clark, IBM
     */
    protected final class DTDDriver
            implements Driver {

        //
        // Driver methods
        //

        /**
         * Scans the DTD by calling {@code dispatch(true)} and, when a property
         * manager is present, builds the DTD grammar utility from the DTD
         * scanner's grammar.
         *
         * @return always {@code XMLEvent.DTD}
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException{

            dispatch(true);

            //xxx: remove this hack and align this with reusing DTD components
            //currently this routine will only be executed from Stax
            if(fPropertyManager != null){
                dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext);
            }

            return XMLEvent.DTD ;
        }

        /**
         * Dispatch an XML "event".
         *
         * @param complete True if this driver is intended to scan
         *                 and dispatch as much as possible.
1027 * 1028 * @return True if there is more to dispatch either from this 1029 * or a another driver. 1030 * 1031 * @throws IOException Thrown on i/o error. 1032 * @throws XNIException Thrown on parse error. 1033 */ 1034 public boolean dispatch(boolean complete) 1035 throws IOException, XNIException { 1036 fEntityManager.setEntityHandler(null); 1037 try { 1038 boolean again; 1039 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1040 if( fDTDScanner == null){ 1041 1042 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1043 fDTDScanner = new XML11DTDScannerImpl(); 1044 } else 1045 1046 fDTDScanner = new XMLDTDScannerImpl(); 1047 1048 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1049 } 1050 1051 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer); 1052 do { 1053 again = false; 1054 switch (fScannerState) { 1055 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1056 boolean moreToScan = false; 1057 if (!fDTDScanner.skipDTD(fSupportDTD)) { 1058 // REVISIT: Should there be a feature for 1059 // the "complete" parameter? 
1060 boolean completeDTD = true; 1061 1062 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1063 } 1064 Entity entity = fEntityScanner.getCurrentEntity(); 1065 if(entity instanceof Entity.ScannedEntity){ 1066 fEndPos=((Entity.ScannedEntity)entity).position; 1067 } 1068 fReadingDTD=false; 1069 if (!moreToScan) { 1070 // end doctype declaration 1071 if (!fEntityScanner.skipChar(']', null)) { 1072 reportFatalError("DoctypedeclNotClosed", new Object[]{fDoctypeName}); 1073 } 1074 fEntityScanner.skipSpaces(); 1075 if (!fEntityScanner.skipChar('>', null)) { 1076 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1077 } 1078 fMarkupDepth--; 1079 1080 if (!fSupportDTD) { 1081 //simply reset the entity store without having to mess around 1082 //with the DTD Scanner code 1083 fEntityStore = fEntityManager.getEntityStore(); 1084 fEntityStore.reset(); 1085 } else { 1086 // scan external subset next unless we are ignoring DTDs 1087 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1088 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1089 break; 1090 } 1091 } 1092 1093 setEndDTDScanState(); 1094 return true; 1095 1096 } 1097 break; 1098 } 1099 case SCANNER_STATE_DTD_EXTERNAL: { 1100 /** 1101 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1102 fDTDDescription.setRootName(fDoctypeName); 1103 XMLInputSource xmlInputSource = 1104 fEntityManager.resolveEntity(fDTDDescription); 1105 fDTDScanner.setInputSource(xmlInputSource); 1106 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1107 again = true; 1108 break; 1109 */ 1110 1111 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1112 XMLInputSource xmlInputSource = null ; 1113 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1114 1115 // Check access permission. If the source is resolved by a resolver, the check is skipped. 
1116 if (!staxInputSource.isCreatedByResolver()) { 1117 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1118 if (accessError != null) { 1119 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1120 } 1121 } 1122 xmlInputSource = staxInputSource.getXMLInputSource(); 1123 fDTDScanner.setInputSource(xmlInputSource); 1124 if (fEntityScanner.fCurrentEntity != null) { 1125 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1126 } else { 1127 setScannerState(SCANNER_STATE_PROLOG); 1128 } 1129 again = true; 1130 break; 1131 } 1132 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1133 // REVISIT: Should there be a feature for 1134 // the "complete" parameter? 1135 boolean completeDTD = true; 1136 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1137 if (!moreToScan) { 1138 setEndDTDScanState(); 1139 return true; 1140 } 1141 break; 1142 } 1143 case SCANNER_STATE_PROLOG : { 1144 // skip entity decls 1145 setEndDTDScanState(); 1146 return true; 1147 } 1148 default: { 1149 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1150 } 1151 } 1152 } while (complete || again); 1153 } 1154 1155 // premature end of file 1156 catch (EOFException e) { 1157 e.printStackTrace(); 1158 reportFatalError("PrematureEOF", null); 1159 return false; 1160 //throw e; 1161 } 1162 1163 // cleanup 1164 finally { 1165 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1166 } 1167 1168 return true; 1169 1170 } 1171 1172 // dispatch(boolean):boolean 1173 1174 } // class DTDDriver 1175 1176 /** 1177 * Driver to handle content scanning. 
1178 * 1179 * @author Andy Clark, IBM 1180 * @author Eric Ye, IBM 1181 */ 1182 protected class ContentDriver 1183 extends FragmentContentDriver { 1184 1185 // 1186 // Protected methods 1187 // 1188 1189 // hooks 1190 1191 // NOTE: These hook methods are added so that the full document 1192 // scanner can share the majority of code with this class. 1193 1194 /** 1195 * Scan for DOCTYPE hook. This method is a hook for subclasses 1196 * to add code to handle scanning for a the "DOCTYPE" string 1197 * after the string "<!" has been scanned. 1198 * 1199 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1200 * was not scanned. 1201 */ 1202 protected boolean scanForDoctypeHook() 1203 throws IOException, XNIException { 1204 1205 if (fEntityScanner.skipString(DOCTYPE)) { 1206 setScannerState(SCANNER_STATE_DOCTYPE); 1207 // fEntityScanner.markStartOfDTD(); 1208 return true; 1209 } 1210 return false; 1211 1212 } // scanForDoctypeHook():boolean 1213 1214 /** 1215 * Element depth iz zero. This methos is a hook for subclasses 1216 * to add code to handle when the element depth hits zero. When 1217 * scanning a document fragment, an element depth of zero is 1218 * normal. However, when scanning a full XML document, the 1219 * scanner must handle the trailing miscellanous section of 1220 * the document after the end of the document's root element. 1221 * 1222 * @return True if the caller should stop and return true which 1223 * allows the scanner to switch to a new scanning 1224 * driver. A return value of false indicates that 1225 * the content driver should continue as normal. 1226 */ 1227 protected boolean elementDepthIsZeroHook() 1228 throws IOException, XNIException { 1229 1230 setScannerState(SCANNER_STATE_TRAILING_MISC); 1231 setDriver(fTrailingMiscDriver); 1232 return true; 1233 1234 } // elementDepthIsZeroHook():boolean 1235 1236 /** 1237 * Scan for root element hook. 
This method is a hook for 1238 * subclasses to add code that handles scanning for the root 1239 * element. When scanning a document fragment, there is no 1240 * "root" element. However, when scanning a full XML document, 1241 * the scanner must handle the root element specially. 1242 * 1243 * @return True if the caller should stop and return true which 1244 * allows the scanner to switch to a new scanning 1245 * driver. A return value of false indicates that 1246 * the content driver should continue as normal. 1247 */ 1248 protected boolean scanRootElementHook() 1249 throws IOException, XNIException { 1250 1251 if (scanStartElement()) { 1252 setScannerState(SCANNER_STATE_TRAILING_MISC); 1253 setDriver(fTrailingMiscDriver); 1254 return true; 1255 } 1256 return false; 1257 1258 } // scanRootElementHook():boolean 1259 1260 /** 1261 * End of file hook. This method is a hook for subclasses to 1262 * add code that handles the end of file. The end of file in 1263 * a document fragment is OK if the markup depth is zero. 1264 * However, when scanning a full XML document, an end of file 1265 * is always premature. 1266 */ 1267 protected void endOfFileHook(EOFException e) 1268 throws IOException, XNIException { 1269 1270 reportFatalError("PrematureEOF", null); 1271 // in case continue-after-fatal-error set, should not do this... 
1272 //throw e; 1273 1274 } // endOfFileHook() 1275 1276 protected void resolveExternalSubsetAndRead() 1277 throws IOException, XNIException { 1278 1279 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1280 fDTDDescription.setRootName(fElementQName.rawname); 1281 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1282 1283 if (src != null) { 1284 fDoctypeName = fElementQName.rawname; 1285 fDoctypePublicId = src.getPublicId(); 1286 fDoctypeSystemId = src.getSystemId(); 1287 // call document handler 1288 if (fDocumentHandler != null) { 1289 // This inserts a doctypeDecl event into the stream though no 1290 // DOCTYPE existed in the instance document. 1291 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1292 } 1293 try { 1294 fDTDScanner.setInputSource(src); 1295 while (fDTDScanner.scanDTDExternalSubset(true)); 1296 } finally { 1297 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1298 } 1299 } 1300 } // resolveExternalSubsetAndRead() 1301 1302 1303 1304 } // class ContentDriver 1305 1306 /** 1307 * Driver to handle trailing miscellaneous section scanning. 1308 * 1309 * @author Andy Clark, IBM 1310 * @author Eric Ye, IBM 1311 */ 1312 protected final class TrailingMiscDriver 1313 implements Driver { 1314 1315 // 1316 // Driver methods 1317 // 1318 public int next() throws IOException, XNIException{ 1319 //this could for cases like <foo/> 1320 //look at scanRootElementHook 1321 if(fEmptyElement){ 1322 fEmptyElement = false; 1323 return XMLEvent.END_ELEMENT; 1324 } 1325 1326 try { 1327 if(fScannerState == SCANNER_STATE_TERMINATED){ 1328 return XMLEvent.END_DOCUMENT ;} 1329 do { 1330 switch (fScannerState) { 1331 case SCANNER_STATE_TRAILING_MISC: { 1332 1333 fEntityScanner.skipSpaces(); 1334 //we should have reached the end of the document in 1335 //most cases. 
1336 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1337 return XMLEvent.END_DOCUMENT ; 1338 } 1339 if (fEntityScanner.skipChar('<', null)) { 1340 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1341 } else { 1342 setScannerState(SCANNER_STATE_CONTENT); 1343 } 1344 break; 1345 } 1346 case SCANNER_STATE_START_OF_MARKUP: { 1347 fMarkupDepth++; 1348 if (fEntityScanner.skipChar('?', null)) { 1349 setScannerState(SCANNER_STATE_PI); 1350 } else if (fEntityScanner.skipChar('!', null)) { 1351 setScannerState(SCANNER_STATE_COMMENT); 1352 } else if (fEntityScanner.skipChar('/', null)) { 1353 reportFatalError("MarkupNotRecognizedInMisc", 1354 null); 1355 } else if (isValidNameStartChar(fEntityScanner.peekChar()) || 1356 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 1357 reportFatalError("MarkupNotRecognizedInMisc", 1358 null); 1359 scanStartElement(); 1360 setScannerState(SCANNER_STATE_CONTENT); 1361 } else { 1362 reportFatalError("MarkupNotRecognizedInMisc", 1363 null); 1364 } 1365 break; 1366 } 1367 } 1368 } while(fScannerState == SCANNER_STATE_START_OF_MARKUP || 1369 fScannerState == SCANNER_STATE_TRAILING_MISC); 1370 1371 switch (fScannerState){ 1372 case SCANNER_STATE_PI: { 1373 fContentBuffer.clear(); 1374 scanPI(fContentBuffer); 1375 setScannerState(SCANNER_STATE_TRAILING_MISC); 1376 return XMLEvent.PROCESSING_INSTRUCTION ; 1377 } 1378 case SCANNER_STATE_COMMENT: { 1379 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1380 reportFatalError("InvalidCommentStart", null); 1381 } 1382 scanComment(); 1383 setScannerState(SCANNER_STATE_TRAILING_MISC); 1384 return XMLEvent.COMMENT; 1385 } 1386 case SCANNER_STATE_CONTENT: { 1387 int ch = fEntityScanner.peekChar(); 1388 if (ch == -1) { 1389 setScannerState(SCANNER_STATE_TERMINATED); 1390 return XMLEvent.END_DOCUMENT ; 1391 } else{ 1392 reportFatalError("ContentIllegalInTrailingMisc", 1393 null); 1394 fEntityScanner.scanChar(null); 1395 setScannerState(SCANNER_STATE_TRAILING_MISC); 1396 return XMLEvent.CHARACTERS; 
1397 } 1398 1399 } 1400 case SCANNER_STATE_REFERENCE: { 1401 reportFatalError("ReferenceIllegalInTrailingMisc", 1402 null); 1403 setScannerState(SCANNER_STATE_TRAILING_MISC); 1404 return XMLEvent.ENTITY_REFERENCE ; 1405 } 1406 case SCANNER_STATE_TERMINATED: { 1407 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1408 //has reached the end of document 1409 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1410 //xxx what to do when the scanner has reached the terminating state. 1411 return XMLEvent.END_DOCUMENT ; 1412 } 1413 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1414 throw new java.util.NoSuchElementException("No more events to be parsed"); 1415 } 1416 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1417 }//switch 1418 1419 } catch (EOFException e) { 1420 // NOTE: This is the only place we're allowed to reach 1421 // the real end of the document stream. Unless the 1422 // end of file was reached prematurely. 1423 if (fMarkupDepth != 0) { 1424 reportFatalError("PrematureEOF", null); 1425 return -1; 1426 //throw e; 1427 } 1428 //System.out.println("EOFException thrown") ; 1429 setScannerState(SCANNER_STATE_TERMINATED); 1430 } 1431 1432 return XMLEvent.END_DOCUMENT; 1433 1434 }//next 1435 1436 } // class TrailingMiscDriver 1437 1438 /** 1439 * Implements XMLBufferListener interface. 1440 */ 1441 1442 1443 /** 1444 * receives callbacks from {@link XMLEntityReader } when buffer 1445 * is being changed. 1446 * @param refreshPosition 1447 */ 1448 public void refresh(int refreshPosition){ 1449 super.refresh(refreshPosition); 1450 if(fReadingDTD){ 1451 Entity entity = fEntityScanner.getCurrentEntity(); 1452 if(entity instanceof Entity.ScannedEntity){ 1453 fEndPos=((Entity.ScannedEntity)entity).position; 1454 } 1455 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1456 fStartPos = refreshPosition; 1457 } 1458 } 1459 1460 } // class XMLDocumentScannerImpl