1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 24 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 25 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 26 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 27 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 28 import com.sun.org.apache.xerces.internal.util.*; 29 import com.sun.org.apache.xerces.internal.util.URI; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 36 import com.sun.org.apache.xerces.internal.xni.XNIException; 37 import com.sun.org.apache.xerces.internal.xni.parser.*; 38 import 
com.sun.xml.internal.stream.Entity; 39 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 40 import com.sun.xml.internal.stream.StaxXMLInputSource; 41 import com.sun.xml.internal.stream.XMLEntityStorage; 42 import java.io.*; 43 import java.lang.reflect.Method; 44 import java.net.HttpURLConnection; 45 import java.net.URISyntaxException; 46 import java.net.URL; 47 import java.net.URLConnection; 48 import java.util.HashMap; 49 import java.util.Iterator; 50 import java.util.Locale; 51 import java.util.Map; 52 import java.util.Stack; 53 import javax.xml.stream.XMLInputFactory; 54 55 56 /** 57 * Will keep track of current entity. 58 * 59 * The entity manager handles the registration of general and parameter 60 * entities; resolves entities; and starts entities. The entity manager 61 * is a central component in a standard parser configuration and this 62 * class works directly with the entity scanner to manage the underlying 63 * xni. 64 * <p> 65 * This component requires the following features and properties from the 66 * component manager that uses it: 67 * <ul> 68 * <li>http://xml.org/sax/features/validation</li> 69 * <li>http://xml.org/sax/features/external-general-entities</li> 70 * <li>http://xml.org/sax/features/external-parameter-entities</li> 71 * <li>http://apache.org/xml/features/allow-java-encodings</li> 72 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 73 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 74 * <li>http://apache.org/xml/properties/internal/entity-resolver</li> 75 * </ul> 76 * 77 * 78 * @author Andy Clark, IBM 79 * @author Arnaud Le Hors, IBM 80 * @author K.Venugopal SUN Microsystems 81 * @author Neeraj Bajaj SUN Microsystems 82 * @author Sunitha Reddy SUN Microsystems 83 * @version $Id: XMLEntityManager.java,v 1.17 2010-11-01 04:39:41 joehw Exp $ 84 */ 85 public class XMLEntityManager implements XMLComponent, XMLEntityResolver { 86 87 // 88 // Constants 89 // 90 91 /** Default buffer size (2048). 
*/
    public static final int DEFAULT_BUFFER_SIZE = 8192; // NOTE: the value is 8192; the "(2048)" in the Javadoc is stale

    /** Default buffer size before we've finished with the XMLDecl: */
    public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;

    /** Default internal entity buffer size (1024). */
    public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024;

    // feature identifiers

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /**
     * Standard URI conformant (strict URI) flag. Feature identifier:
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;


    /** Feature identifier: external general entities. */
    protected static final String EXTERNAL_GENERAL_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;

    /** Feature identifier: external parameter entities. */
    protected static final String EXTERNAL_PARAMETER_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Feature identifier: warn on duplicate EntityDef */
    protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
            Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;

    /** Feature identifier: load external DTD. */
    protected static final String LOAD_EXTERNAL_DTD =
            Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

    // property identifiers

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: standard uri conformant */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: StAX entity resolver. */
    protected static final String STAX_ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: ValidationManager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: buffer size. */
    protected static final String BUFFER_SIZE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;

    /** Property identifier: security manager. */
    protected static final String SECURITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;

    /** Identifier for parser settings; note that it is built with the feature prefix. */
    protected static final String PARSER_SETTINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

    /** Property identifier: Security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /** access external dtd: file protocol */
    static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features. */
    private static final String[] RECOGNIZED_FEATURES = {
        VALIDATION,
        EXTERNAL_GENERAL_ENTITIES,
        EXTERNAL_PARAMETER_ENTITIES,
        ALLOW_JAVA_ENCODINGS,
        WARN_ON_DUPLICATE_ENTITYDEF,
        STANDARD_URI_CONFORMANT
    };

    /** Feature defaults.
*/ 189 private static final Boolean[] FEATURE_DEFAULTS = { 190 null, 191 Boolean.TRUE, 192 Boolean.TRUE, 193 Boolean.TRUE, 194 Boolean.FALSE, 195 Boolean.FALSE 196 }; 197 198 /** Recognized properties. */ 199 private static final String[] RECOGNIZED_PROPERTIES = { 200 SYMBOL_TABLE, 201 ERROR_REPORTER, 202 ENTITY_RESOLVER, 203 VALIDATION_MANAGER, 204 BUFFER_SIZE, 205 SECURITY_MANAGER, 206 XML_SECURITY_PROPERTY_MANAGER 207 }; 208 209 /** Property defaults. */ 210 private static final Object[] PROPERTY_DEFAULTS = { 211 null, 212 null, 213 null, 214 null, 215 new Integer(DEFAULT_BUFFER_SIZE), 216 null, 217 null 218 }; 219 220 private static final String XMLEntity = "[xml]".intern(); 221 private static final String DTDEntity = "[dtd]".intern(); 222 223 // debugging 224 225 /** 226 * Debug printing of buffer. This debugging flag works best when you 227 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 228 * 64 characters. 229 */ 230 private static final boolean DEBUG_BUFFER = false; 231 232 /** warn on duplicate Entity declaration. 233 * http://apache.org/xml/features/warn-on-duplicate-entitydef 234 */ 235 protected boolean fWarnDuplicateEntityDef; 236 237 /** Debug some basic entities. */ 238 private static final boolean DEBUG_ENTITIES = false; 239 240 /** Debug switching readers for encodings. */ 241 private static final boolean DEBUG_ENCODINGS = false; 242 243 // should be diplayed trace resolving messages 244 private static final boolean DEBUG_RESOLVER = false ; 245 246 // 247 // Data 248 // 249 250 // features 251 252 /** 253 * Validation. This feature identifier is: 254 * http://xml.org/sax/features/validation 255 */ 256 protected boolean fValidation; 257 258 /** 259 * External general entities. This feature identifier is: 260 * http://xml.org/sax/features/external-general-entities 261 */ 262 protected boolean fExternalGeneralEntities; 263 264 /** 265 * External parameter entities. 
This feature identifier is:
     * http://xml.org/sax/features/external-parameter-entities
     */
    protected boolean fExternalParameterEntities;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings = true ;

    /** Load external DTD. */
    protected boolean fLoadExternalDTD = true;

    // properties

    /**
     * Symbol table. This property identifier is:
     * http://apache.org/xml/properties/internal/symbol-table
     */
    protected SymbolTable fSymbolTable;

    /**
     * Error reporter. This property identifier is:
     * http://apache.org/xml/properties/internal/error-reporter
     */
    protected XMLErrorReporter fErrorReporter;

    /**
     * Entity resolver. This property identifier is:
     * http://apache.org/xml/properties/internal/entity-resolver
     */
    protected XMLEntityResolver fEntityResolver;

    /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */

    protected StaxEntityResolverWrapper fStaxEntityResolver;

    /** Property Manager. This is used from Stax */
    protected PropertyManager fPropertyManager ;

    /** StAX properties */
    boolean fSupportDTD = true;
    boolean fReplaceEntityReferences = true;
    boolean fSupportExternalEntities = true;

    /** used to restrict external access */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    // settings

    /**
     * Validation manager. This property identifier is:
     * http://apache.org/xml/properties/internal/validation-manager
     */
    protected ValidationManager fValidationManager;

    // settings

    /**
     * Buffer size. We get this value from a property. The default size
     * is used if the input buffer size property is not specified.
     * REVISIT: do we need a property for internal entity buffer size?
     */
    protected int fBufferSize = DEFAULT_BUFFER_SIZE;

    /** Security Manager */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer; when non-null, setupCurrentEntity notifies it of every entity it starts. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    // NOTE(review): not referenced in the visible part of this file; presumably
    // the index of the entity-expansion limit -- confirm against its users.
    protected int entityExpansionIndex;

    /**
     * True if the document entity is standalone. This should really
     * only be set by the document source (e.g. XMLDocumentScanner).
     */
    protected boolean fStandalone;

    // are the entities being parsed in the external subset?
    // NOTE:  this *is not* the same as whether they're external entities!
    protected boolean fInExternalSubset = false;


    // handlers
    /** Entity handler. */
    protected XMLEntityHandler fEntityHandler;

    /** Current entity scanner */
    protected XMLEntityScanner fEntityScanner ;

    /** XML 1.0 entity scanner. */
    protected XMLEntityScanner fXML10EntityScanner;

    /** XML 1.1 entity scanner. */
    protected XMLEntityScanner fXML11EntityScanner;

    /** count of entities expanded: */
    protected int fEntityExpansionCount = 0;

    // entities

    /** Entities, keyed by entity name; the first declaration of a name wins. */
    protected Map<String, Entity> fEntities = new HashMap<>();

    /** Entity stack; setupCurrentEntity pushes the previous current entity here. */
    protected Stack<Entity> fEntityStack = new Stack<>();

    /** Current entity. */
    protected Entity.ScannedEntity fCurrentEntity = null;

    /** identify if the InputSource is created by a resolver */
    boolean fISCreatedByResolver = false;

    // shared context

    /** Entity storage created by the constructors and exposed through getEntityStore(). */
    protected XMLEntityStorage fEntityStorage ;

    // Encoding descriptor {name, byte-order flag}: "UTF-8" with no byte order.
    // Same two-element layout that setupCurrentEntity unpacks from getEncodingName().
    protected final Object [] defaultEncoding = new Object[]{"UTF-8", null};


    // temp vars

    /** Resource identifier. */
    private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();

    /** Augmentations for entities. */
    private final Augmentations fEntityAugs = new AugmentationsImpl();

    /** Pool of character buffers.
*/ 395 private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); 396 397 // 398 // Constructors 399 // 400 401 /** 402 * If this constructor is used to create the object, reset() should be invoked on this object 403 */ 404 public XMLEntityManager() { 405 //for entity managers not created by parsers 406 fSecurityManager = new XMLSecurityManager(true); 407 fEntityStorage = new XMLEntityStorage(this) ; 408 setScannerVersion(Constants.XML_VERSION_1_0); 409 } // <init>() 410 411 /** Default constructor. */ 412 public XMLEntityManager(PropertyManager propertyManager) { 413 fPropertyManager = propertyManager ; 414 //pass a reference to current entity being scanned 415 //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ; 416 fEntityStorage = new XMLEntityStorage(this) ; 417 fEntityScanner = new XMLEntityScanner(propertyManager, this) ; 418 reset(propertyManager); 419 } // <init>() 420 421 /** 422 * Adds an internal entity declaration. 423 * <p> 424 * <strong>Note:</strong> This method ignores subsequent entity 425 * declarations. 426 * <p> 427 * <strong>Note:</strong> The name should be a unique symbol. The 428 * SymbolTable can be used for this purpose. 429 * 430 * @param name The name of the entity. 431 * @param text The text of the entity. 432 * 433 * @see SymbolTable 434 */ 435 public void addInternalEntity(String name, String text) { 436 if (!fEntities.containsKey(name)) { 437 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 438 fEntities.put(name, entity); 439 } else{ 440 if(fWarnDuplicateEntityDef){ 441 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 442 "MSG_DUPLICATE_ENTITY_DEFINITION", 443 new Object[]{ name }, 444 XMLErrorReporter.SEVERITY_WARNING ); 445 } 446 } 447 448 } // addInternalEntity(String,String) 449 450 /** 451 * Adds an external entity declaration. 452 * <p> 453 * <strong>Note:</strong> This method ignores subsequent entity 454 * declarations. 
455 * <p> 456 * <strong>Note:</strong> The name should be a unique symbol. The 457 * SymbolTable can be used for this purpose. 458 * 459 * @param name The name of the entity. 460 * @param publicId The public identifier of the entity. 461 * @param literalSystemId The system identifier of the entity. 462 * @param baseSystemId The base system identifier of the entity. 463 * This is the system identifier of the entity 464 * where <em>the entity being added</em> and 465 * is used to expand the system identifier when 466 * the system identifier is a relative URI. 467 * When null the system identifier of the first 468 * external entity on the stack is used instead. 469 * 470 * @see SymbolTable 471 */ 472 public void addExternalEntity(String name, 473 String publicId, String literalSystemId, 474 String baseSystemId) throws IOException { 475 if (!fEntities.containsKey(name)) { 476 if (baseSystemId == null) { 477 // search for the first external entity on the stack 478 int size = fEntityStack.size(); 479 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 480 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 481 } 482 for (int i = size - 1; i >= 0 ; i--) { 483 Entity.ScannedEntity externalEntity = 484 (Entity.ScannedEntity)fEntityStack.elementAt(i); 485 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 486 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 487 break; 488 } 489 } 490 } 491 Entity entity = new Entity.ExternalEntity(name, 492 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 493 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 494 fEntities.put(name, entity); 495 } else{ 496 if(fWarnDuplicateEntityDef){ 497 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 498 "MSG_DUPLICATE_ENTITY_DEFINITION", 499 new Object[]{ name }, 500 XMLErrorReporter.SEVERITY_WARNING ); 501 } 502 } 
503 504 } // addExternalEntity(String,String,String,String) 505 506 507 /** 508 * Adds an unparsed entity declaration. 509 * <p> 510 * <strong>Note:</strong> This method ignores subsequent entity 511 * declarations. 512 * <p> 513 * <strong>Note:</strong> The name should be a unique symbol. The 514 * SymbolTable can be used for this purpose. 515 * 516 * @param name The name of the entity. 517 * @param publicId The public identifier of the entity. 518 * @param systemId The system identifier of the entity. 519 * @param notation The name of the notation. 520 * 521 * @see SymbolTable 522 */ 523 public void addUnparsedEntity(String name, 524 String publicId, String systemId, 525 String baseSystemId, String notation) { 526 if (!fEntities.containsKey(name)) { 527 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 528 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 529 notation, fInExternalSubset); 530 fEntities.put(name, entity); 531 } else{ 532 if(fWarnDuplicateEntityDef){ 533 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 534 "MSG_DUPLICATE_ENTITY_DEFINITION", 535 new Object[]{ name }, 536 XMLErrorReporter.SEVERITY_WARNING ); 537 } 538 } 539 } // addUnparsedEntity(String,String,String,String) 540 541 542 /** get the entity storage object from entity manager */ 543 public XMLEntityStorage getEntityStore(){ 544 return fEntityStorage ; 545 } 546 547 /** return the entity responsible for reading the entity */ 548 public XMLEntityScanner getEntityScanner(){ 549 if(fEntityScanner == null) { 550 // default to 1.0 551 if(fXML10EntityScanner == null) { 552 fXML10EntityScanner = new XMLEntityScanner(); 553 } 554 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 555 fEntityScanner = fXML10EntityScanner; 556 } 557 return fEntityScanner; 558 559 } 560 561 public void setScannerVersion(short version) { 562 563 if(version == Constants.XML_VERSION_1_0) { 564 if(fXML10EntityScanner == null) { 565 fXML10EntityScanner = new 
XMLEntityScanner(); 566 } 567 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 568 fEntityScanner = fXML10EntityScanner; 569 fEntityScanner.setCurrentEntity(fCurrentEntity); 570 } else { 571 if(fXML11EntityScanner == null) { 572 fXML11EntityScanner = new XML11EntityScanner(); 573 } 574 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 575 fEntityScanner = fXML11EntityScanner; 576 fEntityScanner.setCurrentEntity(fCurrentEntity); 577 } 578 579 } 580 581 /** 582 * This method uses the passed-in XMLInputSource to make 583 * fCurrentEntity usable for reading. 584 * 585 * @param reference flag to indicate whether the entity is an Entity Reference. 586 * @param name name of the entity (XML is it's the document entity) 587 * @param xmlInputSource the input source, with sufficient information 588 * to begin scanning characters. 589 * @param literal True if this entity is started within a 590 * literal value. 591 * @param isExternal whether this entity should be treated as an internal or external entity. 592 * @throws IOException if anything can't be read 593 * XNIException If any parser-specific goes wrong. 
* @return the encoding of the new entity or null if a character stream was employed
     */
    public String setupCurrentEntity(boolean reference, String name, XMLInputSource xmlInputSource,
            boolean literal, boolean isExternal)
            throws IOException, XNIException {
        // get information

        final String publicId = xmlInputSource.getPublicId();
        String literalSystemId = xmlInputSource.getSystemId();
        String baseSystemId = xmlInputSource.getBaseSystemId();
        String encoding = xmlInputSource.getEncoding();
        // an encoding supplied by the input source counts as "externally specified"
        final boolean encodingExternallySpecified = (encoding != null);
        // stays null unless a byte order is detected below
        Boolean isBigEndian = null;

        // create reader
        InputStream stream = null;
        Reader reader = xmlInputSource.getCharacterStream();

        // First chance checking strict URI
        String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
        if (baseSystemId == null) {
            baseSystemId = expandedSystemId;
        }
        // A character stream takes precedence; only without one do we look
        // for a byte stream and, failing that, open the expanded system id.
        if (reader == null) {
            stream = xmlInputSource.getByteStream();
            if (stream == null) {
                URL location = new URL(expandedSystemId);
                URLConnection connect = location.openConnection();
                if (!(connect instanceof HttpURLConnection)) {
                    stream = connect.getInputStream();
                }
                else {
                    boolean followRedirects = true;

                    // setup URLConnection if we have an HTTPInputSource
                    if (xmlInputSource instanceof HTTPInputSource) {
                        final HttpURLConnection urlConnection = (HttpURLConnection) connect;
                        final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;

                        // set request properties
                        Iterator<Map.Entry<String, String>> propIter = httpInputSource.getHTTPRequestProperties();
                        while (propIter.hasNext()) {
                            Map.Entry<String, String> entry = propIter.next();
                            urlConnection.setRequestProperty(entry.getKey(), entry.getValue());
                        }

                        // set preference for redirection
                        followRedirects = httpInputSource.getFollowHTTPRedirects();
                        if (!followRedirects) {
                            setInstanceFollowRedirects(urlConnection, followRedirects);
                        }
                    }

                    stream = connect.getInputStream();

                    // REVISIT: If the URLConnection has external encoding
                    // information, we should be reading it here. It's located
                    // in the charset parameter of Content-Type. -- mrglavas

                    if (followRedirects) {
                        String redirect = connect.getURL().toString();
                        // E43: Check if the URL was redirected, and then
                        // update literal and expanded system IDs if needed.
                        if (!redirect.equals(expandedSystemId)) {
                            literalSystemId = redirect;
                            expandedSystemId = redirect;
                        }
                    }
                }
            }

            // wrap this stream in RewindableInputStream
            // (the stream.reset() calls below rely on this wrapper to re-read the sniffed bytes)
            stream = new RewindableInputStream(stream);

            // perform auto-detect of encoding if necessary
            if (encoding == null) {
                // read first four bytes and determine encoding
                final byte[] b4 = new byte[4];
                int count = 0;
                // NOTE(review): this loop never stops at end-of-stream, so a
                // short stream stores (byte)-1 for the missing bytes, count is
                // always 4 and the else branch below is unreachable.
                for (; count<4; count++ ) {
                    b4[count] = (byte)stream.read();
                }
                if (count == 4) {
                    // descriptor layout: [0] encoding name, [1] byte-order flag
                    Object [] encodingDesc = getEncodingName(b4, count);
                    encoding = (String)(encodingDesc[0]);
                    isBigEndian = (Boolean)(encodingDesc[1]);

                    stream.reset();
                    // Special case UTF-8 files with BOM created by Microsoft
                    // tools. It's more efficient to consume the BOM than make
                    // the reader perform extra checks. -Ac
                    if (count > 2 && encoding.equals("UTF-8")) {
                        int b0 = b4[0] & 0xFF;
                        int b1 = b4[1] & 0xFF;
                        int b2 = b4[2] & 0xFF;
                        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                            // ignore first three bytes...
                            stream.skip(3);
                        }
                    }
                    reader = createReader(stream, encoding, isBigEndian);
                } else {
                    reader = createReader(stream, encoding, isBigEndian);
                }
            }

            // use specified encoding
            else {
                encoding = encoding.toUpperCase(Locale.ENGLISH);

                // If encoding is UTF-8, consume BOM if one is present.
                if (encoding.equals("UTF-8")) {
                    final int[] b3 = new int[3];
                    int count = 0;
                    for (; count < 3; ++count) {
                        b3[count] = stream.read();
                        if (b3[count] == -1)
                            break;
                    }
                    if (count == 3) {
                        // when the three bytes ARE the BOM they stay consumed (no reset)
                        if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
                            // First three bytes are not BOM, so reset.
                            stream.reset();
                        }
                    } else {
                        // fewer than three bytes available: rewind what was read
                        stream.reset();
                    }
                }
                // If encoding is UTF-16, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("UTF-16")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    String utf16Encoding = "UTF-16";
                    if (count >= 2) {
                        final int b0 = b4[0];
                        final int b1 = b4[1];
                        if (b0 == 0xFE && b1 == 0xFF) {
                            // UTF-16, big-endian
                            utf16Encoding = "UTF-16BE";
                            isBigEndian = Boolean.TRUE;
                        }
                        else if (b0 == 0xFF && b1 == 0xFE) {
                            // UTF-16, little-endian
                            utf16Encoding = "UTF-16LE";
                            isBigEndian = Boolean.FALSE;
                        }
                        else if (count == 4) {
                            // no BOM: look for "<?" encoded as two 16-bit units
                            final int b2 = b4[2];
                            final int b3 = b4[3];
                            if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                // UTF-16, big-endian, no BOM
                                utf16Encoding = "UTF-16BE";
                                isBigEndian = Boolean.TRUE;
                            }
                            if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                // UTF-16, little-endian, no BOM
                                utf16Encoding = "UTF-16LE";
                                isBigEndian = Boolean.FALSE;
                            }
                        }
                    }
                    // NOTE(review): this reader is replaced by the unconditional
                    // createReader(stream, encoding, isBigEndian) call after the
                    // if/else chain, so utf16Encoding never reaches the reader
                    // that is actually used -- confirm whether that is intended.
                    reader = createReader(stream, utf16Encoding, isBigEndian);
                }
                // If encoding is UCS-4, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-4")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    // Ignore unusual octet order for now.
                    if (count == 4) {
                        // UCS-4, big endian (1234)
                        if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-4, little endian (4321)
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }
                // If encoding is UCS-2, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-2")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    if (count == 4) {
                        // UCS-2, big endian
                        if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-2, little endian
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }

                // reader for every externally specified encoding (runs for the UTF-16 case too)
                reader = createReader(stream, encoding, isBigEndian);
            }

            // read one character at a time so we don't jump too far
            // ahead, converting characters from the byte stream in
            // the wrong encoding
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ no longer wrapping reader in OneCharReader");
            }
            //reader = new OneCharReader(reader);
        }

        // We've seen a new Reader.
        // Push it on the stack so we can close it later.
        //fOwnReaders.add(reader);

        // push entity on stack
        if (fCurrentEntity != null) {
            fEntityStack.push(fCurrentEntity);
        }

        // create entity
        /* if encoding is specified externally, 'encoding' information present
         * in the prolog of the XML document is not considered. Hence, prolog can
         * be read in Chunks of data instead of byte by byte.
         */
        fCurrentEntity = new Entity.ScannedEntity(reference, name,
                new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),
                stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
        fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
        fEntityScanner.setCurrentEntity(fCurrentEntity);
        // keep the shared resource identifier in sync with the entity just started
        fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
        if (fLimitAnalyzer != null) {
            fLimitAnalyzer.startEntity(name);
        }
        return encoding;
    } //setupCurrentEntity(boolean, String, XMLInputSource, boolean, boolean): String


    /**
     * Checks whether an entity given by name is external.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity is external, false otherwise
     *           (including when the entity is not declared).
     */
    public boolean isExternalEntity(String entityName) {

        Entity entity = fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isExternal();
    }

    /**
     * Checks whether the declaration of an entity given by name is
     * in the external subset.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity was declared in the external subset, false otherwise
     *           (including when the entity is not declared).
875 */ 876 public boolean isEntityDeclInExternalSubset(String entityName) { 877 878 Entity entity = fEntities.get(entityName); 879 if (entity == null) { 880 return false; 881 } 882 return entity.isEntityDeclInExternalSubset(); 883 } 884 885 886 887 // 888 // Public methods 889 // 890 891 /** 892 * Sets whether the document entity is standalone. 893 * 894 * @param standalone True if document entity is standalone. 895 */ 896 public void setStandalone(boolean standalone) { 897 fStandalone = standalone; 898 } 899 // setStandalone(boolean) 900 901 /** Returns true if the document entity is standalone. */ 902 public boolean isStandalone() { 903 return fStandalone; 904 } //isStandalone():boolean 905 906 public boolean isDeclaredEntity(String entityName) { 907 908 Entity entity = fEntities.get(entityName); 909 return entity != null; 910 } 911 912 public boolean isUnparsedEntity(String entityName) { 913 914 Entity entity = fEntities.get(entityName); 915 if (entity == null) { 916 return false; 917 } 918 return entity.isUnparsed(); 919 } 920 921 922 923 // this simply returns the fResourceIdentifier object; 924 // this should only be used with caution by callers that 925 // carefully manage the entity manager's behaviour, so that 926 // this doesn't returning meaningless or misleading data. 927 // @return a reference to the current fResourceIdentifier object 928 public XMLResourceIdentifier getCurrentResourceIdentifier() { 929 return fResourceIdentifier; 930 } 931 932 /** 933 * Sets the entity handler. When an entity starts and ends, the 934 * entity handler is notified of the change. 935 * 936 * @param entityHandler The new entity handler. 
937 */ 938 939 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 940 fEntityHandler = (XMLEntityHandler) entityHandler; 941 } // setEntityHandler(XMLEntityHandler) 942 943 //this function returns StaxXMLInputSource 944 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 945 946 if(resourceIdentifier == null ) return null; 947 948 String publicId = resourceIdentifier.getPublicId(); 949 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 950 String baseSystemId = resourceIdentifier.getBaseSystemId(); 951 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 952 // if no base systemId given, assume that it's relative 953 // to the systemId of the current scanned entity 954 // Sometimes the system id is not (properly) expanded. 955 // We need to expand the system id if: 956 // a. the expanded one was null; or 957 // b. the base system id was null, but becomes non-null from the current entity. 958 boolean needExpand = (expandedSystemId == null); 959 // REVISIT: why would the baseSystemId ever be null? if we 960 // didn't have to make this check we wouldn't have to reuse the 961 // fXMLResourceIdentifier object... 
962 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 963 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 964 if (baseSystemId != null) 965 needExpand = true; 966 } 967 if (needExpand) 968 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 969 970 // give the entity resolver a chance 971 StaxXMLInputSource staxInputSource = null; 972 XMLInputSource xmlInputSource = null; 973 974 XMLResourceIdentifierImpl ri = null; 975 976 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 977 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 978 } else { 979 fResourceIdentifier.clear(); 980 ri = fResourceIdentifier; 981 } 982 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 983 if(DEBUG_RESOLVER){ 984 System.out.println("BEFORE Calling resolveEntity") ; 985 } 986 987 fISCreatedByResolver = false; 988 //either of Stax or Xerces would be null 989 if(fStaxEntityResolver != null){ 990 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 991 if(staxInputSource != null) { 992 fISCreatedByResolver = true; 993 } 994 } 995 996 if(fEntityResolver != null){ 997 xmlInputSource = fEntityResolver.resolveEntity(ri); 998 if(xmlInputSource != null) { 999 fISCreatedByResolver = true; 1000 } 1001 } 1002 1003 if(xmlInputSource != null){ 1004 //wrap this XMLInputSource to StaxInputSource 1005 staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver); 1006 } 1007 1008 // do default resolution 1009 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 1010 if (staxInputSource == null) { 1011 // REVISIT: when systemId is null, I think we should return null. 1012 // is this the right solution? 
-SG 1013 //if (systemId != null) 1014 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 1015 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 1016 //Waiting for the clarification from EG. - nb 1017 } 1018 1019 if (DEBUG_RESOLVER) { 1020 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1021 System.err.println(" = " + xmlInputSource); 1022 } 1023 1024 return staxInputSource; 1025 1026 } 1027 1028 /** 1029 * Resolves the specified public and system identifiers. This 1030 * method first attempts to resolve the entity based on the 1031 * EntityResolver registered by the application. If no entity 1032 * resolver is registered or if the registered entity handler 1033 * is unable to resolve the entity, then default entity 1034 * resolution will occur. 1035 * 1036 * @param publicId The public identifier of the entity. 1037 * @param systemId The system identifier of the entity. 1038 * @param baseSystemId The base system identifier of the entity. 1039 * This is the system identifier of the current 1040 * entity and is used to expand the system 1041 * identifier when the system identifier is a 1042 * relative URI. 1043 * 1044 * @return Returns an input source that wraps the resolved entity. 1045 * This method will never return null. 1046 * 1047 * @throws IOException Thrown on i/o error. 1048 * @throws XNIException Thrown by entity resolver to signal an error. 
1049 */ 1050 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1051 if(resourceIdentifier == null ) return null; 1052 String publicId = resourceIdentifier.getPublicId(); 1053 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1054 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1055 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1056 1057 // if no base systemId given, assume that it's relative 1058 // to the systemId of the current scanned entity 1059 // Sometimes the system id is not (properly) expanded. 1060 // We need to expand the system id if: 1061 // a. the expanded one was null; or 1062 // b. the base system id was null, but becomes non-null from the current entity. 1063 boolean needExpand = (expandedSystemId == null); 1064 // REVISIT: why would the baseSystemId ever be null? if we 1065 // didn't have to make this check we wouldn't have to reuse the 1066 // fXMLResourceIdentifier object... 1067 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1068 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1069 if (baseSystemId != null) 1070 needExpand = true; 1071 } 1072 if (needExpand) 1073 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1074 1075 // give the entity resolver a chance 1076 XMLInputSource xmlInputSource = null; 1077 1078 if (fEntityResolver != null) { 1079 resourceIdentifier.setBaseSystemId(baseSystemId); 1080 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1081 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1082 } 1083 1084 // do default resolution 1085 // REVISIT: what's the correct behavior if the user provided an entity 1086 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1087 // an input source (xmlInputSource == null)? 1088 // do we do default resolution, or do we just return null? 
-SG 1089 if (xmlInputSource == null) { 1090 // REVISIT: when systemId is null, I think we should return null. 1091 // is this the right solution? -SG 1092 //if (systemId != null) 1093 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1094 } 1095 1096 if (DEBUG_RESOLVER) { 1097 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1098 System.err.println(" = " + xmlInputSource); 1099 } 1100 1101 return xmlInputSource; 1102 1103 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1104 1105 /** 1106 * Starts a named entity. 1107 * 1108 * @param isGE flag to indicate whether the entity is a General Entity 1109 * @param entityName The name of the entity to start. 1110 * @param literal True if this entity is started within a literal 1111 * value. 1112 * 1113 * @throws IOException Thrown on i/o error. 1114 * @throws XNIException Thrown by entity handler to signal an error. 1115 */ 1116 public void startEntity(boolean isGE, String entityName, boolean literal) 1117 throws IOException, XNIException { 1118 1119 // was entity declared? 1120 Entity entity = fEntityStorage.getEntity(entityName); 1121 if (entity == null) { 1122 if (fEntityHandler != null) { 1123 String encoding = null; 1124 fResourceIdentifier.clear(); 1125 fEntityAugs.removeAllItems(); 1126 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1127 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1128 fEntityAugs.removeAllItems(); 1129 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1130 fEntityHandler.endEntity(entityName, fEntityAugs); 1131 } 1132 return; 1133 } 1134 1135 // should we skip external entities? 
1136 boolean external = entity.isExternal(); 1137 Entity.ExternalEntity externalEntity = null; 1138 String extLitSysId = null, extBaseSysId = null, expandedSystemId = null; 1139 if (external) { 1140 externalEntity = (Entity.ExternalEntity)entity; 1141 extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1142 extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1143 expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1144 boolean unparsed = entity.isUnparsed(); 1145 boolean parameter = entityName.startsWith("%"); 1146 boolean general = !parameter; 1147 if (unparsed || (general && !fExternalGeneralEntities) || 1148 (parameter && !fExternalParameterEntities) || 1149 !fSupportDTD || !fSupportExternalEntities) { 1150 1151 if (fEntityHandler != null) { 1152 fResourceIdentifier.clear(); 1153 final String encoding = null; 1154 fResourceIdentifier.setValues( 1155 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1156 extLitSysId, extBaseSysId, expandedSystemId); 1157 fEntityAugs.removeAllItems(); 1158 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1159 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1160 fEntityAugs.removeAllItems(); 1161 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1162 fEntityHandler.endEntity(entityName, fEntityAugs); 1163 } 1164 return; 1165 } 1166 } 1167 1168 // is entity recursive? 1169 int size = fEntityStack.size(); 1170 for (int i = size; i >= 0; i--) { 1171 Entity activeEntity = i == size 1172 ? 
fCurrentEntity 1173 : (Entity)fEntityStack.elementAt(i); 1174 if (activeEntity.name == entityName) { 1175 String path = entityName; 1176 for (int j = i + 1; j < size; j++) { 1177 activeEntity = (Entity)fEntityStack.elementAt(j); 1178 path = path + " -> " + activeEntity.name; 1179 } 1180 path = path + " -> " + fCurrentEntity.name; 1181 path = path + " -> " + entityName; 1182 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1183 "RecursiveReference", 1184 new Object[] { entityName, path }, 1185 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1186 1187 if (fEntityHandler != null) { 1188 fResourceIdentifier.clear(); 1189 final String encoding = null; 1190 if (external) { 1191 fResourceIdentifier.setValues( 1192 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1193 extLitSysId, extBaseSysId, expandedSystemId); 1194 } 1195 fEntityAugs.removeAllItems(); 1196 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1197 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1198 fEntityAugs.removeAllItems(); 1199 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1200 fEntityHandler.endEntity(entityName, fEntityAugs); 1201 } 1202 1203 return; 1204 } 1205 } 1206 1207 // resolve external entity 1208 StaxXMLInputSource staxInputSource = null; 1209 XMLInputSource xmlInputSource = null ; 1210 1211 if (external) { 1212 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1213 /** xxx: Waiting from the EG 1214 * //simply return if there was entity resolver registered and application 1215 * //returns either XMLStreamReader or XMLEventReader. 
1216 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1217 */ 1218 xmlInputSource = staxInputSource.getXMLInputSource() ; 1219 if (!fISCreatedByResolver) { 1220 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation 1221 if (fLoadExternalDTD) { 1222 String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL); 1223 if (accessError != null) { 1224 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1225 "AccessExternalEntity", 1226 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError }, 1227 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1228 } 1229 } 1230 } 1231 } 1232 // wrap internal entity 1233 else { 1234 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1235 Reader reader = new StringReader(internalEntity.text); 1236 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1237 } 1238 1239 // start the entity 1240 startEntity(isGE, entityName, xmlInputSource, literal, external); 1241 1242 } // startEntity(String,boolean) 1243 1244 /** 1245 * Starts the document entity. The document entity has the "[xml]" 1246 * pseudo-name. 1247 * 1248 * @param xmlInputSource The input source of the document entity. 1249 * 1250 * @throws IOException Thrown on i/o error. 1251 * @throws XNIException Thrown by entity handler to signal an error. 1252 */ 1253 public void startDocumentEntity(XMLInputSource xmlInputSource) 1254 throws IOException, XNIException { 1255 startEntity(false, XMLEntity, xmlInputSource, false, true); 1256 } // startDocumentEntity(XMLInputSource) 1257 1258 //xxx these methods are not required. 1259 /** 1260 * Starts the DTD entity. The DTD entity has the "[dtd]" 1261 * pseudo-name. 1262 * 1263 * @param xmlInputSource The input source of the DTD entity. 1264 * 1265 * @throws IOException Thrown on i/o error. 1266 * @throws XNIException Thrown by entity handler to signal an error. 
1267 */ 1268 public void startDTDEntity(XMLInputSource xmlInputSource) 1269 throws IOException, XNIException { 1270 startEntity(false, DTDEntity, xmlInputSource, false, true); 1271 } // startDTDEntity(XMLInputSource) 1272 1273 // indicate start of external subset so that 1274 // location of entity decls can be tracked 1275 public void startExternalSubset() { 1276 fInExternalSubset = true; 1277 } 1278 1279 public void endExternalSubset() { 1280 fInExternalSubset = false; 1281 } 1282 1283 /** 1284 * Starts an entity. 1285 * <p> 1286 * This method can be used to insert an application defined XML 1287 * entity stream into the parsing stream. 1288 * 1289 * @param isGE flag to indicate whether the entity is a General Entity 1290 * @param name The name of the entity. 1291 * @param xmlInputSource The input source of the entity. 1292 * @param literal True if this entity is started within a 1293 * literal value. 1294 * @param isExternal whether this entity should be treated as an internal or external entity. 1295 * 1296 * @throws IOException Thrown on i/o error. 1297 * @throws XNIException Thrown by entity handler to signal an error. 1298 */ 1299 public void startEntity(boolean isGE, String name, 1300 XMLInputSource xmlInputSource, 1301 boolean literal, boolean isExternal) 1302 throws IOException, XNIException { 1303 1304 String encoding = setupCurrentEntity(isGE, name, xmlInputSource, literal, isExternal); 1305 1306 //when entity expansion limit is set by the Application, we need to 1307 //check for the entity expansion limit set by the parser, if number of entity 1308 //expansions exceeds the entity expansion limit, parser will throw fatal error. 1309 // Note that this represents the nesting level of open entities. 
1310 fEntityExpansionCount++; 1311 if(fLimitAnalyzer != null) { 1312 fLimitAnalyzer.addValue(entityExpansionIndex, name, 1); 1313 } 1314 if( fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)){ 1315 fSecurityManager.debugPrint(fLimitAnalyzer); 1316 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimit", 1317 new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)}, 1318 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1319 // is there anything better to do than reset the counter? 1320 // at least one can envision debugging applications where this might 1321 // be useful... 1322 fEntityExpansionCount = 0; 1323 } 1324 1325 // call handler 1326 if (fEntityHandler != null) { 1327 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null); 1328 } 1329 1330 } // startEntity(String,XMLInputSource) 1331 1332 /** 1333 * Return the current entity being scanned. Current entity is SET using startEntity function. 1334 * @return Entity.ScannedEntity 1335 */ 1336 1337 public Entity.ScannedEntity getCurrentEntity(){ 1338 return fCurrentEntity ; 1339 } 1340 1341 /** 1342 * Return the top level entity handled by this manager, or null 1343 * if no entity was added. 1344 */ 1345 public Entity.ScannedEntity getTopLevelEntity() { 1346 return (Entity.ScannedEntity) 1347 (fEntityStack.empty() ? null : fEntityStack.elementAt(0)); 1348 } 1349 1350 1351 /** 1352 * Close all opened InputStreams and Readers opened by this parser. 1353 */ 1354 public void closeReaders() { 1355 /** this call actually does nothing, readers are closed in the endEntity method 1356 * through the current entity. 
1357 * The change seems to have happened during the jdk6 development with the 1358 * addition of StAX 1359 **/ 1360 } 1361 1362 public void endEntity() throws IOException, XNIException { 1363 1364 // call handler 1365 if (DEBUG_BUFFER) { 1366 System.out.print("(endEntity: "); 1367 print(); 1368 System.out.println(); 1369 } 1370 //pop the entity from the stack 1371 Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ; 1372 1373 /** need to close the reader first since the program can end 1374 * prematurely (e.g. fEntityHandler.endEntity may throw exception) 1375 * leaving the reader open 1376 */ 1377 //close the reader 1378 if(fCurrentEntity != null){ 1379 //close the reader 1380 try{ 1381 if (fLimitAnalyzer != null) { 1382 fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name); 1383 if (fCurrentEntity.name.equals("[xml]")) { 1384 fSecurityManager.debugPrint(fLimitAnalyzer); 1385 } 1386 } 1387 fCurrentEntity.close(); 1388 }catch(IOException ex){ 1389 throw new XNIException(ex); 1390 } 1391 } 1392 1393 if (fEntityHandler != null) { 1394 //so this is the last opened entity, signal it to current fEntityHandler using Augmentation 1395 if(entity == null){ 1396 fEntityAugs.removeAllItems(); 1397 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE); 1398 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs); 1399 fEntityAugs.removeAllItems(); 1400 }else{ 1401 fEntityHandler.endEntity(fCurrentEntity.name, null); 1402 } 1403 } 1404 //check if it is a document entity 1405 boolean documentEntity = fCurrentEntity.name == XMLEntity; 1406 1407 //set popped entity as current entity 1408 fCurrentEntity = entity; 1409 fEntityScanner.setCurrentEntity(fCurrentEntity); 1410 1411 //check if there are any entity left in the stack -- if there are 1412 //no entries EOF has been reached. 
1413 // throw exception when it is the last entity but it is not a document entity 1414 1415 if(fCurrentEntity == null & !documentEntity){ 1416 throw new EOFException() ; 1417 } 1418 1419 if (DEBUG_BUFFER) { 1420 System.out.print(")endEntity: "); 1421 print(); 1422 System.out.println(); 1423 } 1424 1425 } // endEntity() 1426 1427 1428 // 1429 // XMLComponent methods 1430 // 1431 public void reset(PropertyManager propertyManager){ 1432 // xerces properties 1433 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 1434 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 1435 try { 1436 fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER); 1437 } catch (XMLConfigurationException e) { 1438 fStaxEntityResolver = null; 1439 } 1440 1441 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 1442 fReplaceEntityReferences = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue(); 1443 fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue(); 1444 1445 // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd 1446 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 1447 1448 // JAXP 1.5 feature 1449 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 1450 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1451 1452 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 1453 1454 fLimitAnalyzer = new XMLLimitAnalyzer(); 1455 //reset fEntityStorage 1456 
fEntityStorage.reset(propertyManager); 1457 //reset XMLEntityReaderImpl 1458 fEntityScanner.reset(propertyManager); 1459 1460 // initialize state 1461 //fStandalone = false; 1462 fEntities.clear(); 1463 fEntityStack.removeAllElements(); 1464 fCurrentEntity = null; 1465 fValidation = false; 1466 fExternalGeneralEntities = true; 1467 fExternalParameterEntities = true; 1468 fAllowJavaEncodings = true ; 1469 } 1470 1471 /** 1472 * Resets the component. The component can query the component manager 1473 * about any features and properties that affect the operation of the 1474 * component. 1475 * 1476 * @param componentManager The component manager. 1477 * 1478 * @throws SAXException Thrown by component on initialization error. 1479 * For example, if a feature or property is 1480 * required for the operation of the component, the 1481 * component manager may throw a 1482 * SAXNotRecognizedException or a 1483 * SAXNotSupportedException. 1484 */ 1485 public void reset(XMLComponentManager componentManager) 1486 throws XMLConfigurationException { 1487 1488 boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true); 1489 1490 if (!parser_settings) { 1491 // parser settings have not been changed 1492 reset(); 1493 if(fEntityScanner != null){ 1494 fEntityScanner.reset(componentManager); 1495 } 1496 if(fEntityStorage != null){ 1497 fEntityStorage.reset(componentManager); 1498 } 1499 return; 1500 } 1501 1502 // sax features 1503 fValidation = componentManager.getFeature(VALIDATION, false); 1504 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true); 1505 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true); 1506 1507 // xerces features 1508 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 1509 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false); 1510 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 
1511 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 1512 1513 // xerces properties 1514 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 1515 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 1516 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null); 1517 fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null); 1518 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 1519 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null); 1520 entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT); 1521 1522 //StAX Property 1523 fSupportDTD = true; 1524 fReplaceEntityReferences = true; 1525 fSupportExternalEntities = true; 1526 1527 // JAXP 1.5 feature 1528 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 1529 if (spm == null) { 1530 spm = new XMLSecurityPropertyManager(); 1531 } 1532 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1533 1534 //reset general state 1535 reset(); 1536 1537 fEntityScanner.reset(componentManager); 1538 fEntityStorage.reset(componentManager); 1539 1540 } // reset(XMLComponentManager) 1541 1542 // reset general state. Should not be called other than by 1543 // a class acting as a component manager but not 1544 // implementing that interface for whatever reason. 
1545 public void reset() { 1546 fLimitAnalyzer = new XMLLimitAnalyzer(); 1547 // initialize state 1548 fStandalone = false; 1549 fEntities.clear(); 1550 fEntityStack.removeAllElements(); 1551 fEntityExpansionCount = 0; 1552 1553 fCurrentEntity = null; 1554 // reset scanner 1555 if(fXML10EntityScanner != null){ 1556 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1557 } 1558 if(fXML11EntityScanner != null) { 1559 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1560 } 1561 1562 // DEBUG 1563 if (DEBUG_ENTITIES) { 1564 addInternalEntity("text", "Hello, World."); 1565 addInternalEntity("empty-element", "<foo/>"); 1566 addInternalEntity("balanced-element", "<foo></foo>"); 1567 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1568 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1569 addInternalEntity("unbalanced-entity", "<foo>"); 1570 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1571 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1572 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1573 try { 1574 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1575 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1576 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1577 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1578 } 1579 catch (IOException ex) { 1580 // should never happen 1581 } 1582 } 1583 1584 fEntityHandler = null; 1585 1586 // reset scanner 1587 //if(fEntityScanner!=null) 1588 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1589 1590 } 1591 /** 1592 * Returns a list of feature identifiers that are recognized by 1593 * this component. This method may return null if no features 1594 * are recognized by this component. 
1595 */ 1596 public String[] getRecognizedFeatures() { 1597 return (String[])(RECOGNIZED_FEATURES.clone()); 1598 } // getRecognizedFeatures():String[] 1599 1600 /** 1601 * Sets the state of a feature. This method is called by the component 1602 * manager any time after reset when a feature changes state. 1603 * <p> 1604 * <strong>Note:</strong> Components should silently ignore features 1605 * that do not affect the operation of the component. 1606 * 1607 * @param featureId The feature identifier. 1608 * @param state The state of the feature. 1609 * 1610 * @throws SAXNotRecognizedException The component should not throw 1611 * this exception. 1612 * @throws SAXNotSupportedException The component should not throw 1613 * this exception. 1614 */ 1615 public void setFeature(String featureId, boolean state) 1616 throws XMLConfigurationException { 1617 1618 // xerces features 1619 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1620 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1621 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1622 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1623 fAllowJavaEncodings = state; 1624 } 1625 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 1626 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 1627 fLoadExternalDTD = state; 1628 return; 1629 } 1630 } 1631 1632 } // setFeature(String,boolean) 1633 1634 /** 1635 * Sets the value of a property. This method is called by the component 1636 * manager any time after reset when a property changes value. 1637 * <p> 1638 * <strong>Note:</strong> Components should silently ignore properties 1639 * that do not affect the operation of the component. 1640 * 1641 * @param propertyId The property identifier. 1642 * @param value The value of the property. 1643 * 1644 * @throws SAXNotRecognizedException The component should not throw 1645 * this exception. 
1646 * @throws SAXNotSupportedException The component should not throw 1647 * this exception. 1648 */ 1649 public void setProperty(String propertyId, Object value){ 1650 // Xerces properties 1651 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1652 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1653 1654 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1655 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1656 fSymbolTable = (SymbolTable)value; 1657 return; 1658 } 1659 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1660 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1661 fErrorReporter = (XMLErrorReporter)value; 1662 return; 1663 } 1664 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1665 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1666 fEntityResolver = (XMLEntityResolver)value; 1667 return; 1668 } 1669 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1670 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1671 Integer bufferSize = (Integer)value; 1672 if (bufferSize != null && 1673 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1674 fBufferSize = bufferSize.intValue(); 1675 fEntityScanner.setBufferSize(fBufferSize); 1676 fBufferPool.setExternalBufferSize(fBufferSize); 1677 } 1678 } 1679 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1680 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1681 fSecurityManager = (XMLSecurityManager)value; 1682 } 1683 } 1684 1685 //JAXP 1.5 properties 1686 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 1687 { 1688 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 1689 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1690 } 1691 } 1692 1693 public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) { 1694 this.fLimitAnalyzer = fLimitAnalyzer; 1695 } 1696 1697 /** 
1698 * Returns a list of property identifiers that are recognized by 1699 * this component. This method may return null if no properties 1700 * are recognized by this component. 1701 */ 1702 public String[] getRecognizedProperties() { 1703 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1704 } // getRecognizedProperties():String[] 1705 /** 1706 * Returns the default state for a feature, or null if this 1707 * component does not want to report a default value for this 1708 * feature. 1709 * 1710 * @param featureId The feature identifier. 1711 * 1712 * @since Xerces 2.2.0 1713 */ 1714 public Boolean getFeatureDefault(String featureId) { 1715 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1716 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1717 return FEATURE_DEFAULTS[i]; 1718 } 1719 } 1720 return null; 1721 } // getFeatureDefault(String):Boolean 1722 1723 /** 1724 * Returns the default state for a property, or null if this 1725 * component does not want to report a default value for this 1726 * property. 1727 * 1728 * @param propertyId The property identifier. 1729 * 1730 * @since Xerces 2.2.0 1731 */ 1732 public Object getPropertyDefault(String propertyId) { 1733 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1734 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1735 return PROPERTY_DEFAULTS[i]; 1736 } 1737 } 1738 return null; 1739 } // getPropertyDefault(String):Object 1740 1741 // 1742 // Public static methods 1743 // 1744 1745 /** 1746 * Expands a system id and returns the system id as a URI, if 1747 * it can be expanded. A return value of null means that the 1748 * identifier is already expanded. An exception thrown 1749 * indicates a failure to expand the id. 1750 * 1751 * @param systemId The systemId to be expanded. 1752 * 1753 * @return Returns the URI string representing the expanded system 1754 * identifier. A null value indicates that the given 1755 * system identifier is already expanded. 
1756 * 1757 */ 1758 public static String expandSystemId(String systemId) { 1759 return expandSystemId(systemId, null); 1760 } // expandSystemId(String):String 1761 1762 // 1763 // Public static methods 1764 // 1765 1766 // current value of the "user.dir" property 1767 private static String gUserDir; 1768 // cached URI object for the current value of the escaped "user.dir" property stored as a URI 1769 private static URI gUserDirURI; 1770 // which ASCII characters need to be escaped 1771 private static boolean gNeedEscaping[] = new boolean[128]; 1772 // the first hex character if a character needs to be escaped 1773 private static char gAfterEscaping1[] = new char[128]; 1774 // the second hex character if a character needs to be escaped 1775 private static char gAfterEscaping2[] = new char[128]; 1776 private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7', 1777 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 1778 // initialize the above 3 arrays 1779 static { 1780 for (int i = 0; i <= 0x1f; i++) { 1781 gNeedEscaping[i] = true; 1782 gAfterEscaping1[i] = gHexChs[i >> 4]; 1783 gAfterEscaping2[i] = gHexChs[i & 0xf]; 1784 } 1785 gNeedEscaping[0x7f] = true; 1786 gAfterEscaping1[0x7f] = '7'; 1787 gAfterEscaping2[0x7f] = 'F'; 1788 char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}', 1789 '|', '\\', '^', '~', '[', ']', '`'}; 1790 int len = escChs.length; 1791 char ch; 1792 for (int i = 0; i < len; i++) { 1793 ch = escChs[i]; 1794 gNeedEscaping[ch] = true; 1795 gAfterEscaping1[ch] = gHexChs[ch >> 4]; 1796 gAfterEscaping2[ch] = gHexChs[ch & 0xf]; 1797 } 1798 } 1799 1800 // To escape the "user.dir" system property, by using %HH to represent 1801 // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%' 1802 // and '"'. It's a static method, so needs to be synchronized. 1803 // this method looks heavy, but since the system property isn't expected 1804 // to change often, so in most cases, we only need to return the URI 1805 // that was escaped before. 
1806 // According to the URI spec, non-ASCII characters (whose value >= 128) 1807 // need to be escaped too. 1808 // REVISIT: don't know how to escape non-ASCII characters, especially 1809 // which encoding to use. Leave them for now. 1810 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1811 // get the user.dir property 1812 String userDir = ""; 1813 try { 1814 userDir = SecuritySupport.getSystemProperty("user.dir"); 1815 } 1816 catch (SecurityException se) { 1817 } 1818 1819 // return empty string if property value is empty string. 1820 if (userDir.length() == 0) 1821 return new URI("file", "", "", null, null); 1822 // compute the new escaped value if the new property value doesn't 1823 // match the previous one 1824 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1825 return gUserDirURI; 1826 } 1827 1828 // record the new value as the global property value 1829 gUserDir = userDir; 1830 1831 char separator = java.io.File.separatorChar; 1832 userDir = userDir.replace(separator, '/'); 1833 1834 int len = userDir.length(), ch; 1835 StringBuffer buffer = new StringBuffer(len*3); 1836 // change C:/blah to /C:/blah 1837 if (len >= 2 && userDir.charAt(1) == ':') { 1838 ch = Character.toUpperCase(userDir.charAt(0)); 1839 if (ch >= 'A' && ch <= 'Z') { 1840 buffer.append('/'); 1841 } 1842 } 1843 1844 // for each character in the path 1845 int i = 0; 1846 for (; i < len; i++) { 1847 ch = userDir.charAt(i); 1848 // if it's not an ASCII character, break here, and use UTF-8 encoding 1849 if (ch >= 128) 1850 break; 1851 if (gNeedEscaping[ch]) { 1852 buffer.append('%'); 1853 buffer.append(gAfterEscaping1[ch]); 1854 buffer.append(gAfterEscaping2[ch]); 1855 // record the fact that it's escaped 1856 } 1857 else { 1858 buffer.append((char)ch); 1859 } 1860 } 1861 1862 // we saw some non-ascii character 1863 if (i < len) { 1864 // get UTF-8 bytes for the remaining sub-string 1865 byte[] bytes = null; 1866 byte b; 1867 try { 1868 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1869 } catch (java.io.UnsupportedEncodingException e) { 1870 // should never happen 1871 return new URI("file", "", userDir, null, null); 1872 } 1873 len = bytes.length; 1874 1875 // for each byte 1876 for (i = 0; i < len; i++) { 1877 b = bytes[i]; 1878 // for non-ascii character: make it positive, then escape 1879 if (b < 0) { 1880 ch = b + 256; 1881 buffer.append('%'); 1882 buffer.append(gHexChs[ch >> 4]); 1883 buffer.append(gHexChs[ch & 0xf]); 1884 } 1885 else if (gNeedEscaping[b]) { 1886 buffer.append('%'); 1887 buffer.append(gAfterEscaping1[b]); 1888 buffer.append(gAfterEscaping2[b]); 1889 } 1890 else { 1891 buffer.append((char)b); 1892 } 1893 } 1894 } 1895 1896 // change blah/blah to blah/blah/ 1897 if (!userDir.endsWith("/")) 1898 buffer.append('/'); 1899 1900 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1901 1902 return gUserDirURI; 1903 } 1904 1905 /** 1906 * Absolutizes a URI using the current value 1907 * of the "user.dir" property as the base URI. If 1908 * the URI is already absolute, this is a no-op. 1909 * 1910 * @param uri the URI to absolutize 1911 */ 1912 public static void absolutizeAgainstUserDir(URI uri) 1913 throws URI.MalformedURIException { 1914 uri.absolutize(getUserDir()); 1915 } 1916 1917 /** 1918 * Expands a system id and returns the system id as a URI, if 1919 * it can be expanded. A return value of null means that the 1920 * identifier is already expanded. An exception thrown 1921 * indicates a failure to expand the id. 1922 * 1923 * @param systemId The systemId to be expanded. 1924 * 1925 * @return Returns the URI string representing the expanded system 1926 * identifier. A null value indicates that the given 1927 * system identifier is already expanded. 
1928 * 1929 */ 1930 public static String expandSystemId(String systemId, String baseSystemId) { 1931 1932 // check for bad parameters id 1933 if (systemId == null || systemId.length() == 0) { 1934 return systemId; 1935 } 1936 // if id already expanded, return 1937 try { 1938 URI uri = new URI(systemId); 1939 if (uri != null) { 1940 return systemId; 1941 } 1942 } catch (URI.MalformedURIException e) { 1943 // continue on... 1944 } 1945 // normalize id 1946 String id = fixURI(systemId); 1947 1948 // normalize base 1949 URI base = null; 1950 URI uri = null; 1951 try { 1952 if (baseSystemId == null || baseSystemId.length() == 0 || 1953 baseSystemId.equals(systemId)) { 1954 String dir = getUserDir().toString(); 1955 base = new URI("file", "", dir, null, null); 1956 } else { 1957 try { 1958 base = new URI(fixURI(baseSystemId)); 1959 } catch (URI.MalformedURIException e) { 1960 if (baseSystemId.indexOf(':') != -1) { 1961 // for xml schemas we might have baseURI with 1962 // a specified drive 1963 base = new URI("file", "", fixURI(baseSystemId), null, null); 1964 } else { 1965 String dir = getUserDir().toString(); 1966 dir = dir + fixURI(baseSystemId); 1967 base = new URI("file", "", dir, null, null); 1968 } 1969 } 1970 } 1971 // expand id 1972 uri = new URI(base, id); 1973 } catch (Exception e) { 1974 // let it go through 1975 1976 } 1977 1978 if (uri == null) { 1979 return systemId; 1980 } 1981 return uri.toString(); 1982 1983 } // expandSystemId(String,String):String 1984 1985 /** 1986 * Expands a system id and returns the system id as a URI, if 1987 * it can be expanded. A return value of null means that the 1988 * identifier is already expanded. An exception thrown 1989 * indicates a failure to expand the id. 1990 * 1991 * @param systemId The systemId to be expanded. 1992 * 1993 * @return Returns the URI string representing the expanded system 1994 * identifier. A null value indicates that the given 1995 * system identifier is already expanded. 
1996 * 1997 */ 1998 public static String expandSystemId(String systemId, String baseSystemId, 1999 boolean strict) 2000 throws URI.MalformedURIException { 2001 2002 // check if there is a system id before 2003 // trying to expand it. 2004 if (systemId == null) { 2005 return null; 2006 } 2007 2008 // system id has to be a valid URI 2009 if (strict) { 2010 try { 2011 // if it's already an absolute one, return it 2012 new URI(systemId); 2013 return systemId; 2014 } 2015 catch (URI.MalformedURIException ex) { 2016 } 2017 URI base = null; 2018 // if there isn't a base uri, use the working directory 2019 if (baseSystemId == null || baseSystemId.length() == 0) { 2020 base = new URI("file", "", getUserDir().toString(), null, null); 2021 } 2022 // otherwise, use the base uri 2023 else { 2024 try { 2025 base = new URI(baseSystemId); 2026 } 2027 catch (URI.MalformedURIException e) { 2028 // assume "base" is also a relative uri 2029 String dir = getUserDir().toString(); 2030 dir = dir + baseSystemId; 2031 base = new URI("file", "", dir, null, null); 2032 } 2033 } 2034 // absolutize the system id using the base 2035 URI uri = new URI(base, systemId); 2036 // return the string rep of the new uri (an absolute one) 2037 return uri.toString(); 2038 2039 // if any exception is thrown, it'll get thrown to the caller. 2040 } 2041 2042 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 2043 try { 2044 return expandSystemIdStrictOff(systemId, baseSystemId); 2045 } 2046 catch (URI.MalformedURIException e) { 2047 /** Xerces URI rejects unicode, try java.net.URI 2048 * this is not ideal solution, but it covers known cases which either 2049 * Xerces URI or java.net.URI can handle alone 2050 * will file bug against java.net.URI 2051 */ 2052 try { 2053 return expandSystemIdStrictOff1(systemId, baseSystemId); 2054 } catch (URISyntaxException ex) { 2055 // continue on... 
2056 } 2057 } 2058 // check for bad parameters id 2059 if (systemId.length() == 0) { 2060 return systemId; 2061 } 2062 2063 // normalize id 2064 String id = fixURI(systemId); 2065 2066 // normalize base 2067 URI base = null; 2068 URI uri = null; 2069 try { 2070 if (baseSystemId == null || baseSystemId.length() == 0 || 2071 baseSystemId.equals(systemId)) { 2072 base = getUserDir(); 2073 } 2074 else { 2075 try { 2076 base = new URI(fixURI(baseSystemId).trim()); 2077 } 2078 catch (URI.MalformedURIException e) { 2079 if (baseSystemId.indexOf(':') != -1) { 2080 // for xml schemas we might have baseURI with 2081 // a specified drive 2082 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2083 } 2084 else { 2085 base = new URI(getUserDir(), fixURI(baseSystemId)); 2086 } 2087 } 2088 } 2089 // expand id 2090 uri = new URI(base, id.trim()); 2091 } 2092 catch (Exception e) { 2093 // let it go through 2094 2095 } 2096 2097 if (uri == null) { 2098 return systemId; 2099 } 2100 return uri.toString(); 2101 2102 } // expandSystemId(String,String,boolean):String 2103 2104 /** 2105 * Helper method for expandSystemId(String,String,boolean):String 2106 */ 2107 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2108 throws URI.MalformedURIException { 2109 2110 URI systemURI = new URI(systemId, true); 2111 // If it's already an absolute one, return it 2112 if (systemURI.isAbsoluteURI()) { 2113 return systemId; 2114 } 2115 2116 // If there isn't a base URI, use the working directory 2117 URI baseURI = null; 2118 if (baseSystemId == null || baseSystemId.length() == 0) { 2119 baseURI = getUserDir(); 2120 } 2121 else { 2122 baseURI = new URI(baseSystemId, true); 2123 if (!baseURI.isAbsoluteURI()) { 2124 // assume "base" is also a relative uri 2125 baseURI.absolutize(getUserDir()); 2126 } 2127 } 2128 2129 // absolutize the system identifier using the base URI 2130 systemURI.absolutize(baseURI); 2131 2132 // return the string rep of the new uri 
(an absolute one) 2133 return systemURI.toString(); 2134 2135 // if any exception is thrown, it'll get thrown to the caller. 2136 2137 } // expandSystemIdStrictOn(String,String):String 2138 2139 /** 2140 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>. 2141 * This may fail on earlier JDKs which do not support setting this preference. 2142 */ 2143 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) { 2144 try { 2145 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE}); 2146 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE}); 2147 } 2148 // setInstanceFollowRedirects doesn't exist. 2149 catch (Exception exc) {} 2150 } 2151 2152 2153 /** 2154 * Helper method for expandSystemId(String,String,boolean):String 2155 */ 2156 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2157 throws URI.MalformedURIException { 2158 2159 URI systemURI = new URI(systemId, true); 2160 // If it's already an absolute one, return it 2161 if (systemURI.isAbsoluteURI()) { 2162 if (systemURI.getScheme().length() > 1) { 2163 return systemId; 2164 } 2165 /** 2166 * If the scheme's length is only one character, 2167 * it's likely that this was intended as a file 2168 * path. Fixing this up in expandSystemId to 2169 * maintain backwards compatibility. 
2170 */ 2171 throw new URI.MalformedURIException(); 2172 } 2173 2174 // If there isn't a base URI, use the working directory 2175 URI baseURI = null; 2176 if (baseSystemId == null || baseSystemId.length() == 0) { 2177 baseURI = getUserDir(); 2178 } 2179 else { 2180 baseURI = new URI(baseSystemId, true); 2181 if (!baseURI.isAbsoluteURI()) { 2182 // assume "base" is also a relative uri 2183 baseURI.absolutize(getUserDir()); 2184 } 2185 } 2186 2187 // absolutize the system identifier using the base URI 2188 systemURI.absolutize(baseURI); 2189 2190 // return the string rep of the new uri (an absolute one) 2191 return systemURI.toString(); 2192 2193 // if any exception is thrown, it'll get thrown to the caller. 2194 2195 } // expandSystemIdStrictOff(String,String):String 2196 2197 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2198 throws URISyntaxException, URI.MalformedURIException { 2199 2200 java.net.URI systemURI = new java.net.URI(systemId); 2201 // If it's already an absolute one, return it 2202 if (systemURI.isAbsolute()) { 2203 if (systemURI.getScheme().length() > 1) { 2204 return systemId; 2205 } 2206 /** 2207 * If the scheme's length is only one character, 2208 * it's likely that this was intended as a file 2209 * path. Fixing this up in expandSystemId to 2210 * maintain backwards compatibility. 
2211 */ 2212 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2213 } 2214 2215 // If there isn't a base URI, use the working directory 2216 URI baseURI = null; 2217 if (baseSystemId == null || baseSystemId.length() == 0) { 2218 baseURI = getUserDir(); 2219 } 2220 else { 2221 baseURI = new URI(baseSystemId, true); 2222 if (!baseURI.isAbsoluteURI()) { 2223 // assume "base" is also a relative uri 2224 baseURI.absolutize(getUserDir()); 2225 } 2226 } 2227 2228 // absolutize the system identifier using the base URI 2229 // systemURI.absolutize(baseURI); 2230 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2231 2232 // return the string rep of the new uri (an absolute one) 2233 return systemURI.toString(); 2234 2235 // if any exception is thrown, it'll get thrown to the caller. 2236 2237 } // expandSystemIdStrictOff(String,String):String 2238 2239 // 2240 // Protected methods 2241 // 2242 2243 2244 /** 2245 * Returns the IANA encoding name that is auto-detected from 2246 * the bytes specified, with the endian-ness of that encoding where appropriate. 2247 * 2248 * @param b4 The first four bytes of the input. 2249 * @param count The number of bytes actually read. 2250 * @return a 2-element array: the first element, an IANA-encoding string, 2251 * the second element a Boolean which is true iff the document is big endian, false 2252 * if it's little-endian, and null if the distinction isn't relevant. 
2253 */ 2254 protected Object[] getEncodingName(byte[] b4, int count) { 2255 2256 if (count < 2) { 2257 return defaultEncoding; 2258 } 2259 2260 // UTF-16, with BOM 2261 int b0 = b4[0] & 0xFF; 2262 int b1 = b4[1] & 0xFF; 2263 if (b0 == 0xFE && b1 == 0xFF) { 2264 // UTF-16, big-endian 2265 return new Object [] {"UTF-16BE", new Boolean(true)}; 2266 } 2267 if (b0 == 0xFF && b1 == 0xFE) { 2268 // UTF-16, little-endian 2269 return new Object [] {"UTF-16LE", new Boolean(false)}; 2270 } 2271 2272 // default to UTF-8 if we don't have enough bytes to make a 2273 // good determination of the encoding 2274 if (count < 3) { 2275 return defaultEncoding; 2276 } 2277 2278 // UTF-8 with a BOM 2279 int b2 = b4[2] & 0xFF; 2280 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2281 return defaultEncoding; 2282 } 2283 2284 // default to UTF-8 if we don't have enough bytes to make a 2285 // good determination of the encoding 2286 if (count < 4) { 2287 return defaultEncoding; 2288 } 2289 2290 // other encodings 2291 int b3 = b4[3] & 0xFF; 2292 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2293 // UCS-4, big endian (1234) 2294 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2295 } 2296 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2297 // UCS-4, little endian (4321) 2298 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2299 } 2300 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2301 // UCS-4, unusual octet order (2143) 2302 // REVISIT: What should this be? 2303 return new Object [] {"ISO-10646-UCS-4", null}; 2304 } 2305 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2306 // UCS-4, unusual octect order (3412) 2307 // REVISIT: What should this be? 2308 return new Object [] {"ISO-10646-UCS-4", null}; 2309 } 2310 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2311 // UTF-16, big-endian, no BOM 2312 // (or could turn out to be UCS-2... 2313 // REVISIT: What should this be? 
2314 return new Object [] {"UTF-16BE", new Boolean(true)}; 2315 } 2316 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2317 // UTF-16, little-endian, no BOM 2318 // (or could turn out to be UCS-2... 2319 return new Object [] {"UTF-16LE", new Boolean(false)}; 2320 } 2321 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2322 // EBCDIC 2323 // a la xerces1, return CP037 instead of EBCDIC here 2324 return new Object [] {"CP037", null}; 2325 } 2326 2327 return defaultEncoding; 2328 2329 } // getEncodingName(byte[],int):Object[] 2330 2331 /** 2332 * Creates a reader capable of reading the given input stream in 2333 * the specified encoding. 2334 * 2335 * @param inputStream The input stream. 2336 * @param encoding The encoding name that the input stream is 2337 * encoded using. If the user has specified that 2338 * Java encoding names are allowed, then the 2339 * encoding name may be a Java encoding name; 2340 * otherwise, it is an ianaEncoding name. 2341 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2342 * specify a byte order, this tells whether the order is bigEndian. null menas 2343 * unknown or not relevant. 2344 * 2345 * @return Returns a reader. 
2346 */ 2347 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 2348 throws IOException { 2349 2350 // normalize encoding name 2351 if (encoding == null) { 2352 encoding = "UTF-8"; 2353 } 2354 2355 // try to use an optimized reader 2356 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 2357 if (ENCODING.equals("UTF-8")) { 2358 if (DEBUG_ENCODINGS) { 2359 System.out.println("$$$ creating UTF8Reader"); 2360 } 2361 return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 2362 } 2363 if (ENCODING.equals("US-ASCII")) { 2364 if (DEBUG_ENCODINGS) { 2365 System.out.println("$$$ creating ASCIIReader"); 2366 } 2367 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2368 } 2369 if(ENCODING.equals("ISO-10646-UCS-4")) { 2370 if(isBigEndian != null) { 2371 boolean isBE = isBigEndian.booleanValue(); 2372 if(isBE) { 2373 return new UCSReader(inputStream, UCSReader.UCS4BE); 2374 } else { 2375 return new UCSReader(inputStream, UCSReader.UCS4LE); 2376 } 2377 } else { 2378 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2379 "EncodingByteOrderUnsupported", 2380 new Object[] { encoding }, 2381 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2382 } 2383 } 2384 if(ENCODING.equals("ISO-10646-UCS-2")) { 2385 if(isBigEndian != null) { // sould never happen with this encoding... 
2386 boolean isBE = isBigEndian.booleanValue(); 2387 if(isBE) { 2388 return new UCSReader(inputStream, UCSReader.UCS2BE); 2389 } else { 2390 return new UCSReader(inputStream, UCSReader.UCS2LE); 2391 } 2392 } else { 2393 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2394 "EncodingByteOrderUnsupported", 2395 new Object[] { encoding }, 2396 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2397 } 2398 } 2399 2400 // check for valid name 2401 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2402 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2403 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2404 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2405 "EncodingDeclInvalid", 2406 new Object[] { encoding }, 2407 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2408 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2409 // because every byte is a valid ISO Latin 1 character. 2410 // It may not translate correctly but if we failed on 2411 // the encoding anyway, then we're expecting the content 2412 // of the document to be bad. This will just prevent an 2413 // invalid UTF-8 sequence to be detected. This is only 2414 // important when continue-after-fatal-error is turned 2415 // on. -Ac 2416 encoding = "ISO-8859-1"; 2417 } 2418 2419 // try to use a Java reader 2420 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2421 if (javaEncoding == null) { 2422 if(fAllowJavaEncodings) { 2423 javaEncoding = encoding; 2424 } else { 2425 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2426 "EncodingDeclInvalid", 2427 new Object[] { encoding }, 2428 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2429 // see comment above. 
2430 javaEncoding = "ISO8859_1"; 2431 } 2432 } 2433 if (DEBUG_ENCODINGS) { 2434 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2435 if (javaEncoding == encoding) { 2436 System.out.print(" (IANA encoding)"); 2437 } 2438 System.out.println(); 2439 } 2440 return new BufferedReader( new InputStreamReader(inputStream, javaEncoding)); 2441 2442 } // createReader(InputStream,String, Boolean): Reader 2443 2444 2445 /** 2446 * Return the public identifier for the current document event. 2447 * <p> 2448 * The return value is the public identifier of the document 2449 * entity or of the external parsed entity in which the markup 2450 * triggering the event appears. 2451 * 2452 * @return A string containing the public identifier, or 2453 * null if none is available. 2454 */ 2455 public String getPublicId() { 2456 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 2457 } // getPublicId():String 2458 2459 /** 2460 * Return the expanded system identifier for the current document event. 2461 * <p> 2462 * The return value is the expanded system identifier of the document 2463 * entity or of the external parsed entity in which the markup 2464 * triggering the event appears. 2465 * <p> 2466 * If the system identifier is a URL, the parser must resolve it 2467 * fully before passing it to the application. 2468 * 2469 * @return A string containing the expanded system identifier, or null 2470 * if none is available. 
2471 */ 2472 public String getExpandedSystemId() { 2473 if (fCurrentEntity != null) { 2474 if (fCurrentEntity.entityLocation != null && 2475 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2476 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2477 } else { 2478 // search for the first external entity on the stack 2479 int size = fEntityStack.size(); 2480 for (int i = size - 1; i >= 0 ; i--) { 2481 Entity.ScannedEntity externalEntity = 2482 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2483 2484 if (externalEntity.entityLocation != null && 2485 externalEntity.entityLocation.getExpandedSystemId() != null) { 2486 return externalEntity.entityLocation.getExpandedSystemId(); 2487 } 2488 } 2489 } 2490 } 2491 return null; 2492 } // getExpandedSystemId():String 2493 2494 /** 2495 * Return the literal system identifier for the current document event. 2496 * <p> 2497 * The return value is the literal system identifier of the document 2498 * entity or of the external parsed entity in which the markup 2499 * triggering the event appears. 2500 * <p> 2501 * @return A string containing the literal system identifier, or null 2502 * if none is available. 
2503 */ 2504 public String getLiteralSystemId() { 2505 if (fCurrentEntity != null) { 2506 if (fCurrentEntity.entityLocation != null && 2507 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2508 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2509 } else { 2510 // search for the first external entity on the stack 2511 int size = fEntityStack.size(); 2512 for (int i = size - 1; i >= 0 ; i--) { 2513 Entity.ScannedEntity externalEntity = 2514 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2515 2516 if (externalEntity.entityLocation != null && 2517 externalEntity.entityLocation.getLiteralSystemId() != null) { 2518 return externalEntity.entityLocation.getLiteralSystemId(); 2519 } 2520 } 2521 } 2522 } 2523 return null; 2524 } // getLiteralSystemId():String 2525 2526 /** 2527 * Return the line number where the current document event ends. 2528 * <p> 2529 * <strong>Warning:</strong> The return value from the method 2530 * is intended only as an approximation for the sake of error 2531 * reporting; it is not intended to provide sufficient information 2532 * to edit the character content of the original XML document. 2533 * <p> 2534 * The return value is an approximation of the line number 2535 * in the document entity or external parsed entity where the 2536 * markup triggering the event appears. 2537 * <p> 2538 * If possible, the SAX driver should provide the line position 2539 * of the first character after the text associated with the document 2540 * event. The first line in the document is line 1. 2541 * 2542 * @return The line number, or -1 if none is available. 
2543 */ 2544 public int getLineNumber() { 2545 if (fCurrentEntity != null) { 2546 if (fCurrentEntity.isExternal()) { 2547 return fCurrentEntity.lineNumber; 2548 } else { 2549 // search for the first external entity on the stack 2550 int size = fEntityStack.size(); 2551 for (int i=size-1; i>0 ; i--) { 2552 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2553 if (firstExternalEntity.isExternal()) { 2554 return firstExternalEntity.lineNumber; 2555 } 2556 } 2557 } 2558 } 2559 2560 return -1; 2561 2562 } // getLineNumber():int 2563 2564 /** 2565 * Return the column number where the current document event ends. 2566 * <p> 2567 * <strong>Warning:</strong> The return value from the method 2568 * is intended only as an approximation for the sake of error 2569 * reporting; it is not intended to provide sufficient information 2570 * to edit the character content of the original XML document. 2571 * <p> 2572 * The return value is an approximation of the column number 2573 * in the document entity or external parsed entity where the 2574 * markup triggering the event appears. 2575 * <p> 2576 * If possible, the SAX driver should provide the line position 2577 * of the first character after the text associated with the document 2578 * event. 2579 * <p> 2580 * If possible, the SAX driver should provide the line position 2581 * of the first character after the text associated with the document 2582 * event. The first column in each line is column 1. 2583 * 2584 * @return The column number, or -1 if none is available. 
2585 */ 2586 public int getColumnNumber() { 2587 if (fCurrentEntity != null) { 2588 if (fCurrentEntity.isExternal()) { 2589 return fCurrentEntity.columnNumber; 2590 } else { 2591 // search for the first external entity on the stack 2592 int size = fEntityStack.size(); 2593 for (int i=size-1; i>0 ; i--) { 2594 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2595 if (firstExternalEntity.isExternal()) { 2596 return firstExternalEntity.columnNumber; 2597 } 2598 } 2599 } 2600 } 2601 2602 return -1; 2603 } // getColumnNumber():int 2604 2605 2606 // 2607 // Protected static methods 2608 // 2609 2610 /** 2611 * Fixes a platform dependent filename to standard URI form. 2612 * 2613 * @param str The string to fix. 2614 * 2615 * @return Returns the fixed URI string. 2616 */ 2617 protected static String fixURI(String str) { 2618 2619 // handle platform dependent strings 2620 str = str.replace(java.io.File.separatorChar, '/'); 2621 2622 // Windows fix 2623 if (str.length() >= 2) { 2624 char ch1 = str.charAt(1); 2625 // change "C:blah" to "/C:blah" 2626 if (ch1 == ':') { 2627 char ch0 = Character.toUpperCase(str.charAt(0)); 2628 if (ch0 >= 'A' && ch0 <= 'Z') { 2629 str = "/" + str; 2630 } 2631 } 2632 // change "//blah" to "file://blah" 2633 else if (ch1 == '/' && str.charAt(0) == '/') { 2634 str = "file:" + str; 2635 } 2636 } 2637 2638 // replace spaces in file names with %20. 2639 // Original comment from JDK5: the following algorithm might not be 2640 // very performant, but people who want to use invalid URI's have to 2641 // pay the price. 2642 int pos = str.indexOf(' '); 2643 if (pos >= 0) { 2644 StringBuilder sb = new StringBuilder(str.length()); 2645 // put characters before ' ' into the string builder 2646 for (int i = 0; i < pos; i++) 2647 sb.append(str.charAt(i)); 2648 // and %20 for the space 2649 sb.append("%20"); 2650 // for the remamining part, also convert ' ' to "%20". 
2651 for (int i = pos+1; i < str.length(); i++) { 2652 if (str.charAt(i) == ' ') 2653 sb.append("%20"); 2654 else 2655 sb.append(str.charAt(i)); 2656 } 2657 str = sb.toString(); 2658 } 2659 2660 // done 2661 return str; 2662 2663 } // fixURI(String):String 2664 2665 2666 // 2667 // Package visible methods 2668 // 2669 /** Prints the contents of the buffer. */ 2670 final void print() { 2671 if (DEBUG_BUFFER) { 2672 if (fCurrentEntity != null) { 2673 System.out.print('['); 2674 System.out.print(fCurrentEntity.count); 2675 System.out.print(' '); 2676 System.out.print(fCurrentEntity.position); 2677 if (fCurrentEntity.count > 0) { 2678 System.out.print(" \""); 2679 for (int i = 0; i < fCurrentEntity.count; i++) { 2680 if (i == fCurrentEntity.position) { 2681 System.out.print('^'); 2682 } 2683 char c = fCurrentEntity.ch[i]; 2684 switch (c) { 2685 case '\n': { 2686 System.out.print("\\n"); 2687 break; 2688 } 2689 case '\r': { 2690 System.out.print("\\r"); 2691 break; 2692 } 2693 case '\t': { 2694 System.out.print("\\t"); 2695 break; 2696 } 2697 case '\\': { 2698 System.out.print("\\\\"); 2699 break; 2700 } 2701 default: { 2702 System.out.print(c); 2703 } 2704 } 2705 } 2706 if (fCurrentEntity.position == fCurrentEntity.count) { 2707 System.out.print('^'); 2708 } 2709 System.out.print('"'); 2710 } 2711 System.out.print(']'); 2712 System.out.print(" @ "); 2713 System.out.print(fCurrentEntity.lineNumber); 2714 System.out.print(','); 2715 System.out.print(fCurrentEntity.columnNumber); 2716 } else { 2717 System.out.print("*NO CURRENT ENTITY*"); 2718 } 2719 } 2720 } // print() 2721 2722 /** 2723 * Buffer used in entity manager to reuse character arrays instead 2724 * of creating new ones every time. 
2725 * 2726 * @xerces.internal 2727 * 2728 * @author Ankit Pasricha, IBM 2729 */ 2730 private static class CharacterBuffer { 2731 2732 /** character buffer */ 2733 private char[] ch; 2734 2735 /** whether the buffer is for an external or internal scanned entity */ 2736 private boolean isExternal; 2737 2738 public CharacterBuffer(boolean isExternal, int size) { 2739 this.isExternal = isExternal; 2740 ch = new char[size]; 2741 } 2742 } 2743 2744 2745 /** 2746 * Stores a number of character buffers and provides it to the entity 2747 * manager to use when an entity is seen. 2748 * 2749 * @xerces.internal 2750 * 2751 * @author Ankit Pasricha, IBM 2752 */ 2753 private static class CharacterBufferPool { 2754 2755 private static final int DEFAULT_POOL_SIZE = 3; 2756 2757 private CharacterBuffer[] fInternalBufferPool; 2758 private CharacterBuffer[] fExternalBufferPool; 2759 2760 private int fExternalBufferSize; 2761 private int fInternalBufferSize; 2762 private int poolSize; 2763 2764 private int fInternalTop; 2765 private int fExternalTop; 2766 2767 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2768 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2769 } 2770 2771 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2772 fExternalBufferSize = externalBufferSize; 2773 fInternalBufferSize = internalBufferSize; 2774 this.poolSize = poolSize; 2775 init(); 2776 } 2777 2778 /** Initializes buffer pool. **/ 2779 private void init() { 2780 fInternalBufferPool = new CharacterBuffer[poolSize]; 2781 fExternalBufferPool = new CharacterBuffer[poolSize]; 2782 fInternalTop = -1; 2783 fExternalTop = -1; 2784 } 2785 2786 /** Retrieves buffer from pool. 
**/ 2787 public CharacterBuffer getBuffer(boolean external) { 2788 if (external) { 2789 if (fExternalTop > -1) { 2790 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2791 } 2792 else { 2793 return new CharacterBuffer(true, fExternalBufferSize); 2794 } 2795 } 2796 else { 2797 if (fInternalTop > -1) { 2798 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2799 } 2800 else { 2801 return new CharacterBuffer(false, fInternalBufferSize); 2802 } 2803 } 2804 } 2805 2806 /** Returns buffer to pool. **/ 2807 public void returnToPool(CharacterBuffer buffer) { 2808 if (buffer.isExternal) { 2809 if (fExternalTop < fExternalBufferPool.length - 1) { 2810 fExternalBufferPool[++fExternalTop] = buffer; 2811 } 2812 } 2813 else if (fInternalTop < fInternalBufferPool.length - 1) { 2814 fInternalBufferPool[++fInternalTop] = buffer; 2815 } 2816 } 2817 2818 /** Sets the size of external buffers and dumps the old pool. **/ 2819 public void setExternalBufferSize(int bufferSize) { 2820 fExternalBufferSize = bufferSize; 2821 fExternalBufferPool = new CharacterBuffer[poolSize]; 2822 fExternalTop = -1; 2823 } 2824 } 2825 2826 /** 2827 * This class wraps the byte inputstreams we're presented with. 2828 * We need it because java.io.InputStreams don't provide 2829 * functionality to reread processed bytes, and they have a habit 2830 * of reading more than one character when you call their read() 2831 * methods. This means that, once we discover the true (declared) 2832 * encoding of a document, we can neither backtrack to read the 2833 * whole doc again nor start reading where we are with a new 2834 * reader. 2835 * 2836 * This class allows rewinding an inputStream by allowing a mark 2837 * to be set, and the stream reset to that position. 
<strong>The 2838 * class assumes that it needs to read one character per 2839 * invocation when it's read() method is inovked, but uses the 2840 * underlying InputStream's read(char[], offset length) method--it 2841 * won't buffer data read this way!</strong> 2842 * 2843 * @xerces.internal 2844 * 2845 * @author Neil Graham, IBM 2846 * @author Glenn Marcy, IBM 2847 */ 2848 2849 protected final class RewindableInputStream extends InputStream { 2850 2851 private InputStream fInputStream; 2852 private byte[] fData; 2853 private int fStartOffset; 2854 private int fEndOffset; 2855 private int fOffset; 2856 private int fLength; 2857 private int fMark; 2858 2859 public RewindableInputStream(InputStream is) { 2860 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2861 fInputStream = is; 2862 fStartOffset = 0; 2863 fEndOffset = -1; 2864 fOffset = 0; 2865 fLength = 0; 2866 fMark = 0; 2867 } 2868 2869 public void setStartOffset(int offset) { 2870 fStartOffset = offset; 2871 } 2872 2873 public void rewind() { 2874 fOffset = fStartOffset; 2875 } 2876 2877 public int read() throws IOException { 2878 int b = 0; 2879 if (fOffset < fLength) { 2880 return fData[fOffset++] & 0xff; 2881 } 2882 if (fOffset == fEndOffset) { 2883 return -1; 2884 } 2885 if (fOffset == fData.length) { 2886 byte[] newData = new byte[fOffset << 1]; 2887 System.arraycopy(fData, 0, newData, 0, fOffset); 2888 fData = newData; 2889 } 2890 b = fInputStream.read(); 2891 if (b == -1) { 2892 fEndOffset = fOffset; 2893 return -1; 2894 } 2895 fData[fLength++] = (byte)b; 2896 fOffset++; 2897 return b & 0xff; 2898 } 2899 2900 public int read(byte[] b, int off, int len) throws IOException { 2901 int bytesLeft = fLength - fOffset; 2902 if (bytesLeft == 0) { 2903 if (fOffset == fEndOffset) { 2904 return -1; 2905 } 2906 2907 /** 2908 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2909 * //System.out.println("fInputStream = " + fInputStream ); 2910 * // better get some more for the voracious reader... 
*/ 2911 2912 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2913 2914 if (!fCurrentEntity.xmlDeclChunkRead) 2915 { 2916 fCurrentEntity.xmlDeclChunkRead = true; 2917 len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2918 } 2919 return fInputStream.read(b, off, len); 2920 } 2921 2922 int returnedVal = read(); 2923 if(returnedVal == -1) { 2924 fEndOffset = fOffset; 2925 return -1; 2926 } 2927 b[off] = (byte)returnedVal; 2928 return 1; 2929 2930 } 2931 if (len < bytesLeft) { 2932 if (len <= 0) { 2933 return 0; 2934 } 2935 } else { 2936 len = bytesLeft; 2937 } 2938 if (b != null) { 2939 System.arraycopy(fData, fOffset, b, off, len); 2940 } 2941 fOffset += len; 2942 return len; 2943 } 2944 2945 public long skip(long n) 2946 throws IOException { 2947 int bytesLeft; 2948 if (n <= 0) { 2949 return 0; 2950 } 2951 bytesLeft = fLength - fOffset; 2952 if (bytesLeft == 0) { 2953 if (fOffset == fEndOffset) { 2954 return 0; 2955 } 2956 return fInputStream.skip(n); 2957 } 2958 if (n <= bytesLeft) { 2959 fOffset += n; 2960 return n; 2961 } 2962 fOffset += bytesLeft; 2963 if (fOffset == fEndOffset) { 2964 return bytesLeft; 2965 } 2966 n -= bytesLeft; 2967 /* 2968 * In a manner of speaking, when this class isn't permitting more 2969 * than one byte at a time to be read, it is "blocking". The 2970 * available() method should indicate how much can be read without 2971 * blocking, so while we're in this mode, it should only indicate 2972 * that bytes in its buffer are available; otherwise, the result of 2973 * available() on the underlying InputStream is appropriate. 2974 */ 2975 return fInputStream.skip(n) + bytesLeft; 2976 } 2977 2978 public int available() throws IOException { 2979 int bytesLeft = fLength - fOffset; 2980 if (bytesLeft == 0) { 2981 if (fOffset == fEndOffset) { 2982 return -1; 2983 } 2984 return fCurrentEntity.mayReadChunks ? 
fInputStream.available() 2985 : 0; 2986 } 2987 return bytesLeft; 2988 } 2989 2990 public void mark(int howMuch) { 2991 fMark = fOffset; 2992 } 2993 2994 public void reset() { 2995 fOffset = fMark; 2996 //test(); 2997 } 2998 2999 public boolean markSupported() { 3000 return true; 3001 } 3002 3003 public void close() throws IOException { 3004 if (fInputStream != null) { 3005 fInputStream.close(); 3006 fInputStream = null; 3007 } 3008 } 3009 } // end of RewindableInputStream class 3010 3011 public void test(){ 3012 //System.out.println("TESTING: Added familytree to entityManager"); 3013 //Usecase1 3014 fEntityStorage.addExternalEntity("entityUsecase1",null, 3015 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 3016 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 3017 3018 //Usecase2 3019 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3020 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3021 fEntityStorage.addInternalEntity("text", "Hello World."); 3022 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3023 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3024 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3025 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3026 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3027 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3028 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3029 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3030 fEntityStorage.addInternalEntity("ch","©"); 3031 fEntityStorage.addInternalEntity("ch1","T"); 3032 fEntityStorage.addInternalEntity("% ch2","param"); 3033 } 3034 3035 } // class XMLEntityManager