1 /* 2 * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.org.apache.xerces.internal.impl.Constants; 24 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 30 import com.sun.org.apache.xerces.internal.util.*; 31 import com.sun.org.apache.xerces.internal.util.URI; 32 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 38 import 
com.sun.org.apache.xerces.internal.xni.XNIException; 39 import com.sun.org.apache.xerces.internal.xni.parser.*; 40 import com.sun.xml.internal.stream.Entity; 41 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 42 import com.sun.xml.internal.stream.StaxXMLInputSource; 43 import com.sun.xml.internal.stream.XMLEntityStorage; 44 import java.io.*; 45 import java.lang.reflect.Method; 46 import java.net.HttpURLConnection; 47 import java.net.URISyntaxException; 48 import java.net.URL; 49 import java.net.URLConnection; 50 import java.util.Hashtable; 51 import java.util.Iterator; 52 import java.util.Locale; 53 import java.util.Map; 54 import java.util.Stack; 55 import java.util.StringTokenizer; 56 import javax.xml.stream.XMLInputFactory; 57 58 59 /** 60 * Will keep track of current entity. 61 * 62 * The entity manager handles the registration of general and parameter 63 * entities; resolves entities; and starts entities. The entity manager 64 * is a central component in a standard parser configuration and this 65 * class works directly with the entity scanner to manage the underlying 66 * xni. 
* <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 *  <li>http://xml.org/sax/features/validation</li>
 *  <li>http://xml.org/sax/features/external-general-entities</li>
 *  <li>http://xml.org/sax/features/external-parameter-entities</li>
 *  <li>http://apache.org/xml/features/allow-java-encodings</li>
 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
 *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
 * </ul>
 *
 *
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal SUN Microsystems
 * @author Neeraj Bajaj SUN Microsystems
 * @author Sunitha Reddy SUN Microsystems
 */
public class XMLEntityManager implements XMLComponent, XMLEntityResolver {

    //
    // Constants
    //

    /** Default buffer size (8192). */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Default buffer size (64) used before we've finished with the XMLDecl. */
    public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;

    /** Default internal entity buffer size (1024). */
    public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024;

    // feature identifiers

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /**
     * Standard URI conformant (strict URI). This feature identifier is:
     * http://apache.org/xml/features/standard-uri-conformant
     * <p>
     * Passed as the strictness flag to expandSystemId() when an entity
     * is set up for reading.
     */
    protected boolean fStrictURI;


    /** Feature identifier: external general entities. */
    protected static final String EXTERNAL_GENERAL_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;

    /** Feature identifier: external parameter entities. */
    protected static final String EXTERNAL_PARAMETER_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Feature identifier: warn on duplicate entity definition. */
    protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
            Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;

    /**
     * Feature identifier: load external DTD.
     * Note that this identifier is not listed in RECOGNIZED_FEATURES.
     */
    protected static final String LOAD_EXTERNAL_DTD =
            Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

    // property identifiers

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: standard URI conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: StAX entity resolver. */
    protected static final String STAX_ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: validation manager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: buffer size. */
    protected static final String BUFFER_SIZE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;

    /** Property identifier: security manager. */
    protected static final String SECURITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;

    /** Feature identifier: parser settings (built on the Xerces feature prefix). */
    protected static final String PARSER_SETTINGS =
        Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

    /** Property identifier: security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /** Default value for the external-DTD access restriction (see fAccessExternalDTD). */
    static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /** Recognized features. */
    private static final String[] RECOGNIZED_FEATURES = {
        VALIDATION,
        EXTERNAL_GENERAL_ENTITIES,
        EXTERNAL_PARAMETER_ENTITIES,
        ALLOW_JAVA_ENCODINGS,
        WARN_ON_DUPLICATE_ENTITYDEF,
        STANDARD_URI_CONFORMANT
    };

    /**
     * Feature defaults.
     * NOTE(review): same length as RECOGNIZED_FEATURES; presumably
     * index-aligned with it -- confirm against the lookup code.
     */
    private static final Boolean[] FEATURE_DEFAULTS = {
        null,
        Boolean.TRUE,
        Boolean.TRUE,
        Boolean.TRUE,
        Boolean.FALSE,
        Boolean.FALSE
    };

    /** Recognized properties. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_RESOLVER,
        VALIDATION_MANAGER,
        BUFFER_SIZE,
        SECURITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER
    };

    /**
     * Property defaults.
     * NOTE(review): same length as RECOGNIZED_PROPERTIES; presumably
     * index-aligned with it -- confirm against the lookup code.
     */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        null,
        new Integer(DEFAULT_BUFFER_SIZE),
        null,
        null
    };

    /** Interned "[xml]" name constant. */
    private static final String XMLEntity = "[xml]".intern();
    /** Interned "[dtd]" name constant. */
    private static final String DTDEntity = "[dtd]".intern();

    // debugging

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;

    /**
     * Warn on duplicate entity declaration. This feature identifier is:
     * http://apache.org/xml/features/warn-on-duplicate-entitydef
     */
    protected boolean fWarnDuplicateEntityDef;

    /** Debug some basic entities. */
    private static final boolean DEBUG_ENTITIES = false;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Whether trace messages should be displayed while resolving entities. */
    private static final boolean DEBUG_RESOLVER = false ;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation;

    /**
     * External general entities. This feature identifier is:
     * http://xml.org/sax/features/external-general-entities
     */
    protected boolean fExternalGeneralEntities;

    /**
     * External parameter entities. This feature identifier is:
     * http://xml.org/sax/features/external-parameter-entities
     */
    protected boolean fExternalParameterEntities;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings = true ;

    /** Load external DTD. */
    protected boolean fLoadExternalDTD = true;

    // properties

    /**
     * Symbol table. This property identifier is:
     * http://apache.org/xml/properties/internal/symbol-table
     */
    protected SymbolTable fSymbolTable;

    /**
     * Error reporter. This property identifier is:
     * http://apache.org/xml/properties/internal/error-reporter
     */
    protected XMLErrorReporter fErrorReporter;

    /**
     * Entity resolver. This property identifier is:
     * http://apache.org/xml/properties/internal/entity-resolver
     */
    protected XMLEntityResolver fEntityResolver;

    /** StAX entity resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */
    protected StaxEntityResolverWrapper fStaxEntityResolver;

    /** Property manager. This is used from StAX. */
    protected PropertyManager fPropertyManager ;

    /** StAX properties */
    boolean fSupportDTD = true;
    boolean fReplaceEntityReferences = true;
    boolean fSupportExternalEntities = true;

    /** Used to restrict external access; starts out as EXTERNAL_ACCESS_DEFAULT. */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    // settings

    /**
     * Validation manager. This property identifier is:
     * http://apache.org/xml/properties/internal/validation-manager
     */
    protected ValidationManager fValidationManager;

    // settings

    /**
     * Buffer size. We get this value from a property. The default size
     * is used if the input buffer size property is not specified.
     * REVISIT: do we need a property for internal entity buffer size?
     */
    protected int fBufferSize = DEFAULT_BUFFER_SIZE;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer; when non-null it is told about every entity that is started. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    protected int entityExpansionIndex;

    /**
     * True if the document entity is standalone. This should really
     * only be set by the document source (e.g. XMLDocumentScanner).
     */
    protected boolean fStandalone;

    // are the entities being parsed in the external subset?
    // NOTE:  this *is not* the same as whether they're external entities!
    protected boolean fInExternalSubset = false;


    // handlers
    /** Entity handler. */
    protected XMLEntityHandler fEntityHandler;

    /** Current entity scanner */
    protected XMLEntityScanner fEntityScanner ;

    /** XML 1.0 entity scanner.
*/
    protected XMLEntityScanner fXML10EntityScanner;

    /** XML 1.1 entity scanner. */
    protected XMLEntityScanner fXML11EntityScanner;

    /** Count of entities expanded. */
    protected int fEntityExpansionCount = 0;

    // entities

    /** Declared entities, keyed by entity name. */
    protected Hashtable fEntities = new Hashtable();

    /** Entity stack: entities that were current before the present one was set up. */
    protected Stack fEntityStack = new Stack();

    /** Current entity. */
    protected Entity.ScannedEntity fCurrentEntity = null;

    /** Identifies whether the input source was created by a resolver. */
    boolean fISCreatedByResolver = false;

    // shared context

    /** Entity storage; created in the constructors with a reference to this manager. */
    protected XMLEntityStorage fEntityStorage ;

    // NOTE(review): presumably uses the same {encoding name, isBigEndian}
    // layout as the descriptor returned by getEncodingName() -- confirm.
    protected final Object [] defaultEncoding = new Object[]{"UTF-8", null};


    // temp vars

    /** Resource identifier (shared, reused instance). */
    private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();

    /** Augmentations for entities. */
    private final Augmentations fEntityAugs = new AugmentationsImpl();

    /** Pool of character buffers. */
    private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);

    //
    // Constructors
    //

    /**
     * Creates an entity manager without a property manager and selects
     * the XML 1.0 entity scanner.
     * <p>
     * If this constructor is used to create the object, reset() should be
     * invoked on it afterwards: nothing here assigns the symbol table or
     * the error reporter, so the scanner is reset with whatever those
     * fields hold at construction time.
     */
    public XMLEntityManager() {
        fEntityStorage = new XMLEntityStorage(this) ;
        setScannerVersion(Constants.XML_VERSION_1_0);
    } // <init>()

    /**
     * Creates an entity manager driven by a property manager (the comment
     * on fPropertyManager says this is used from StAX). Builds the entity
     * storage and a scanner bound to the property manager, then calls
     * reset(propertyManager).
     *
     * @param propertyManager the property manager to remember and reset from
     */
    public XMLEntityManager(PropertyManager propertyManager) {
        fPropertyManager = propertyManager ;
        //pass a reference to current entity being scanned
        //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ;
        fEntityStorage = new XMLEntityStorage(this) ;
        fEntityScanner = new XMLEntityScanner(propertyManager, this) ;
        reset(propertyManager);
    } // <init>()

    /**
     * Adds an internal entity declaration.
423 * <p> 424 * <strong>Note:</strong> This method ignores subsequent entity 425 * declarations. 426 * <p> 427 * <strong>Note:</strong> The name should be a unique symbol. The 428 * SymbolTable can be used for this purpose. 429 * 430 * @param name The name of the entity. 431 * @param text The text of the entity. 432 * 433 * @see SymbolTable 434 */ 435 public void addInternalEntity(String name, String text) { 436 if (!fEntities.containsKey(name)) { 437 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 438 fEntities.put(name, entity); 439 } else{ 440 if(fWarnDuplicateEntityDef){ 441 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 442 "MSG_DUPLICATE_ENTITY_DEFINITION", 443 new Object[]{ name }, 444 XMLErrorReporter.SEVERITY_WARNING ); 445 } 446 } 447 448 } // addInternalEntity(String,String) 449 450 /** 451 * Adds an external entity declaration. 452 * <p> 453 * <strong>Note:</strong> This method ignores subsequent entity 454 * declarations. 455 * <p> 456 * <strong>Note:</strong> The name should be a unique symbol. The 457 * SymbolTable can be used for this purpose. 458 * 459 * @param name The name of the entity. 460 * @param publicId The public identifier of the entity. 461 * @param literalSystemId The system identifier of the entity. 462 * @param baseSystemId The base system identifier of the entity. 463 * This is the system identifier of the entity 464 * where <em>the entity being added</em> and 465 * is used to expand the system identifier when 466 * the system identifier is a relative URI. 467 * When null the system identifier of the first 468 * external entity on the stack is used instead. 
469 * 470 * @see SymbolTable 471 */ 472 public void addExternalEntity(String name, 473 String publicId, String literalSystemId, 474 String baseSystemId) throws IOException { 475 if (!fEntities.containsKey(name)) { 476 if (baseSystemId == null) { 477 // search for the first external entity on the stack 478 int size = fEntityStack.size(); 479 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 480 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 481 } 482 for (int i = size - 1; i >= 0 ; i--) { 483 Entity.ScannedEntity externalEntity = 484 (Entity.ScannedEntity)fEntityStack.elementAt(i); 485 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 486 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 487 break; 488 } 489 } 490 } 491 Entity entity = new Entity.ExternalEntity(name, 492 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 493 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 494 fEntities.put(name, entity); 495 } else{ 496 if(fWarnDuplicateEntityDef){ 497 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 498 "MSG_DUPLICATE_ENTITY_DEFINITION", 499 new Object[]{ name }, 500 XMLErrorReporter.SEVERITY_WARNING ); 501 } 502 } 503 504 } // addExternalEntity(String,String,String,String) 505 506 507 /** 508 * Adds an unparsed entity declaration. 509 * <p> 510 * <strong>Note:</strong> This method ignores subsequent entity 511 * declarations. 512 * <p> 513 * <strong>Note:</strong> The name should be a unique symbol. The 514 * SymbolTable can be used for this purpose. 515 * 516 * @param name The name of the entity. 517 * @param publicId The public identifier of the entity. 518 * @param systemId The system identifier of the entity. 519 * @param notation The name of the notation. 
520 * 521 * @see SymbolTable 522 */ 523 public void addUnparsedEntity(String name, 524 String publicId, String systemId, 525 String baseSystemId, String notation) { 526 if (!fEntities.containsKey(name)) { 527 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 528 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 529 notation, fInExternalSubset); 530 fEntities.put(name, entity); 531 } else{ 532 if(fWarnDuplicateEntityDef){ 533 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 534 "MSG_DUPLICATE_ENTITY_DEFINITION", 535 new Object[]{ name }, 536 XMLErrorReporter.SEVERITY_WARNING ); 537 } 538 } 539 } // addUnparsedEntity(String,String,String,String) 540 541 542 /** get the entity storage object from entity manager */ 543 public XMLEntityStorage getEntityStore(){ 544 return fEntityStorage ; 545 } 546 547 /** return the entity responsible for reading the entity */ 548 public XMLEntityScanner getEntityScanner(){ 549 if(fEntityScanner == null) { 550 // default to 1.0 551 if(fXML10EntityScanner == null) { 552 fXML10EntityScanner = new XMLEntityScanner(); 553 } 554 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 555 fEntityScanner = fXML10EntityScanner; 556 } 557 return fEntityScanner; 558 559 } 560 561 public void setScannerVersion(short version) { 562 563 if(version == Constants.XML_VERSION_1_0) { 564 if(fXML10EntityScanner == null) { 565 fXML10EntityScanner = new XMLEntityScanner(); 566 } 567 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 568 fEntityScanner = fXML10EntityScanner; 569 fEntityScanner.setCurrentEntity(fCurrentEntity); 570 } else { 571 if(fXML11EntityScanner == null) { 572 fXML11EntityScanner = new XML11EntityScanner(); 573 } 574 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 575 fEntityScanner = fXML11EntityScanner; 576 fEntityScanner.setCurrentEntity(fCurrentEntity); 577 } 578 579 } 580 581 /** 582 * This method uses the passed-in XMLInputSource to make 583 * 
* fCurrentEntity usable for reading.
     * <p>
     * When the input source supplies no character stream, a byte stream is
     * obtained (from the source, or by opening its expanded system id as a
     * URL), wrapped in a RewindableInputStream, and a Reader is created for
     * it after the encoding has been auto-detected or the externally
     * specified encoding has been normalized. The previous current entity,
     * if any, is pushed on the entity stack before the new one is installed.
     *
     * @param name  name of the entity (presumably "[xml]" for the document
     *              entity -- confirm against callers)
     * @param xmlInputSource    the input source, with sufficient information
     *      to begin scanning characters.
     * @param literal        True if this entity is started within a
     *                       literal value.
     * @param isExternal    whether this entity should be treated as an internal or external entity.
     * @throws IOException  if anything can't be read
     * @throws XNIException if anything parser-specific goes wrong
     * @return the encoding of the new entity: the auto-detected or
     *         upper-cased declared encoding for byte streams, or the
     *         encoding reported by the input source (possibly null) when a
     *         character stream was supplied
     */
    public String setupCurrentEntity(String name, XMLInputSource xmlInputSource,
            boolean literal, boolean isExternal)
            throws IOException, XNIException {
        // get information

        final String publicId = xmlInputSource.getPublicId();
        String literalSystemId = xmlInputSource.getSystemId();
        String baseSystemId = xmlInputSource.getBaseSystemId();
        String encoding = xmlInputSource.getEncoding();
        final boolean encodingExternallySpecified = (encoding != null);
        Boolean isBigEndian = null;

        // create reader
        InputStream stream = null;
        Reader reader = xmlInputSource.getCharacterStream();

        // First chance checking strict URI
        String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
        if (baseSystemId == null) {
            baseSystemId = expandedSystemId;
        }
        // Only fall back to byte-level handling when no character stream was supplied.
        if (reader == null) {
            stream = xmlInputSource.getByteStream();
            if (stream == null) {
                // No stream at all: open the expanded system id as a URL.
                URL location = new URL(expandedSystemId);
                URLConnection connect = location.openConnection();
                if (!(connect instanceof HttpURLConnection)) {
                    stream = connect.getInputStream();
                }
                else {
                    boolean followRedirects = true;

                    // setup URLConnection if we have an HTTPInputSource
                    if (xmlInputSource instanceof HTTPInputSource) {
                        final HttpURLConnection urlConnection = (HttpURLConnection) connect;
                        final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;

                        // set request properties
                        Iterator propIter = httpInputSource.getHTTPRequestProperties();
                        while (propIter.hasNext()) {
                            Map.Entry entry = (Map.Entry) propIter.next();
                            urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
                        }

                        // set preference for redirection
                        followRedirects = httpInputSource.getFollowHTTPRedirects();
                        if (!followRedirects) {
                            urlConnection.setInstanceFollowRedirects(followRedirects);
                        }
                    }

                    stream = connect.getInputStream();

                    // REVISIT: If the URLConnection has external encoding
                    // information, we should be reading it here. It's located
                    // in the charset parameter of Content-Type. -- mrglavas

                    if (followRedirects) {
                        String redirect = connect.getURL().toString();
                        // E43: Check if the URL was redirected, and then
                        // update literal and expanded system IDs if needed.
                        if (!redirect.equals(expandedSystemId)) {
                            literalSystemId = redirect;
                            expandedSystemId = redirect;
                        }
                    }
                }
            }

            // wrap this stream in RewindableInputStream
            stream = new RewindableInputStream(stream);

            // perform auto-detect of encoding if necessary
            if (encoding == null) {
                // read first four bytes and determine encoding
                final byte[] b4 = new byte[4];
                int count = 0;
                // NOTE(review): this loop never stops early, so count is
                // always 4 afterwards and the else branch below cannot be
                // taken; an end-of-stream result (-1) is stored as (byte)0xFF.
                for (; count<4; count++ ) {
                    b4[count] = (byte)stream.read();
                }
                if (count == 4) {
                    // Descriptor layout as used here: [0] encoding name, [1] isBigEndian.
                    Object [] encodingDesc = getEncodingName(b4, count);
                    encoding = (String)(encodingDesc[0]);
                    isBigEndian = (Boolean)(encodingDesc[1]);

                    stream.reset();
                    // Special case UTF-8 files with BOM created by Microsoft
                    // tools. It's more efficient to consume the BOM than make
                    // the reader perform extra checks. -Ac
                    if (count > 2 && encoding.equals("UTF-8")) {
                        int b0 = b4[0] & 0xFF;
                        int b1 = b4[1] & 0xFF;
                        int b2 = b4[2] & 0xFF;
                        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                            // ignore first three bytes...
                            stream.skip(3);
                        }
                    }
                    reader = createReader(stream, encoding, isBigEndian);
                } else {
                    reader = createReader(stream, encoding, isBigEndian);
                }
            }

            // use specified encoding
            else {
                encoding = encoding.toUpperCase(Locale.ENGLISH);

                // If encoding is UTF-8, consume BOM if one is present.
                if (encoding.equals("UTF-8")) {
                    final int[] b3 = new int[3];
                    int count = 0;
                    for (; count < 3; ++count) {
                        b3[count] = stream.read();
                        if (b3[count] == -1)
                            break;
                    }
                    if (count == 3) {
                        if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
                            // First three bytes are not BOM, so reset.
                            stream.reset();
                        }
                    } else {
                        // Fewer than three bytes available: cannot be a BOM.
                        stream.reset();
                    }
                }
                // If encoding is UTF-16, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("UTF-16")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    String utf16Encoding = "UTF-16";
                    if (count >= 2) {
                        final int b0 = b4[0];
                        final int b1 = b4[1];
                        if (b0 == 0xFE && b1 == 0xFF) {
                            // UTF-16, big-endian
                            utf16Encoding = "UTF-16BE";
                            isBigEndian = Boolean.TRUE;
                        }
                        else if (b0 == 0xFF && b1 == 0xFE) {
                            // UTF-16, little-endian
                            utf16Encoding = "UTF-16LE";
                            isBigEndian = Boolean.FALSE;
                        }
                        else if (count == 4) {
                            // No BOM: look for "<?" encoded as two 16-bit units.
                            final int b2 = b4[2];
                            final int b3 = b4[3];
                            if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                // UTF-16, big-endian, no BOM
                                utf16Encoding = "UTF-16BE";
                                isBigEndian = Boolean.TRUE;
                            }
                            if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                // UTF-16, little-endian, no BOM
                                utf16Encoding = "UTF-16LE";
                                isBigEndian = Boolean.FALSE;
                            }
                        }
                    }
                    // NOTE(review): this reader is replaced by the
                    // unconditional createReader(stream, encoding, ...) call
                    // after the if/else chain, so only isBigEndian survives
                    // from the detection above. The stream has been reset and
                    // no BOM bytes are skipped here, so changing this needs
                    // care -- confirm intended behavior before touching it.
                    reader = createReader(stream, utf16Encoding, isBigEndian);
                }
                // If encoding is UCS-4, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-4")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    // Ignore unusual octet order for now.
                    if (count == 4) {
                        // UCS-4, big endian (1234)
                        if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-4, little endian (4321)
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }
                // If encoding is UCS-2, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-2")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    if (count == 4) {
                        // UCS-2, big endian
                        if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-2, little endian
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }

                // Runs for every externally specified encoding, including UTF-16.
                reader = createReader(stream, encoding, isBigEndian);
            }

            // read one character at a time so we don't jump too far
            // ahead, converting characters from the byte stream in
            // the wrong encoding
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ no longer wrapping reader in OneCharReader");
            }
            //reader = new OneCharReader(reader);
        }

        // We've seen a new Reader.
        // Push it on the stack so we can close it later.
        //fOwnReaders.add(reader);

        // push entity on stack
        if (fCurrentEntity != null) {
            fEntityStack.push(fCurrentEntity);
        }

        // create entity
        /* if encoding is specified externally, 'encoding' information present
         * in the prolog of the XML document is not considered. Hence, prolog can
         * be read in Chunks of data instead of byte by byte.
         */
        fCurrentEntity = new com.sun.xml.internal.stream.Entity.ScannedEntity(name,new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
        fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
        fEntityScanner.setCurrentEntity(fCurrentEntity);
        // Keep the shared resource identifier in step with the new entity.
        fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
        if (fLimitAnalyzer != null) {
            fLimitAnalyzer.startEntity(name);
        }
        return encoding;
    } //setupCurrentEntity(String, XMLInputSource, boolean, boolean):  String


    /**
     * Checks whether an entity given by name is external.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity is external, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isExternalEntity(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isExternal();
    }

    /**
     * Checks whether the declaration of an entity given by name is
     * in the external subset.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity was declared in the external subset, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isEntityDeclInExternalSubset(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isEntityDeclInExternalSubset();
    }



    //
    // Public methods
    //

    /**
     * Sets whether the document entity is standalone.
     *
     * @param standalone True if document entity is standalone.
891 */ 892 public void setStandalone(boolean standalone) { 893 fStandalone = standalone; 894 } 895 // setStandalone(boolean) 896 897 /** Returns true if the document entity is standalone. */ 898 public boolean isStandalone() { 899 return fStandalone; 900 } //isStandalone():boolean 901 902 public boolean isDeclaredEntity(String entityName) { 903 904 Entity entity = (Entity)fEntities.get(entityName); 905 return entity != null; 906 } 907 908 public boolean isUnparsedEntity(String entityName) { 909 910 Entity entity = (Entity)fEntities.get(entityName); 911 if (entity == null) { 912 return false; 913 } 914 return entity.isUnparsed(); 915 } 916 917 918 919 // this simply returns the fResourceIdentifier object; 920 // this should only be used with caution by callers that 921 // carefully manage the entity manager's behaviour, so that 922 // this doesn't returning meaningless or misleading data. 923 // @return a reference to the current fResourceIdentifier object 924 public XMLResourceIdentifier getCurrentResourceIdentifier() { 925 return fResourceIdentifier; 926 } 927 928 /** 929 * Sets the entity handler. When an entity starts and ends, the 930 * entity handler is notified of the change. 931 * 932 * @param entityHandler The new entity handler. 
933 */ 934 935 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 936 fEntityHandler = (XMLEntityHandler) entityHandler; 937 } // setEntityHandler(XMLEntityHandler) 938 939 //this function returns StaxXMLInputSource 940 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 941 942 if(resourceIdentifier == null ) return null; 943 944 String publicId = resourceIdentifier.getPublicId(); 945 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 946 String baseSystemId = resourceIdentifier.getBaseSystemId(); 947 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 948 // if no base systemId given, assume that it's relative 949 // to the systemId of the current scanned entity 950 // Sometimes the system id is not (properly) expanded. 951 // We need to expand the system id if: 952 // a. the expanded one was null; or 953 // b. the base system id was null, but becomes non-null from the current entity. 954 boolean needExpand = (expandedSystemId == null); 955 // REVISIT: why would the baseSystemId ever be null? if we 956 // didn't have to make this check we wouldn't have to reuse the 957 // fXMLResourceIdentifier object... 
958 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 959 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 960 if (baseSystemId != null) 961 needExpand = true; 962 } 963 if (needExpand) 964 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 965 966 // give the entity resolver a chance 967 StaxXMLInputSource staxInputSource = null; 968 XMLInputSource xmlInputSource = null; 969 970 XMLResourceIdentifierImpl ri = null; 971 972 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 973 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 974 } else { 975 fResourceIdentifier.clear(); 976 ri = fResourceIdentifier; 977 } 978 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 979 if(DEBUG_RESOLVER){ 980 System.out.println("BEFORE Calling resolveEntity") ; 981 } 982 983 fISCreatedByResolver = false; 984 //either of Stax or Xerces would be null 985 if(fStaxEntityResolver != null){ 986 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 987 if(staxInputSource != null) { 988 fISCreatedByResolver = true; 989 } 990 } 991 992 if(fEntityResolver != null){ 993 xmlInputSource = fEntityResolver.resolveEntity(ri); 994 if(xmlInputSource != null) { 995 fISCreatedByResolver = true; 996 } 997 } 998 999 if(xmlInputSource != null){ 1000 //wrap this XMLInputSource to StaxInputSource 1001 staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver); 1002 } 1003 1004 // do default resolution 1005 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 1006 if (staxInputSource == null) { 1007 // REVISIT: when systemId is null, I think we should return null. 1008 // is this the right solution? 
-SG 1009 //if (systemId != null) 1010 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 1011 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 1012 //Waiting for the clarification from EG. - nb 1013 } 1014 1015 if (DEBUG_RESOLVER) { 1016 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1017 System.err.println(" = " + xmlInputSource); 1018 } 1019 1020 return staxInputSource; 1021 1022 } 1023 1024 /** 1025 * Resolves the specified public and system identifiers. This 1026 * method first attempts to resolve the entity based on the 1027 * EntityResolver registered by the application. If no entity 1028 * resolver is registered or if the registered entity handler 1029 * is unable to resolve the entity, then default entity 1030 * resolution will occur. 1031 * 1032 * @param publicId The public identifier of the entity. 1033 * @param systemId The system identifier of the entity. 1034 * @param baseSystemId The base system identifier of the entity. 1035 * This is the system identifier of the current 1036 * entity and is used to expand the system 1037 * identifier when the system identifier is a 1038 * relative URI. 1039 * 1040 * @return Returns an input source that wraps the resolved entity. 1041 * This method will never return null. 1042 * 1043 * @throws IOException Thrown on i/o error. 1044 * @throws XNIException Thrown by entity resolver to signal an error. 
1045 */ 1046 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1047 if(resourceIdentifier == null ) return null; 1048 String publicId = resourceIdentifier.getPublicId(); 1049 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1050 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1051 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1052 String namespace = resourceIdentifier.getNamespace(); 1053 1054 // if no base systemId given, assume that it's relative 1055 // to the systemId of the current scanned entity 1056 // Sometimes the system id is not (properly) expanded. 1057 // We need to expand the system id if: 1058 // a. the expanded one was null; or 1059 // b. the base system id was null, but becomes non-null from the current entity. 1060 boolean needExpand = (expandedSystemId == null); 1061 // REVISIT: why would the baseSystemId ever be null? if we 1062 // didn't have to make this check we wouldn't have to reuse the 1063 // fXMLResourceIdentifier object... 1064 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1065 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1066 if (baseSystemId != null) 1067 needExpand = true; 1068 } 1069 if (needExpand) 1070 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1071 1072 // give the entity resolver a chance 1073 XMLInputSource xmlInputSource = null; 1074 1075 if (fEntityResolver != null) { 1076 resourceIdentifier.setBaseSystemId(baseSystemId); 1077 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1078 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1079 } 1080 1081 // do default resolution 1082 // REVISIT: what's the correct behavior if the user provided an entity 1083 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1084 // an input source (xmlInputSource == null)? 
1085 // do we do default resolution, or do we just return null? -SG 1086 if (xmlInputSource == null) { 1087 // REVISIT: when systemId is null, I think we should return null. 1088 // is this the right solution? -SG 1089 //if (systemId != null) 1090 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1091 } 1092 1093 if (DEBUG_RESOLVER) { 1094 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1095 System.err.println(" = " + xmlInputSource); 1096 } 1097 1098 return xmlInputSource; 1099 1100 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1101 1102 /** 1103 * Starts a named entity. 1104 * 1105 * @param entityName The name of the entity to start. 1106 * @param literal True if this entity is started within a literal 1107 * value. 1108 * 1109 * @throws IOException Thrown on i/o error. 1110 * @throws XNIException Thrown by entity handler to signal an error. 1111 */ 1112 public void startEntity(String entityName, boolean literal) 1113 throws IOException, XNIException { 1114 1115 // was entity declared? 1116 Entity entity = (Entity)fEntityStorage.getEntity(entityName); 1117 if (entity == null) { 1118 if (fEntityHandler != null) { 1119 String encoding = null; 1120 fResourceIdentifier.clear(); 1121 fEntityAugs.removeAllItems(); 1122 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1123 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1124 fEntityAugs.removeAllItems(); 1125 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1126 fEntityHandler.endEntity(entityName, fEntityAugs); 1127 } 1128 return; 1129 } 1130 1131 // should we skip external entities? 1132 boolean external = entity.isExternal(); 1133 Entity.ExternalEntity externalEntity = null; 1134 String extLitSysId = null, extBaseSysId = null, expandedSystemId = null; 1135 if (external) { 1136 externalEntity = (Entity.ExternalEntity)entity; 1137 extLitSysId = (externalEntity.entityLocation != null ? 
externalEntity.entityLocation.getLiteralSystemId() : null); 1138 extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1139 expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1140 boolean unparsed = entity.isUnparsed(); 1141 boolean parameter = entityName.startsWith("%"); 1142 boolean general = !parameter; 1143 if (unparsed || (general && !fExternalGeneralEntities) || 1144 (parameter && !fExternalParameterEntities) || 1145 !fSupportDTD || !fSupportExternalEntities) { 1146 1147 if (fEntityHandler != null) { 1148 fResourceIdentifier.clear(); 1149 final String encoding = null; 1150 fResourceIdentifier.setValues( 1151 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1152 extLitSysId, extBaseSysId, expandedSystemId); 1153 fEntityAugs.removeAllItems(); 1154 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1155 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1156 fEntityAugs.removeAllItems(); 1157 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1158 fEntityHandler.endEntity(entityName, fEntityAugs); 1159 } 1160 return; 1161 } 1162 } 1163 1164 // is entity recursive? 1165 int size = fEntityStack.size(); 1166 for (int i = size; i >= 0; i--) { 1167 Entity activeEntity = i == size 1168 ? 
fCurrentEntity 1169 : (Entity)fEntityStack.elementAt(i); 1170 if (activeEntity.name == entityName) { 1171 String path = entityName; 1172 for (int j = i + 1; j < size; j++) { 1173 activeEntity = (Entity)fEntityStack.elementAt(j); 1174 path = path + " -> " + activeEntity.name; 1175 } 1176 path = path + " -> " + fCurrentEntity.name; 1177 path = path + " -> " + entityName; 1178 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1179 "RecursiveReference", 1180 new Object[] { entityName, path }, 1181 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1182 1183 if (fEntityHandler != null) { 1184 fResourceIdentifier.clear(); 1185 final String encoding = null; 1186 if (external) { 1187 fResourceIdentifier.setValues( 1188 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1189 extLitSysId, extBaseSysId, expandedSystemId); 1190 } 1191 fEntityAugs.removeAllItems(); 1192 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1193 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1194 fEntityAugs.removeAllItems(); 1195 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1196 fEntityHandler.endEntity(entityName, fEntityAugs); 1197 } 1198 1199 return; 1200 } 1201 } 1202 1203 // resolve external entity 1204 StaxXMLInputSource staxInputSource = null; 1205 XMLInputSource xmlInputSource = null ; 1206 1207 if (external) { 1208 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1209 /** xxx: Waiting from the EG 1210 * //simply return if there was entity resolver registered and application 1211 * //returns either XMLStreamReader or XMLEventReader. 
1212 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1213 */ 1214 xmlInputSource = staxInputSource.getXMLInputSource() ; 1215 if (!fISCreatedByResolver) { 1216 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation 1217 if (fLoadExternalDTD) { 1218 String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL); 1219 if (accessError != null) { 1220 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1221 "AccessExternalEntity", 1222 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError }, 1223 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1224 } 1225 } 1226 } 1227 } 1228 // wrap internal entity 1229 else { 1230 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1231 Reader reader = new StringReader(internalEntity.text); 1232 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1233 } 1234 1235 // start the entity 1236 startEntity(entityName, xmlInputSource, literal, external); 1237 1238 } // startEntity(String,boolean) 1239 1240 /** 1241 * Starts the document entity. The document entity has the "[xml]" 1242 * pseudo-name. 1243 * 1244 * @param xmlInputSource The input source of the document entity. 1245 * 1246 * @throws IOException Thrown on i/o error. 1247 * @throws XNIException Thrown by entity handler to signal an error. 1248 */ 1249 public void startDocumentEntity(XMLInputSource xmlInputSource) 1250 throws IOException, XNIException { 1251 startEntity(XMLEntity, xmlInputSource, false, true); 1252 } // startDocumentEntity(XMLInputSource) 1253 1254 //xxx these methods are not required. 1255 /** 1256 * Starts the DTD entity. The DTD entity has the "[dtd]" 1257 * pseudo-name. 1258 * 1259 * @param xmlInputSource The input source of the DTD entity. 1260 * 1261 * @throws IOException Thrown on i/o error. 1262 * @throws XNIException Thrown by entity handler to signal an error. 
*/
    public void startDTDEntity(XMLInputSource xmlInputSource)
    throws IOException, XNIException {
        startEntity(DTDEntity, xmlInputSource, false, true);
    } // startDTDEntity(XMLInputSource)

    // indicate start of external subset so that
    // location of entity decls can be tracked
    public void startExternalSubset() {
        fInExternalSubset = true;
    }

    // indicate that the external subset has ended
    public void endExternalSubset() {
        fInExternalSubset = false;
    }

    /**
     * Starts an entity.
     * <p>
     * This method can be used to insert an application defined XML
     * entity stream into the parsing stream.
     *
     * @param name           The name of the entity.
     * @param xmlInputSource The input source of the entity.
     * @param literal        True if this entity is started within a
     *                       literal value.
     * @param isExternal    whether this entity should be treated as an internal or external entity.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown by entity handler to signal an error.
     */
    public void startEntity(String name,
            XMLInputSource xmlInputSource,
            boolean literal, boolean isExternal)
            throws IOException, XNIException {

        String encoding = setupCurrentEntity(name, xmlInputSource, literal, isExternal);

        //when entity expansion limit is set by the Application, we need to
        //check for the entity expansion limit set by the parser, if number of entity
        //expansions exceeds the entity expansion limit, parser will throw fatal error.
        // Note that this represents the nesting level of open entities.
        fEntityExpansionCount++;
        if (fLimitAnalyzer != null) {
            fLimitAnalyzer.addValue(entityExpansionIndex, name, 1);
        }
        if (fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)) {
            fSecurityManager.debugPrint(fLimitAnalyzer);
            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimitExceeded",
                    new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)},
                    XMLErrorReporter.SEVERITY_FATAL_ERROR );
            // is there anything better to do than reset the counter?
            // at least one can envision debugging applications where this might
            // be useful...
            fEntityExpansionCount = 0;
        }

        // call handler
        if (fEntityHandler != null) {
            fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null);
        }

    } // startEntity(String,XMLInputSource)

    /**
     * Return the current entity being scanned. Current entity is SET using startEntity function.
     * @return Entity.ScannedEntity
     */

    public Entity.ScannedEntity getCurrentEntity(){
        return fCurrentEntity ;
    }

    /**
     * Return the top level entity handled by this manager, or null
     * if no entity was added.
     */
    public Entity.ScannedEntity getTopLevelEntity() {
        return (Entity.ScannedEntity)
            (fEntityStack.empty() ? null : fEntityStack.elementAt(0));
    }


    /**
     * Close all opened InputStreams and Readers opened by this parser.
     */
    public void closeReaders() {
        /** this call actually does nothing, readers are closed in the endEntity method
         * through the current entity.
         * The change seems to have happened during the jdk6 development with the
         * addition of StAX
        **/
    }

    /**
     * Ends the current entity: pops the entity stack, closes the current
     * entity, notifies the entity handler and makes the popped entity
     * current.
     *
     * @throws EOFException when no entity is left and the one just ended
     *                      was not the document entity.
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown by entity handler to signal an error, or
     *                      wrapping an IOException raised while closing
     *                      the current entity.
     */
    public void endEntity() throws IOException, XNIException {

        // call handler
        if (DEBUG_BUFFER) {
            System.out.print("(endEntity: ");
            print();
            System.out.println();
        }
        //pop the entity from the stack
        Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ;

        /** need to close the reader first since the program can end
         *  prematurely (e.g. fEntityHandler.endEntity may throw exception)
         *  leaving the reader open
         */
        //close the reader
        if (fCurrentEntity != null) {
            try {
                if (fLimitAnalyzer != null) {
                    fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name);
                    // the security manager is optional everywhere else in
                    // this class, so it must not be assumed present here
                    if (fSecurityManager != null && fCurrentEntity.name.equals("[xml]")) {
                        fSecurityManager.debugPrint(fLimitAnalyzer);
                    }
                }
                fCurrentEntity.close();
            } catch (IOException ex) {
                throw new XNIException(ex);
            }
        }

        // Only notify when there is an entity to name: the original guarded
        // the close above but then dereferenced fCurrentEntity regardless.
        if (fEntityHandler != null && fCurrentEntity != null) {
            //so this is the last opened entity, signal it to current fEntityHandler using Augmentation
            if (entity == null) {
                fEntityAugs.removeAllItems();
                fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE);
                fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs);
                fEntityAugs.removeAllItems();
            } else {
                fEntityHandler.endEntity(fCurrentEntity.name, null);
            }
        }
        //check if it is a document entity
        boolean documentEntity = fCurrentEntity != null && XMLEntity.equals(fCurrentEntity.name);

        //set popped entity as current entity
        fCurrentEntity = entity;
        fEntityScanner.setCurrentEntity(fCurrentEntity);

        //check if there are any entity left in the stack -- if there are
        //no entries EOF has been reached.
        // throw exception when it is the last entity but it is not a document entity

        if (fCurrentEntity == null && !documentEntity) {
            throw new EOFException() ;
        }

        if (DEBUG_BUFFER) {
            System.out.print(")endEntity: ");
            print();
            System.out.println();
        }

    } // endEntity()


    //
    // XMLComponent methods
    //
    /**
     * Resets this manager from a StAX property manager: resets the entity
     * storage and scanner, re-reads the properties used below, and clears
     * the entity table, the entity stack and the current entity.
     *
     * @param propertyManager Source of the property values.
     */
    public void reset(PropertyManager propertyManager){
        //reset fEntityStorage
        fEntityStorage.reset(propertyManager);
        //reset XMLEntityReaderImpl
        fEntityScanner.reset(propertyManager);
        // xerces properties
        fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
        fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
        try {
            fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER);
        } catch (XMLConfigurationException e) {
            // no StAX resolver available: fall back to none
            fStaxEntityResolver = null;
        }

        fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue();
        fReplaceEntityReferences = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue();
        fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue();

        // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd
        fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue();

        // JAXP 1.5 feature
        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

        fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);

        // initialize state
        //fStandalone = false;
        fEntities.clear();
        fEntityStack.removeAllElements();
        fCurrentEntity = null;
        fValidation = false;
        fExternalGeneralEntities = true;
        fExternalParameterEntities = true;
        fAllowJavaEncodings = true ;
    }

    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     * <p>
     * When the PARSER_SETTINGS feature is false only the general state,
     * the entity scanner and the entity storage are reset; otherwise all
     * features and properties are re-read first.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Thrown by component on initialization
     *                      error. For example, if a feature or property is
     *                      required for the operation of the component, the
     *                      component manager may throw it.
     */
    public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {

        boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true);

        if (!parser_settings) {
            // parser settings have not been changed
            reset();
            if (fEntityScanner != null) {
                fEntityScanner.reset(componentManager);
            }
            if (fEntityStorage != null) {
                fEntityStorage.reset(componentManager);
            }
            return;
        }

        // sax features
        fValidation = componentManager.getFeature(VALIDATION, false);
        fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true);
        fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true);

        // xerces features
        fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
        fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false);
        fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);
        fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true);

        // xerces properties
        fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
        fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
        fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null);
        fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null);
        fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null);
        fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null);
        // The property is fetched with a null default, so it may be absent;
        // the limit index is left unchanged in that case.
        if (fSecurityManager != null) {
            entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT);
        }

        //StAX Property
        fSupportDTD = true;
        fReplaceEntityReferences = true;
        fSupportExternalEntities = true;

        // JAXP 1.5 feature
        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
        if (spm == null) {
            spm = new XMLSecurityPropertyManager();
        }
        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

        //reset general state
        reset();

        fEntityScanner.reset(componentManager);
        fEntityStorage.reset(componentManager);

    } // reset(XMLComponentManager)

    // reset general state.  Should not be called other than by
    // a class acting as a component manager but not
    // implementing that interface for whatever reason.
1538 public void reset() { 1539 1540 // initialize state 1541 fStandalone = false; 1542 fEntities.clear(); 1543 fEntityStack.removeAllElements(); 1544 fEntityExpansionCount = 0; 1545 1546 fCurrentEntity = null; 1547 // reset scanner 1548 if(fXML10EntityScanner != null){ 1549 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1550 } 1551 if(fXML11EntityScanner != null) { 1552 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1553 } 1554 1555 // DEBUG 1556 if (DEBUG_ENTITIES) { 1557 addInternalEntity("text", "Hello, World."); 1558 addInternalEntity("empty-element", "<foo/>"); 1559 addInternalEntity("balanced-element", "<foo></foo>"); 1560 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1561 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1562 addInternalEntity("unbalanced-entity", "<foo>"); 1563 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1564 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1565 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1566 try { 1567 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1568 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1569 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1570 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1571 } 1572 catch (IOException ex) { 1573 // should never happen 1574 } 1575 } 1576 1577 fEntityHandler = null; 1578 1579 // reset scanner 1580 //if(fEntityScanner!=null) 1581 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1582 1583 } 1584 /** 1585 * Returns a list of feature identifiers that are recognized by 1586 * this component. This method may return null if no features 1587 * are recognized by this component. 
1588 */ 1589 public String[] getRecognizedFeatures() { 1590 return (String[])(RECOGNIZED_FEATURES.clone()); 1591 } // getRecognizedFeatures():String[] 1592 1593 /** 1594 * Sets the state of a feature. This method is called by the component 1595 * manager any time after reset when a feature changes state. 1596 * <p> 1597 * <strong>Note:</strong> Components should silently ignore features 1598 * that do not affect the operation of the component. 1599 * 1600 * @param featureId The feature identifier. 1601 * @param state The state of the feature. 1602 * 1603 * @throws SAXNotRecognizedException The component should not throw 1604 * this exception. 1605 * @throws SAXNotSupportedException The component should not throw 1606 * this exception. 1607 */ 1608 public void setFeature(String featureId, boolean state) 1609 throws XMLConfigurationException { 1610 1611 // xerces features 1612 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1613 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1614 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1615 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1616 fAllowJavaEncodings = state; 1617 } 1618 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 1619 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 1620 fLoadExternalDTD = state; 1621 return; 1622 } 1623 } 1624 1625 } // setFeature(String,boolean) 1626 1627 /** 1628 * Sets the value of a property. This method is called by the component 1629 * manager any time after reset when a property changes value. 1630 * <p> 1631 * <strong>Note:</strong> Components should silently ignore properties 1632 * that do not affect the operation of the component. 1633 * 1634 * @param propertyId The property identifier. 1635 * @param value The value of the property. 1636 * 1637 * @throws SAXNotRecognizedException The component should not throw 1638 * this exception. 
1639 * @throws SAXNotSupportedException The component should not throw 1640 * this exception. 1641 */ 1642 public void setProperty(String propertyId, Object value){ 1643 // Xerces properties 1644 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1645 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1646 1647 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1648 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1649 fSymbolTable = (SymbolTable)value; 1650 return; 1651 } 1652 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1653 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1654 fErrorReporter = (XMLErrorReporter)value; 1655 return; 1656 } 1657 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1658 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1659 fEntityResolver = (XMLEntityResolver)value; 1660 return; 1661 } 1662 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1663 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1664 Integer bufferSize = (Integer)value; 1665 if (bufferSize != null && 1666 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1667 fBufferSize = bufferSize.intValue(); 1668 fEntityScanner.setBufferSize(fBufferSize); 1669 fBufferPool.setExternalBufferSize(fBufferSize); 1670 } 1671 } 1672 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1673 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1674 fSecurityManager = (XMLSecurityManager)value; 1675 } 1676 } 1677 1678 //JAXP 1.5 properties 1679 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 1680 { 1681 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 1682 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1683 } 1684 } 1685 1686 public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) { 1687 this.fLimitAnalyzer = fLimitAnalyzer; 1688 } 1689 1690 /** 
1691 * Returns a list of property identifiers that are recognized by 1692 * this component. This method may return null if no properties 1693 * are recognized by this component. 1694 */ 1695 public String[] getRecognizedProperties() { 1696 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1697 } // getRecognizedProperties():String[] 1698 /** 1699 * Returns the default state for a feature, or null if this 1700 * component does not want to report a default value for this 1701 * feature. 1702 * 1703 * @param featureId The feature identifier. 1704 * 1705 * @since Xerces 2.2.0 1706 */ 1707 public Boolean getFeatureDefault(String featureId) { 1708 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1709 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1710 return FEATURE_DEFAULTS[i]; 1711 } 1712 } 1713 return null; 1714 } // getFeatureDefault(String):Boolean 1715 1716 /** 1717 * Returns the default state for a property, or null if this 1718 * component does not want to report a default value for this 1719 * property. 1720 * 1721 * @param propertyId The property identifier. 1722 * 1723 * @since Xerces 2.2.0 1724 */ 1725 public Object getPropertyDefault(String propertyId) { 1726 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1727 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1728 return PROPERTY_DEFAULTS[i]; 1729 } 1730 } 1731 return null; 1732 } // getPropertyDefault(String):Object 1733 1734 // 1735 // Public static methods 1736 // 1737 1738 /** 1739 * Expands a system id and returns the system id as a URI, if 1740 * it can be expanded. A return value of null means that the 1741 * identifier is already expanded. An exception thrown 1742 * indicates a failure to expand the id. 1743 * 1744 * @param systemId The systemId to be expanded. 1745 * 1746 * @return Returns the URI string representing the expanded system 1747 * identifier. A null value indicates that the given 1748 * system identifier is already expanded. 
*
     */
    public static String expandSystemId(String systemId) {
        // no base system id is supplied
        final String noBase = null;
        return expandSystemId(systemId, noBase);
    } // expandSystemId(String):String

    //
    // Public static methods
    //

    // current value of the "user.dir" property
    private static String gUserDir;
    // cached URI object for the current value of the escaped "user.dir" property stored as a URI
    private static URI gUserDirURI;
    // which ASCII characters need to be escaped
    private static boolean gNeedEscaping[] = new boolean[128];
    // the first hex character if a character needs to be escaped
    private static char gAfterEscaping1[] = new char[128];
    // the second hex character if a character needs to be escaped
    private static char gAfterEscaping2[] = new char[128];
    private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    // initialize the above 3 arrays
    static {
        // control characters 0x00..0x1F
        int code = 0;
        while (code <= 0x1f) {
            gNeedEscaping[code] = true;
            gAfterEscaping1[code] = gHexChs[code >> 4];
            gAfterEscaping2[code] = gHexChs[code & 0xf];
            code++;
        }

        // DEL
        gNeedEscaping[0x7f] = true;
        gAfterEscaping1[0x7f] = '7';
        gAfterEscaping2[0x7f] = 'F';

        // printable characters that are escaped as well
        final char[] reserved = " <>#%\"{}|\\^~[]`".toCharArray();
        for (int k = 0; k < reserved.length; k++) {
            final char c = reserved[k];
            gNeedEscaping[c] = true;
            gAfterEscaping1[c] = gHexChs[c >> 4];
            gAfterEscaping2[c] = gHexChs[c & 0xf];
        }
    }

    // To escape the "user.dir" system property, by using %HH to represent
    // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%'
    // and '"'. It's a static method, so needs to be synchronized.
    // this method looks heavy, but since the system property isn't expected
    // to change often, so in most cases, we only need to return the URI
    // that was escaped before.
1799 // According to the URI spec, non-ASCII characters (whose value >= 128) 1800 // need to be escaped too. 1801 // REVISIT: don't know how to escape non-ASCII characters, especially 1802 // which encoding to use. Leave them for now. 1803 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1804 // get the user.dir property 1805 String userDir = ""; 1806 try { 1807 userDir = SecuritySupport.getSystemProperty("user.dir"); 1808 } 1809 catch (SecurityException se) { 1810 } 1811 1812 // return empty string if property value is empty string. 1813 if (userDir.length() == 0) 1814 return new URI("file", "", "", null, null); 1815 // compute the new escaped value if the new property value doesn't 1816 // match the previous one 1817 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1818 return gUserDirURI; 1819 } 1820 1821 // record the new value as the global property value 1822 gUserDir = userDir; 1823 1824 char separator = java.io.File.separatorChar; 1825 userDir = userDir.replace(separator, '/'); 1826 1827 int len = userDir.length(), ch; 1828 StringBuilder buffer = new StringBuilder(len*3); 1829 // change C:/blah to /C:/blah 1830 if (len >= 2 && userDir.charAt(1) == ':') { 1831 ch = Character.toUpperCase(userDir.charAt(0)); 1832 if (ch >= 'A' && ch <= 'Z') { 1833 buffer.append('/'); 1834 } 1835 } 1836 1837 // for each character in the path 1838 int i = 0; 1839 for (; i < len; i++) { 1840 ch = userDir.charAt(i); 1841 // if it's not an ASCII character, break here, and use UTF-8 encoding 1842 if (ch >= 128) 1843 break; 1844 if (gNeedEscaping[ch]) { 1845 buffer.append('%'); 1846 buffer.append(gAfterEscaping1[ch]); 1847 buffer.append(gAfterEscaping2[ch]); 1848 // record the fact that it's escaped 1849 } 1850 else { 1851 buffer.append((char)ch); 1852 } 1853 } 1854 1855 // we saw some non-ascii character 1856 if (i < len) { 1857 // get UTF-8 bytes for the remaining sub-string 1858 byte[] bytes = null; 1859 byte b; 1860 try { 1861 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1862 } catch (java.io.UnsupportedEncodingException e) { 1863 // should never happen 1864 return new URI("file", "", userDir, null, null); 1865 } 1866 len = bytes.length; 1867 1868 // for each byte 1869 for (i = 0; i < len; i++) { 1870 b = bytes[i]; 1871 // for non-ascii character: make it positive, then escape 1872 if (b < 0) { 1873 ch = b + 256; 1874 buffer.append('%'); 1875 buffer.append(gHexChs[ch >> 4]); 1876 buffer.append(gHexChs[ch & 0xf]); 1877 } 1878 else if (gNeedEscaping[b]) { 1879 buffer.append('%'); 1880 buffer.append(gAfterEscaping1[b]); 1881 buffer.append(gAfterEscaping2[b]); 1882 } 1883 else { 1884 buffer.append((char)b); 1885 } 1886 } 1887 } 1888 1889 // change blah/blah to blah/blah/ 1890 if (!userDir.endsWith("/")) 1891 buffer.append('/'); 1892 1893 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1894 1895 return gUserDirURI; 1896 } 1897 1898 public static OutputStream createOutputStream(String uri) throws IOException { 1899 // URI was specified. Handle relative URIs. 1900 final String expanded = XMLEntityManager.expandSystemId(uri, null, true); 1901 final URL url = new URL(expanded != null ? expanded : uri); 1902 OutputStream out = null; 1903 String protocol = url.getProtocol(); 1904 String host = url.getHost(); 1905 // Use FileOutputStream if this URI is for a local file. 1906 if (protocol.equals("file") 1907 && (host == null || host.length() == 0 || host.equals("localhost"))) { 1908 File file = new File(getPathWithoutEscapes(url.getPath())); 1909 if (!file.exists()) { 1910 File parent = file.getParentFile(); 1911 if (parent != null && !parent.exists()) { 1912 parent.mkdirs(); 1913 } 1914 } 1915 out = new FileOutputStream(file); 1916 } 1917 // Try to write to some other kind of URI. Some protocols 1918 // won't support this, though HTTP should work. 
1919 else { 1920 URLConnection urlCon = url.openConnection(); 1921 urlCon.setDoInput(false); 1922 urlCon.setDoOutput(true); 1923 urlCon.setUseCaches(false); // Enable tunneling. 1924 if (urlCon instanceof HttpURLConnection) { 1925 // The DOM L3 REC says if we are writing to an HTTP URI 1926 // it is to be done with an HTTP PUT. 1927 HttpURLConnection httpCon = (HttpURLConnection) urlCon; 1928 httpCon.setRequestMethod("PUT"); 1929 } 1930 out = urlCon.getOutputStream(); 1931 } 1932 return out; 1933 } 1934 1935 private static String getPathWithoutEscapes(String origPath) { 1936 if (origPath != null && origPath.length() != 0 && origPath.indexOf('%') != -1) { 1937 // Locate the escape characters 1938 StringTokenizer tokenizer = new StringTokenizer(origPath, "%"); 1939 StringBuilder result = new StringBuilder(origPath.length()); 1940 int size = tokenizer.countTokens(); 1941 result.append(tokenizer.nextToken()); 1942 for(int i = 1; i < size; ++i) { 1943 String token = tokenizer.nextToken(); 1944 // Decode the 2 digit hexadecimal number following % in '%nn' 1945 result.append((char)Integer.valueOf(token.substring(0, 2), 16).intValue()); 1946 result.append(token.substring(2)); 1947 } 1948 return result.toString(); 1949 } 1950 return origPath; 1951 } 1952 1953 /** 1954 * Absolutizes a URI using the current value 1955 * of the "user.dir" property as the base URI. If 1956 * the URI is already absolute, this is a no-op. 1957 * 1958 * @param uri the URI to absolutize 1959 */ 1960 public static void absolutizeAgainstUserDir(URI uri) 1961 throws URI.MalformedURIException { 1962 uri.absolutize(getUserDir()); 1963 } 1964 1965 /** 1966 * Expands a system id and returns the system id as a URI, if 1967 * it can be expanded. A return value of null means that the 1968 * identifier is already expanded. An exception thrown 1969 * indicates a failure to expand the id. 1970 * 1971 * @param systemId The systemId to be expanded. 
1972 * 1973 * @return Returns the URI string representing the expanded system 1974 * identifier. A null value indicates that the given 1975 * system identifier is already expanded. 1976 * 1977 */ 1978 public static String expandSystemId(String systemId, String baseSystemId) { 1979 1980 // check for bad parameters id 1981 if (systemId == null || systemId.length() == 0) { 1982 return systemId; 1983 } 1984 // if id already expanded, return 1985 try { 1986 URI uri = new URI(systemId); 1987 if (uri != null) { 1988 return systemId; 1989 } 1990 } catch (URI.MalformedURIException e) { 1991 // continue on... 1992 } 1993 // normalize id 1994 String id = fixURI(systemId); 1995 1996 // normalize base 1997 URI base = null; 1998 URI uri = null; 1999 try { 2000 if (baseSystemId == null || baseSystemId.length() == 0 || 2001 baseSystemId.equals(systemId)) { 2002 String dir = getUserDir().toString(); 2003 base = new URI("file", "", dir, null, null); 2004 } else { 2005 try { 2006 base = new URI(fixURI(baseSystemId)); 2007 } catch (URI.MalformedURIException e) { 2008 if (baseSystemId.indexOf(':') != -1) { 2009 // for xml schemas we might have baseURI with 2010 // a specified drive 2011 base = new URI("file", "", fixURI(baseSystemId), null, null); 2012 } else { 2013 String dir = getUserDir().toString(); 2014 dir = dir + fixURI(baseSystemId); 2015 base = new URI("file", "", dir, null, null); 2016 } 2017 } 2018 } 2019 // expand id 2020 uri = new URI(base, id); 2021 } catch (Exception e) { 2022 // let it go through 2023 2024 } 2025 2026 if (uri == null) { 2027 return systemId; 2028 } 2029 return uri.toString(); 2030 2031 } // expandSystemId(String,String):String 2032 2033 /** 2034 * Expands a system id and returns the system id as a URI, if 2035 * it can be expanded. A return value of null means that the 2036 * identifier is already expanded. An exception thrown 2037 * indicates a failure to expand the id. 2038 * 2039 * @param systemId The systemId to be expanded. 
2040 * 2041 * @return Returns the URI string representing the expanded system 2042 * identifier. A null value indicates that the given 2043 * system identifier is already expanded. 2044 * 2045 */ 2046 public static String expandSystemId(String systemId, String baseSystemId, 2047 boolean strict) 2048 throws URI.MalformedURIException { 2049 2050 // check if there is a system id before 2051 // trying to expand it. 2052 if (systemId == null) { 2053 return null; 2054 } 2055 2056 // system id has to be a valid URI 2057 if (strict) { 2058 2059 2060 // check if there is a system id before 2061 // trying to expand it. 2062 if (systemId == null) { 2063 return null; 2064 } 2065 2066 try { 2067 // if it's already an absolute one, return it 2068 new URI(systemId); 2069 return systemId; 2070 } 2071 catch (URI.MalformedURIException ex) { 2072 } 2073 URI base = null; 2074 // if there isn't a base uri, use the working directory 2075 if (baseSystemId == null || baseSystemId.length() == 0) { 2076 base = new URI("file", "", getUserDir().toString(), null, null); 2077 } 2078 // otherwise, use the base uri 2079 else { 2080 try { 2081 base = new URI(baseSystemId); 2082 } 2083 catch (URI.MalformedURIException e) { 2084 // assume "base" is also a relative uri 2085 String dir = getUserDir().toString(); 2086 dir = dir + baseSystemId; 2087 base = new URI("file", "", dir, null, null); 2088 } 2089 } 2090 // absolutize the system id using the base 2091 URI uri = new URI(base, systemId); 2092 // return the string rep of the new uri (an absolute one) 2093 return uri.toString(); 2094 2095 // if any exception is thrown, it'll get thrown to the caller. 2096 } 2097 2098 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 
2099 try { 2100 return expandSystemIdStrictOff(systemId, baseSystemId); 2101 } 2102 catch (URI.MalformedURIException e) { 2103 /** Xerces URI rejects unicode, try java.net.URI 2104 * this is not ideal solution, but it covers known cases which either 2105 * Xerces URI or java.net.URI can handle alone 2106 * will file bug against java.net.URI 2107 */ 2108 try { 2109 return expandSystemIdStrictOff1(systemId, baseSystemId); 2110 } catch (URISyntaxException ex) { 2111 // continue on... 2112 } 2113 } 2114 // check for bad parameters id 2115 if (systemId.length() == 0) { 2116 return systemId; 2117 } 2118 2119 // normalize id 2120 String id = fixURI(systemId); 2121 2122 // normalize base 2123 URI base = null; 2124 URI uri = null; 2125 try { 2126 if (baseSystemId == null || baseSystemId.length() == 0 || 2127 baseSystemId.equals(systemId)) { 2128 base = getUserDir(); 2129 } 2130 else { 2131 try { 2132 base = new URI(fixURI(baseSystemId).trim()); 2133 } 2134 catch (URI.MalformedURIException e) { 2135 if (baseSystemId.indexOf(':') != -1) { 2136 // for xml schemas we might have baseURI with 2137 // a specified drive 2138 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2139 } 2140 else { 2141 base = new URI(getUserDir(), fixURI(baseSystemId)); 2142 } 2143 } 2144 } 2145 // expand id 2146 uri = new URI(base, id.trim()); 2147 } 2148 catch (Exception e) { 2149 // let it go through 2150 2151 } 2152 2153 if (uri == null) { 2154 return systemId; 2155 } 2156 return uri.toString(); 2157 2158 } // expandSystemId(String,String,boolean):String 2159 2160 /** 2161 * Helper method for expandSystemId(String,String,boolean):String 2162 */ 2163 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2164 throws URI.MalformedURIException { 2165 2166 URI systemURI = new URI(systemId, true); 2167 // If it's already an absolute one, return it 2168 if (systemURI.isAbsoluteURI()) { 2169 return systemId; 2170 } 2171 2172 // If there isn't a base URI, use 
the working directory 2173 URI baseURI = null; 2174 if (baseSystemId == null || baseSystemId.length() == 0) { 2175 baseURI = getUserDir(); 2176 } 2177 else { 2178 baseURI = new URI(baseSystemId, true); 2179 if (!baseURI.isAbsoluteURI()) { 2180 // assume "base" is also a relative uri 2181 baseURI.absolutize(getUserDir()); 2182 } 2183 } 2184 2185 // absolutize the system identifier using the base URI 2186 systemURI.absolutize(baseURI); 2187 2188 // return the string rep of the new uri (an absolute one) 2189 return systemURI.toString(); 2190 2191 // if any exception is thrown, it'll get thrown to the caller. 2192 2193 } // expandSystemIdStrictOn(String,String):String 2194 2195 /** 2196 * Helper method for expandSystemId(String,String,boolean):String 2197 */ 2198 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2199 throws URI.MalformedURIException { 2200 2201 URI systemURI = new URI(systemId, true); 2202 // If it's already an absolute one, return it 2203 if (systemURI.isAbsoluteURI()) { 2204 if (systemURI.getScheme().length() > 1) { 2205 return systemId; 2206 } 2207 /** 2208 * If the scheme's length is only one character, 2209 * it's likely that this was intended as a file 2210 * path. Fixing this up in expandSystemId to 2211 * maintain backwards compatibility. 
2212 */ 2213 throw new URI.MalformedURIException(); 2214 } 2215 2216 // If there isn't a base URI, use the working directory 2217 URI baseURI = null; 2218 if (baseSystemId == null || baseSystemId.length() == 0) { 2219 baseURI = getUserDir(); 2220 } 2221 else { 2222 baseURI = new URI(baseSystemId, true); 2223 if (!baseURI.isAbsoluteURI()) { 2224 // assume "base" is also a relative uri 2225 baseURI.absolutize(getUserDir()); 2226 } 2227 } 2228 2229 // absolutize the system identifier using the base URI 2230 systemURI.absolutize(baseURI); 2231 2232 // return the string rep of the new uri (an absolute one) 2233 return systemURI.toString(); 2234 2235 // if any exception is thrown, it'll get thrown to the caller. 2236 2237 } // expandSystemIdStrictOff(String,String):String 2238 2239 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2240 throws URISyntaxException, URI.MalformedURIException { 2241 2242 java.net.URI systemURI = new java.net.URI(systemId); 2243 // If it's already an absolute one, return it 2244 if (systemURI.isAbsolute()) { 2245 if (systemURI.getScheme().length() > 1) { 2246 return systemId; 2247 } 2248 /** 2249 * If the scheme's length is only one character, 2250 * it's likely that this was intended as a file 2251 * path. Fixing this up in expandSystemId to 2252 * maintain backwards compatibility. 
2253 */ 2254 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2255 } 2256 2257 // If there isn't a base URI, use the working directory 2258 URI baseURI = null; 2259 if (baseSystemId == null || baseSystemId.length() == 0) { 2260 baseURI = getUserDir(); 2261 } 2262 else { 2263 baseURI = new URI(baseSystemId, true); 2264 if (!baseURI.isAbsoluteURI()) { 2265 // assume "base" is also a relative uri 2266 baseURI.absolutize(getUserDir()); 2267 } 2268 } 2269 2270 // absolutize the system identifier using the base URI 2271 // systemURI.absolutize(baseURI); 2272 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2273 2274 // return the string rep of the new uri (an absolute one) 2275 return systemURI.toString(); 2276 2277 // if any exception is thrown, it'll get thrown to the caller. 2278 2279 } // expandSystemIdStrictOff(String,String):String 2280 2281 // 2282 // Protected methods 2283 // 2284 2285 2286 /** 2287 * Returns the IANA encoding name that is auto-detected from 2288 * the bytes specified, with the endian-ness of that encoding where appropriate. 2289 * 2290 * @param b4 The first four bytes of the input. 2291 * @param count The number of bytes actually read. 2292 * @return a 2-element array: the first element, an IANA-encoding string, 2293 * the second element a Boolean which is true iff the document is big endian, false 2294 * if it's little-endian, and null if the distinction isn't relevant. 
2295 */ 2296 protected Object[] getEncodingName(byte[] b4, int count) { 2297 2298 if (count < 2) { 2299 return defaultEncoding; 2300 } 2301 2302 // UTF-16, with BOM 2303 int b0 = b4[0] & 0xFF; 2304 int b1 = b4[1] & 0xFF; 2305 if (b0 == 0xFE && b1 == 0xFF) { 2306 // UTF-16, big-endian 2307 return new Object [] {"UTF-16BE", new Boolean(true)}; 2308 } 2309 if (b0 == 0xFF && b1 == 0xFE) { 2310 // UTF-16, little-endian 2311 return new Object [] {"UTF-16LE", new Boolean(false)}; 2312 } 2313 2314 // default to UTF-8 if we don't have enough bytes to make a 2315 // good determination of the encoding 2316 if (count < 3) { 2317 return defaultEncoding; 2318 } 2319 2320 // UTF-8 with a BOM 2321 int b2 = b4[2] & 0xFF; 2322 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2323 return defaultEncoding; 2324 } 2325 2326 // default to UTF-8 if we don't have enough bytes to make a 2327 // good determination of the encoding 2328 if (count < 4) { 2329 return defaultEncoding; 2330 } 2331 2332 // other encodings 2333 int b3 = b4[3] & 0xFF; 2334 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2335 // UCS-4, big endian (1234) 2336 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2337 } 2338 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2339 // UCS-4, little endian (4321) 2340 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2341 } 2342 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2343 // UCS-4, unusual octet order (2143) 2344 // REVISIT: What should this be? 2345 return new Object [] {"ISO-10646-UCS-4", null}; 2346 } 2347 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2348 // UCS-4, unusual octect order (3412) 2349 // REVISIT: What should this be? 2350 return new Object [] {"ISO-10646-UCS-4", null}; 2351 } 2352 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2353 // UTF-16, big-endian, no BOM 2354 // (or could turn out to be UCS-2... 2355 // REVISIT: What should this be? 
2356 return new Object [] {"UTF-16BE", new Boolean(true)}; 2357 } 2358 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2359 // UTF-16, little-endian, no BOM 2360 // (or could turn out to be UCS-2... 2361 return new Object [] {"UTF-16LE", new Boolean(false)}; 2362 } 2363 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2364 // EBCDIC 2365 // a la xerces1, return CP037 instead of EBCDIC here 2366 return new Object [] {"CP037", null}; 2367 } 2368 2369 return defaultEncoding; 2370 2371 } // getEncodingName(byte[],int):Object[] 2372 2373 /** 2374 * Creates a reader capable of reading the given input stream in 2375 * the specified encoding. 2376 * 2377 * @param inputStream The input stream. 2378 * @param encoding The encoding name that the input stream is 2379 * encoded using. If the user has specified that 2380 * Java encoding names are allowed, then the 2381 * encoding name may be a Java encoding name; 2382 * otherwise, it is an ianaEncoding name. 2383 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2384 * specify a byte order, this tells whether the order is bigEndian. null menas 2385 * unknown or not relevant. 2386 * 2387 * @return Returns a reader. 
     */
    protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
    throws IOException {

        // normalize encoding name: a missing name is treated as UTF-8
        if (encoding == null) {
            encoding = "UTF-8";
        }

        // try to use an optimized reader; names are compared in upper case
        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
        if (ENCODING.equals("UTF-8")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating UTF8Reader");
            }
            return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
        }
        if (ENCODING.equals("US-ASCII")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating ASCIIReader");
            }
            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
        }
        if(ENCODING.equals("ISO-10646-UCS-4")) {
            if(isBigEndian != null) {
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS4BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS4LE);
                }
            } else {
                // byte order unknown: report it; if reportError returns,
                // execution continues with the generic handling below
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }
        if(ENCODING.equals("ISO-10646-UCS-2")) {
            if(isBigEndian != null) { // should never happen with this encoding...
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS2BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS2LE);
                }
            } else {
                // same fall-through as for UCS-4 above
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }

        // check for valid name
        boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
        boolean validJava = XMLChar.isValidJavaEncoding(encoding);
        if (!validIANA || (fAllowJavaEncodings && !validJava)) {
            fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                    "EncodingDeclInvalid",
                    new Object[] { encoding },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
            //       because every byte is a valid ISO Latin 1 character.
            //       It may not translate correctly but if we failed on
            //       the encoding anyway, then we're expecting the content
            //       of the document to be bad. This will just prevent an
            //       invalid UTF-8 sequence to be detected. This is only
            //       important when continue-after-fatal-error is turned
            //       on. -Ac
            encoding = "ISO-8859-1";
        }

        // try to use a Java reader
        // NOTE(review): the lookup uses ENCODING, which still holds the
        // upper-cased name computed at the top of the method even when
        // `encoding` was just replaced by "ISO-8859-1" - confirm that
        // this is intended.
        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
        if (javaEncoding == null) {
            if(fAllowJavaEncodings) {
                // no mapping known: pass the name on as a Java encoding name
                javaEncoding = encoding;
            } else {
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingDeclInvalid",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                // see comment above.
                javaEncoding = "ISO8859_1";
            }
        }
        if (DEBUG_ENCODINGS) {
            System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
            // reference comparison: true only when javaEncoding was
            // assigned from encoding a few lines above
            if (javaEncoding == encoding) {
                System.out.print(" (IANA encoding)");
            }
            System.out.println();
        }
        return new BufferedReader( new InputStreamReader(inputStream, javaEncoding));

    } // createReader(InputStream,String, Boolean): Reader


    /**
     * Return the public identifier for the current document event.
     * <p>
     * The return value is the public identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.
     *
     * @return A string containing the public identifier, or
     *         null if none is available.
     */
    public String getPublicId() {
        // taken from the current entity's location; null when there is no
        // current entity or it has no location
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
    } // getPublicId():String

    /**
     * Return the expanded system identifier for the current document event.
     * <p>
     * The return value is the expanded system identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.
     * <p>
     * If the system identifier is a URL, the parser must resolve it
     * fully before passing it to the application.
     *
     * @return A string containing the expanded system identifier, or null
     *         if none is available.
2513 */ 2514 public String getExpandedSystemId() { 2515 if (fCurrentEntity != null) { 2516 if (fCurrentEntity.entityLocation != null && 2517 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2518 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2519 } else { 2520 // search for the first external entity on the stack 2521 int size = fEntityStack.size(); 2522 for (int i = size - 1; i >= 0 ; i--) { 2523 Entity.ScannedEntity externalEntity = 2524 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2525 2526 if (externalEntity.entityLocation != null && 2527 externalEntity.entityLocation.getExpandedSystemId() != null) { 2528 return externalEntity.entityLocation.getExpandedSystemId(); 2529 } 2530 } 2531 } 2532 } 2533 return null; 2534 } // getExpandedSystemId():String 2535 2536 /** 2537 * Return the literal system identifier for the current document event. 2538 * <p> 2539 * The return value is the literal system identifier of the document 2540 * entity or of the external parsed entity in which the markup 2541 * triggering the event appears. 2542 * <p> 2543 * @return A string containing the literal system identifier, or null 2544 * if none is available. 
2545 */ 2546 public String getLiteralSystemId() { 2547 if (fCurrentEntity != null) { 2548 if (fCurrentEntity.entityLocation != null && 2549 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2550 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2551 } else { 2552 // search for the first external entity on the stack 2553 int size = fEntityStack.size(); 2554 for (int i = size - 1; i >= 0 ; i--) { 2555 Entity.ScannedEntity externalEntity = 2556 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2557 2558 if (externalEntity.entityLocation != null && 2559 externalEntity.entityLocation.getLiteralSystemId() != null) { 2560 return externalEntity.entityLocation.getLiteralSystemId(); 2561 } 2562 } 2563 } 2564 } 2565 return null; 2566 } // getLiteralSystemId():String 2567 2568 /** 2569 * Return the line number where the current document event ends. 2570 * <p> 2571 * <strong>Warning:</strong> The return value from the method 2572 * is intended only as an approximation for the sake of error 2573 * reporting; it is not intended to provide sufficient information 2574 * to edit the character content of the original XML document. 2575 * <p> 2576 * The return value is an approximation of the line number 2577 * in the document entity or external parsed entity where the 2578 * markup triggering the event appears. 2579 * <p> 2580 * If possible, the SAX driver should provide the line position 2581 * of the first character after the text associated with the document 2582 * event. The first line in the document is line 1. 2583 * 2584 * @return The line number, or -1 if none is available. 
2585 */ 2586 public int getLineNumber() { 2587 if (fCurrentEntity != null) { 2588 if (fCurrentEntity.isExternal()) { 2589 return fCurrentEntity.lineNumber; 2590 } else { 2591 // search for the first external entity on the stack 2592 int size = fEntityStack.size(); 2593 for (int i=size-1; i>0 ; i--) { 2594 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2595 if (firstExternalEntity.isExternal()) { 2596 return firstExternalEntity.lineNumber; 2597 } 2598 } 2599 } 2600 } 2601 2602 return -1; 2603 2604 } // getLineNumber():int 2605 2606 /** 2607 * Return the column number where the current document event ends. 2608 * <p> 2609 * <strong>Warning:</strong> The return value from the method 2610 * is intended only as an approximation for the sake of error 2611 * reporting; it is not intended to provide sufficient information 2612 * to edit the character content of the original XML document. 2613 * <p> 2614 * The return value is an approximation of the column number 2615 * in the document entity or external parsed entity where the 2616 * markup triggering the event appears. 2617 * <p> 2618 * If possible, the SAX driver should provide the line position 2619 * of the first character after the text associated with the document 2620 * event. 2621 * <p> 2622 * If possible, the SAX driver should provide the line position 2623 * of the first character after the text associated with the document 2624 * event. The first column in each line is column 1. 2625 * 2626 * @return The column number, or -1 if none is available. 
2627 */ 2628 public int getColumnNumber() { 2629 if (fCurrentEntity != null) { 2630 if (fCurrentEntity.isExternal()) { 2631 return fCurrentEntity.columnNumber; 2632 } else { 2633 // search for the first external entity on the stack 2634 int size = fEntityStack.size(); 2635 for (int i=size-1; i>0 ; i--) { 2636 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2637 if (firstExternalEntity.isExternal()) { 2638 return firstExternalEntity.columnNumber; 2639 } 2640 } 2641 } 2642 } 2643 2644 return -1; 2645 } // getColumnNumber():int 2646 2647 2648 // 2649 // Protected static methods 2650 // 2651 2652 /** 2653 * Fixes a platform dependent filename to standard URI form. 2654 * 2655 * @param str The string to fix. 2656 * 2657 * @return Returns the fixed URI string. 2658 */ 2659 protected static String fixURI(String str) { 2660 2661 // handle platform dependent strings 2662 str = str.replace(java.io.File.separatorChar, '/'); 2663 2664 // Windows fix 2665 if (str.length() >= 2) { 2666 char ch1 = str.charAt(1); 2667 // change "C:blah" to "/C:blah" 2668 if (ch1 == ':') { 2669 char ch0 = Character.toUpperCase(str.charAt(0)); 2670 if (ch0 >= 'A' && ch0 <= 'Z') { 2671 str = "/" + str; 2672 } 2673 } 2674 // change "//blah" to "file://blah" 2675 else if (ch1 == '/' && str.charAt(0) == '/') { 2676 str = "file:" + str; 2677 } 2678 } 2679 2680 // replace spaces in file names with %20. 2681 // Original comment from JDK5: the following algorithm might not be 2682 // very performant, but people who want to use invalid URI's have to 2683 // pay the price. 2684 int pos = str.indexOf(' '); 2685 if (pos >= 0) { 2686 StringBuilder sb = new StringBuilder(str.length()); 2687 // put characters before ' ' into the string builder 2688 for (int i = 0; i < pos; i++) 2689 sb.append(str.charAt(i)); 2690 // and %20 for the space 2691 sb.append("%20"); 2692 // for the remamining part, also convert ' ' to "%20". 
2693 for (int i = pos+1; i < str.length(); i++) { 2694 if (str.charAt(i) == ' ') 2695 sb.append("%20"); 2696 else 2697 sb.append(str.charAt(i)); 2698 } 2699 str = sb.toString(); 2700 } 2701 2702 // done 2703 return str; 2704 2705 } // fixURI(String):String 2706 2707 2708 // 2709 // Package visible methods 2710 // 2711 /** Prints the contents of the buffer. */ 2712 final void print() { 2713 if (DEBUG_BUFFER) { 2714 if (fCurrentEntity != null) { 2715 System.out.print('['); 2716 System.out.print(fCurrentEntity.count); 2717 System.out.print(' '); 2718 System.out.print(fCurrentEntity.position); 2719 if (fCurrentEntity.count > 0) { 2720 System.out.print(" \""); 2721 for (int i = 0; i < fCurrentEntity.count; i++) { 2722 if (i == fCurrentEntity.position) { 2723 System.out.print('^'); 2724 } 2725 char c = fCurrentEntity.ch[i]; 2726 switch (c) { 2727 case '\n': { 2728 System.out.print("\\n"); 2729 break; 2730 } 2731 case '\r': { 2732 System.out.print("\\r"); 2733 break; 2734 } 2735 case '\t': { 2736 System.out.print("\\t"); 2737 break; 2738 } 2739 case '\\': { 2740 System.out.print("\\\\"); 2741 break; 2742 } 2743 default: { 2744 System.out.print(c); 2745 } 2746 } 2747 } 2748 if (fCurrentEntity.position == fCurrentEntity.count) { 2749 System.out.print('^'); 2750 } 2751 System.out.print('"'); 2752 } 2753 System.out.print(']'); 2754 System.out.print(" @ "); 2755 System.out.print(fCurrentEntity.lineNumber); 2756 System.out.print(','); 2757 System.out.print(fCurrentEntity.columnNumber); 2758 } else { 2759 System.out.print("*NO CURRENT ENTITY*"); 2760 } 2761 } 2762 } // print() 2763 2764 /** 2765 * Buffer used in entity manager to reuse character arrays instead 2766 * of creating new ones every time. 
2767 * 2768 * @xerces.internal 2769 * 2770 * @author Ankit Pasricha, IBM 2771 */ 2772 private static class CharacterBuffer { 2773 2774 /** character buffer */ 2775 private char[] ch; 2776 2777 /** whether the buffer is for an external or internal scanned entity */ 2778 private boolean isExternal; 2779 2780 public CharacterBuffer(boolean isExternal, int size) { 2781 this.isExternal = isExternal; 2782 ch = new char[size]; 2783 } 2784 } 2785 2786 2787 /** 2788 * Stores a number of character buffers and provides it to the entity 2789 * manager to use when an entity is seen. 2790 * 2791 * @xerces.internal 2792 * 2793 * @author Ankit Pasricha, IBM 2794 */ 2795 private static class CharacterBufferPool { 2796 2797 private static final int DEFAULT_POOL_SIZE = 3; 2798 2799 private CharacterBuffer[] fInternalBufferPool; 2800 private CharacterBuffer[] fExternalBufferPool; 2801 2802 private int fExternalBufferSize; 2803 private int fInternalBufferSize; 2804 private int poolSize; 2805 2806 private int fInternalTop; 2807 private int fExternalTop; 2808 2809 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2810 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2811 } 2812 2813 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2814 fExternalBufferSize = externalBufferSize; 2815 fInternalBufferSize = internalBufferSize; 2816 this.poolSize = poolSize; 2817 init(); 2818 } 2819 2820 /** Initializes buffer pool. **/ 2821 private void init() { 2822 fInternalBufferPool = new CharacterBuffer[poolSize]; 2823 fExternalBufferPool = new CharacterBuffer[poolSize]; 2824 fInternalTop = -1; 2825 fExternalTop = -1; 2826 } 2827 2828 /** Retrieves buffer from pool. 
**/ 2829 public CharacterBuffer getBuffer(boolean external) { 2830 if (external) { 2831 if (fExternalTop > -1) { 2832 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2833 } 2834 else { 2835 return new CharacterBuffer(true, fExternalBufferSize); 2836 } 2837 } 2838 else { 2839 if (fInternalTop > -1) { 2840 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2841 } 2842 else { 2843 return new CharacterBuffer(false, fInternalBufferSize); 2844 } 2845 } 2846 } 2847 2848 /** Returns buffer to pool. **/ 2849 public void returnToPool(CharacterBuffer buffer) { 2850 if (buffer.isExternal) { 2851 if (fExternalTop < fExternalBufferPool.length - 1) { 2852 fExternalBufferPool[++fExternalTop] = buffer; 2853 } 2854 } 2855 else if (fInternalTop < fInternalBufferPool.length - 1) { 2856 fInternalBufferPool[++fInternalTop] = buffer; 2857 } 2858 } 2859 2860 /** Sets the size of external buffers and dumps the old pool. **/ 2861 public void setExternalBufferSize(int bufferSize) { 2862 fExternalBufferSize = bufferSize; 2863 fExternalBufferPool = new CharacterBuffer[poolSize]; 2864 fExternalTop = -1; 2865 } 2866 } 2867 2868 /** 2869 * This class wraps the byte inputstreams we're presented with. 2870 * We need it because java.io.InputStreams don't provide 2871 * functionality to reread processed bytes, and they have a habit 2872 * of reading more than one character when you call their read() 2873 * methods. This means that, once we discover the true (declared) 2874 * encoding of a document, we can neither backtrack to read the 2875 * whole doc again nor start reading where we are with a new 2876 * reader. 2877 * 2878 * This class allows rewinding an inputStream by allowing a mark 2879 * to be set, and the stream reset to that position. 
<strong>The 2880 * class assumes that it needs to read one character per 2881 * invocation when it's read() method is inovked, but uses the 2882 * underlying InputStream's read(char[], offset length) method--it 2883 * won't buffer data read this way!</strong> 2884 * 2885 * @xerces.internal 2886 * 2887 * @author Neil Graham, IBM 2888 * @author Glenn Marcy, IBM 2889 */ 2890 2891 protected final class RewindableInputStream extends InputStream { 2892 2893 private InputStream fInputStream; 2894 private byte[] fData; 2895 private int fStartOffset; 2896 private int fEndOffset; 2897 private int fOffset; 2898 private int fLength; 2899 private int fMark; 2900 2901 public RewindableInputStream(InputStream is) { 2902 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2903 fInputStream = is; 2904 fStartOffset = 0; 2905 fEndOffset = -1; 2906 fOffset = 0; 2907 fLength = 0; 2908 fMark = 0; 2909 } 2910 2911 public void setStartOffset(int offset) { 2912 fStartOffset = offset; 2913 } 2914 2915 public void rewind() { 2916 fOffset = fStartOffset; 2917 } 2918 2919 public int read() throws IOException { 2920 int b = 0; 2921 if (fOffset < fLength) { 2922 return fData[fOffset++] & 0xff; 2923 } 2924 if (fOffset == fEndOffset) { 2925 return -1; 2926 } 2927 if (fOffset == fData.length) { 2928 byte[] newData = new byte[fOffset << 1]; 2929 System.arraycopy(fData, 0, newData, 0, fOffset); 2930 fData = newData; 2931 } 2932 b = fInputStream.read(); 2933 if (b == -1) { 2934 fEndOffset = fOffset; 2935 return -1; 2936 } 2937 fData[fLength++] = (byte)b; 2938 fOffset++; 2939 return b & 0xff; 2940 } 2941 2942 public int read(byte[] b, int off, int len) throws IOException { 2943 int bytesLeft = fLength - fOffset; 2944 if (bytesLeft == 0) { 2945 if (fOffset == fEndOffset) { 2946 return -1; 2947 } 2948 2949 /** 2950 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2951 * //System.out.println("fInputStream = " + fInputStream ); 2952 * // better get some more for the voracious reader... 
*/ 2953 2954 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2955 2956 if (!fCurrentEntity.xmlDeclChunkRead) 2957 { 2958 fCurrentEntity.xmlDeclChunkRead = true; 2959 len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2960 } 2961 return fInputStream.read(b, off, len); 2962 } 2963 2964 int returnedVal = read(); 2965 if(returnedVal == -1) { 2966 fEndOffset = fOffset; 2967 return -1; 2968 } 2969 b[off] = (byte)returnedVal; 2970 return 1; 2971 2972 } 2973 if (len < bytesLeft) { 2974 if (len <= 0) { 2975 return 0; 2976 } 2977 } else { 2978 len = bytesLeft; 2979 } 2980 if (b != null) { 2981 System.arraycopy(fData, fOffset, b, off, len); 2982 } 2983 fOffset += len; 2984 return len; 2985 } 2986 2987 public long skip(long n) 2988 throws IOException { 2989 int bytesLeft; 2990 if (n <= 0) { 2991 return 0; 2992 } 2993 bytesLeft = fLength - fOffset; 2994 if (bytesLeft == 0) { 2995 if (fOffset == fEndOffset) { 2996 return 0; 2997 } 2998 return fInputStream.skip(n); 2999 } 3000 if (n <= bytesLeft) { 3001 fOffset += n; 3002 return n; 3003 } 3004 fOffset += bytesLeft; 3005 if (fOffset == fEndOffset) { 3006 return bytesLeft; 3007 } 3008 n -= bytesLeft; 3009 /* 3010 * In a manner of speaking, when this class isn't permitting more 3011 * than one byte at a time to be read, it is "blocking". The 3012 * available() method should indicate how much can be read without 3013 * blocking, so while we're in this mode, it should only indicate 3014 * that bytes in its buffer are available; otherwise, the result of 3015 * available() on the underlying InputStream is appropriate. 3016 */ 3017 return fInputStream.skip(n) + bytesLeft; 3018 } 3019 3020 public int available() throws IOException { 3021 int bytesLeft = fLength - fOffset; 3022 if (bytesLeft == 0) { 3023 if (fOffset == fEndOffset) { 3024 return -1; 3025 } 3026 return fCurrentEntity.mayReadChunks ? 
fInputStream.available() 3027 : 0; 3028 } 3029 return bytesLeft; 3030 } 3031 3032 public void mark(int howMuch) { 3033 fMark = fOffset; 3034 } 3035 3036 public void reset() { 3037 fOffset = fMark; 3038 //test(); 3039 } 3040 3041 public boolean markSupported() { 3042 return true; 3043 } 3044 3045 public void close() throws IOException { 3046 if (fInputStream != null) { 3047 fInputStream.close(); 3048 fInputStream = null; 3049 } 3050 } 3051 } // end of RewindableInputStream class 3052 3053 public void test(){ 3054 //System.out.println("TESTING: Added familytree to entityManager"); 3055 //Usecase1 3056 fEntityStorage.addExternalEntity("entityUsecase1",null, 3057 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 3058 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 3059 3060 //Usecase2 3061 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3062 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3063 fEntityStorage.addInternalEntity("text", "Hello World."); 3064 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3065 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3066 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3067 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3068 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3069 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3070 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3071 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3072 fEntityStorage.addInternalEntity("ch","©"); 3073 fEntityStorage.addInternalEntity("ch1","T"); 3074 fEntityStorage.addInternalEntity("% ch2","param"); 3075 } 3076 3077 } // class XMLEntityManager