1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 24 import com.sun.xml.internal.stream.StaxXMLInputSource; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.*; 27 import java.io.BufferedReader; 28 import java.util.*; 29 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.io.InputStreamReader; 33 import java.io.Reader; 34 import java.io.StringReader; 35 import java.lang.reflect.Method; 36 import java.net.HttpURLConnection; 37 import java.net.URL; 38 import java.net.URLConnection; 39 import java.net.URISyntaxException; 40 import java.util.Hashtable; 41 import java.util.Iterator; 42 import java.util.Locale; 43 import java.util.Map; 44 import java.util.Stack; 45 46 47 import com.sun.org.apache.xerces.internal.impl.io.*; 48 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 49 import com.sun.org.apache.xerces.internal.util.*; 50 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 51 import com.sun.org.apache.xerces.internal.xni.XNIException; 52 import com.sun.org.apache.xerces.internal.xni.parser.*; 53 import com.sun.org.apache.xerces.internal.impl.Constants; 54 import 
com.sun.org.apache.xerces.internal.utils.SecuritySupport; 55 import com.sun.xml.internal.stream.Entity; 56 import com.sun.org.apache.xerces.internal.xni.Augmentations; 57 58 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 59 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 60 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 61 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 62 import com.sun.org.apache.xerces.internal.util.HTTPInputSource; 63 import com.sun.org.apache.xerces.internal.xinclude.XIncludeHandler; 64 65 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 66 import com.sun.org.apache.xerces.internal.util.SecurityManager; 67 import com.sun.org.apache.xerces.internal.util.URI; 68 69 70 /** 71 * Will keep track of current entity. 72 * 73 * The entity manager handles the registration of general and parameter 74 * entities; resolves entities; and starts entities. The entity manager 75 * is a central component in a standard parser configuration and this 76 * class works directly with the entity scanner to manage the underlying 77 * xni. 
* <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 *  <li>http://xml.org/sax/features/validation</li>
 *  <li>http://xml.org/sax/features/external-general-entities</li>
 *  <li>http://xml.org/sax/features/external-parameter-entities</li>
 *  <li>http://apache.org/xml/features/allow-java-encodings</li>
 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
 *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
 * </ul>
 *
 *
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal SUN Microsystems
 * @author Neeraj Bajaj SUN Microsystems
 * @author Sunitha Reddy SUN Microsystems
 * @version $Id: XMLEntityManager.java,v 1.17 2010-11-01 04:39:41 joehw Exp $
 */
public class XMLEntityManager implements XMLComponent, XMLEntityResolver {

    //
    // Constants
    //

    /** Default buffer size (8192). */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Default buffer size before we've finished with the XMLDecl (64). */
    public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;

    /** Default internal entity buffer size (1024). */
    public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024;

    // feature identifiers

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /**
     * Standard URI conformant (strict URI). This feature identifier is:
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;


    /** Feature identifier: external general entities. */
    protected static final String EXTERNAL_GENERAL_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;

    /** Feature identifier: external parameter entities. */
    protected static final String EXTERNAL_PARAMETER_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Feature identifier: warn on duplicate entity definition. */
    protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
            Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;

    // property identifiers

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: standard uri conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: StAX entity resolver. */
    protected static final String STAX_ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: validation manager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: buffer size. */
    protected static final String BUFFER_SIZE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;

    /** Property identifier: security manager. */
    protected static final String SECURITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;

    /** Feature identifier: parser settings. */
    protected static final String PARSER_SETTINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

    // recognized features and properties

    /** Recognized features. */
    private static final String[] RECOGNIZED_FEATURES = {
        VALIDATION,
        EXTERNAL_GENERAL_ENTITIES,
        EXTERNAL_PARAMETER_ENTITIES,
        ALLOW_JAVA_ENCODINGS,
        WARN_ON_DUPLICATE_ENTITYDEF,
        STANDARD_URI_CONFORMANT
    };

    /**
     * Feature defaults.
     * NOTE(review): has the same length as RECOGNIZED_FEATURES and is
     * presumably read position-by-position against it; confirm with the
     * lookup code, which is outside this view.
     */
    private static final Boolean[] FEATURE_DEFAULTS = {
        null,
        Boolean.TRUE,
        Boolean.TRUE,
        Boolean.TRUE,
        Boolean.FALSE,
        Boolean.FALSE
    };

    /** Recognized properties. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_RESOLVER,
        VALIDATION_MANAGER,
        BUFFER_SIZE,
        SECURITY_MANAGER,

    };

    /**
     * Property defaults.
     * NOTE(review): has the same length as RECOGNIZED_PROPERTIES and is
     * presumably read position-by-position against it; confirm with the
     * lookup code, which is outside this view.
     */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        null,
        new Integer(DEFAULT_BUFFER_SIZE),
        null
    };

    /** Interned entity name "[xml]". */
    private static final String XMLEntity = "[xml]".intern();

    /** Interned entity name "[dtd]". */
    private static final String DTDEntity = "[dtd]".intern();

    // debugging

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;

    /**
     * Warn on duplicate entity declaration. This feature identifier is:
     * http://apache.org/xml/features/warn-on-duplicate-entitydef
     */
    protected boolean fWarnDuplicateEntityDef;

    /** Debug some basic entities. */
    private static final boolean DEBUG_ENTITIES = false;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Whether trace messages should be displayed while resolving entities. */
    private static final boolean DEBUG_RESOLVER = false ;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation;

    /**
     * External general entities. This feature identifier is:
     * http://xml.org/sax/features/external-general-entities
     */
    protected boolean fExternalGeneralEntities;

    /**
     * External parameter entities. This feature identifier is:
     * http://xml.org/sax/features/external-parameter-entities
     */
    protected boolean fExternalParameterEntities;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings = true ;


    // properties

    /**
     * Symbol table. This property identifier is:
     * http://apache.org/xml/properties/internal/symbol-table
     */
    protected SymbolTable fSymbolTable;

    /**
     * Error reporter. This property identifier is:
     * http://apache.org/xml/properties/internal/error-reporter
     */
    protected XMLErrorReporter fErrorReporter;

    /**
     * Entity resolver. This property identifier is:
     * http://apache.org/xml/properties/internal/entity-resolver
     */
    protected XMLEntityResolver fEntityResolver;

    /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */
    protected StaxEntityResolverWrapper fStaxEntityResolver;

    /** Property Manager. This is used from Stax */
    protected PropertyManager fPropertyManager ;


    // settings

    /**
     * Validation manager. This property identifier is:
     * http://apache.org/xml/properties/internal/validation-manager
     */
    protected ValidationManager fValidationManager;

    /**
     * Buffer size. We get this value from a property. The default size
     * is used if the input buffer size property is not specified.
     * REVISIT: do we need a property for internal entity buffer size?
     */
    protected int fBufferSize = DEFAULT_BUFFER_SIZE;

    /**
     * Stores defaults for the entity expansion limit if it has
     * been set on the configuration.
     */
    protected SecurityManager fSecurityManager = null;

    /**
     * True if the document entity is standalone. This should really
     * only be set by the document source (e.g. XMLDocumentScanner).
     */
    protected boolean fStandalone;

    /**
     * Are the entities being parsed in the external subset?
     * NOTE: this *is not* the same as whether they're external entities!
     */
    protected boolean fInExternalSubset = false;


    // handlers

    /** Entity handler. */
    protected XMLEntityHandler fEntityHandler;

    /** Current entity scanner */
    protected XMLEntityScanner fEntityScanner ;

    /** XML 1.0 entity scanner. */
    protected XMLEntityScanner fXML10EntityScanner;

    /** XML 1.1 entity scanner. */
    protected XMLEntityScanner fXML11EntityScanner;

    /**
     * Entity expansion limit (contains useful data if and only if
     * fSecurityManager is non-null).
     */
    protected int fEntityExpansionLimit = 0;

    /** Count of entities expanded. */
    protected int fEntityExpansionCount = 0;

    // entities

    /** Entities, keyed by entity name. */
    protected Hashtable fEntities = new Hashtable();

    /** Entity stack. */
    protected Stack fEntityStack = new Stack();

    /** Current entity. */
    protected Entity.ScannedEntity fCurrentEntity = null;

    // shared context

    /** Entity storage created by the constructors with a reference to this manager. */
    protected XMLEntityStorage fEntityStorage ;

    /** Default encoding descriptor: the name "UTF-8" followed by a null second element. */
    protected final Object [] defaultEncoding = new Object[]{"UTF-8", null};


    // temp vars

    /** Resource identifier. */
    private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();

    /** Augmentations for entities. */
    private final Augmentations fEntityAugs = new AugmentationsImpl();

    /** Pool of character buffers. */
    private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);

    //
    // Constructors
    //

    /**
     * Creates an entity manager with its own entity storage and an
     * XML 1.0 entity scanner selected.
     * If this constructor is used to create the object, reset() should be invoked on this object
     */
    public XMLEntityManager() {
        fEntityStorage = new XMLEntityStorage(this) ;
        setScannerVersion(Constants.XML_VERSION_1_0);
    } // <init>()

    /**
     * Creates an entity manager for use with a property manager: remembers
     * the property manager, creates the entity storage and an entity scanner
     * bound to this manager, then calls reset(propertyManager).
     *
     * @param propertyManager the property manager handed to the scanner and to reset
     */
    public XMLEntityManager(PropertyManager propertyManager) {
        fPropertyManager = propertyManager ;
        //pass a reference to current entity being scanned
        //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ;
        fEntityStorage = new XMLEntityStorage(this) ;
        fEntityScanner = new XMLEntityScanner(propertyManager, this) ;
        reset(propertyManager);
    } // <init>()

    /**
     * Adds an internal entity declaration.
     * <p>
     * <strong>Note:</strong> This method ignores subsequent entity
     * declarations.
     * <p>
     * <strong>Note:</strong> The name should be a unique symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @param name The name of the entity.
     * @param text The text of the entity.
420 * 421 * @see SymbolTable 422 */ 423 public void addInternalEntity(String name, String text) { 424 if (!fEntities.containsKey(name)) { 425 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 426 fEntities.put(name, entity); 427 } else{ 428 if(fWarnDuplicateEntityDef){ 429 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 430 "MSG_DUPLICATE_ENTITY_DEFINITION", 431 new Object[]{ name }, 432 XMLErrorReporter.SEVERITY_WARNING ); 433 } 434 } 435 436 } // addInternalEntity(String,String) 437 438 /** 439 * Adds an external entity declaration. 440 * <p> 441 * <strong>Note:</strong> This method ignores subsequent entity 442 * declarations. 443 * <p> 444 * <strong>Note:</strong> The name should be a unique symbol. The 445 * SymbolTable can be used for this purpose. 446 * 447 * @param name The name of the entity. 448 * @param publicId The public identifier of the entity. 449 * @param literalSystemId The system identifier of the entity. 450 * @param baseSystemId The base system identifier of the entity. 451 * This is the system identifier of the entity 452 * where <em>the entity being added</em> and 453 * is used to expand the system identifier when 454 * the system identifier is a relative URI. 455 * When null the system identifier of the first 456 * external entity on the stack is used instead. 
457 * 458 * @see SymbolTable 459 */ 460 public void addExternalEntity(String name, 461 String publicId, String literalSystemId, 462 String baseSystemId) throws IOException { 463 if (!fEntities.containsKey(name)) { 464 if (baseSystemId == null) { 465 // search for the first external entity on the stack 466 int size = fEntityStack.size(); 467 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 468 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 469 } 470 for (int i = size - 1; i >= 0 ; i--) { 471 Entity.ScannedEntity externalEntity = 472 (Entity.ScannedEntity)fEntityStack.elementAt(i); 473 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 474 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 475 break; 476 } 477 } 478 } 479 Entity entity = new Entity.ExternalEntity(name, 480 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 481 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 482 fEntities.put(name, entity); 483 } else{ 484 if(fWarnDuplicateEntityDef){ 485 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 486 "MSG_DUPLICATE_ENTITY_DEFINITION", 487 new Object[]{ name }, 488 XMLErrorReporter.SEVERITY_WARNING ); 489 } 490 } 491 492 } // addExternalEntity(String,String,String,String) 493 494 495 /** 496 * Adds an unparsed entity declaration. 497 * <p> 498 * <strong>Note:</strong> This method ignores subsequent entity 499 * declarations. 500 * <p> 501 * <strong>Note:</strong> The name should be a unique symbol. The 502 * SymbolTable can be used for this purpose. 503 * 504 * @param name The name of the entity. 505 * @param publicId The public identifier of the entity. 506 * @param systemId The system identifier of the entity. 507 * @param notation The name of the notation. 
508 * 509 * @see SymbolTable 510 */ 511 public void addUnparsedEntity(String name, 512 String publicId, String systemId, 513 String baseSystemId, String notation) { 514 if (!fEntities.containsKey(name)) { 515 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 516 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 517 notation, fInExternalSubset); 518 fEntities.put(name, entity); 519 } else{ 520 if(fWarnDuplicateEntityDef){ 521 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 522 "MSG_DUPLICATE_ENTITY_DEFINITION", 523 new Object[]{ name }, 524 XMLErrorReporter.SEVERITY_WARNING ); 525 } 526 } 527 } // addUnparsedEntity(String,String,String,String) 528 529 530 /** get the entity storage object from entity manager */ 531 public XMLEntityStorage getEntityStore(){ 532 return fEntityStorage ; 533 } 534 535 /** return the entity responsible for reading the entity */ 536 public XMLEntityScanner getEntityScanner(){ 537 if(fEntityScanner == null) { 538 // default to 1.0 539 if(fXML10EntityScanner == null) { 540 fXML10EntityScanner = new XMLEntityScanner(); 541 } 542 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 543 fEntityScanner = fXML10EntityScanner; 544 } 545 return fEntityScanner; 546 547 } 548 549 public void setScannerVersion(short version) { 550 551 if(version == Constants.XML_VERSION_1_0) { 552 if(fXML10EntityScanner == null) { 553 fXML10EntityScanner = new XMLEntityScanner(); 554 } 555 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 556 fEntityScanner = fXML10EntityScanner; 557 fEntityScanner.setCurrentEntity(fCurrentEntity); 558 } else { 559 if(fXML11EntityScanner == null) { 560 fXML11EntityScanner = new XML11EntityScanner(); 561 } 562 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 563 fEntityScanner = fXML11EntityScanner; 564 fEntityScanner.setCurrentEntity(fCurrentEntity); 565 } 566 567 } 568 569 /** 570 * This method uses the passed-in XMLInputSource to make 571 * 
fCurrentEntity usable for reading.
     * <p>
     * When the input source supplies no character stream, a byte stream is
     * obtained (opening a URL connection to the expanded system identifier
     * if the source has no byte stream either), wrapped in a
     * RewindableInputStream, and a reader is created for either the
     * auto-detected or the externally specified encoding. The previous
     * current entity, if any, is pushed on the entity stack before the new
     * one is installed.
     *
     * @param name  name of the entity (XML if it's the document entity)
     * @param xmlInputSource    the input source, with sufficient information
     *      to begin scanning characters.
     * @param literal        True if this entity is started within a
     *                       literal value.
     * @param isExternal    whether this entity should be treated as an internal or external entity.
     * @throws IOException  if anything can't be read
     * @throws XNIException  If anything parser-specific goes wrong.
     * @return the encoding of the new entity; when the input source supplied
     *         a character stream this is the source's own encoding value
     *         unchanged, which may be null
     */
    public String setupCurrentEntity(String name, XMLInputSource xmlInputSource,
            boolean literal, boolean isExternal)
            throws IOException, XNIException {
        // get information

        final String publicId = xmlInputSource.getPublicId();
        String literalSystemId = xmlInputSource.getSystemId();
        String baseSystemId = xmlInputSource.getBaseSystemId();
        String encoding = xmlInputSource.getEncoding();
        // remembered before auto-detection can assign an encoding below
        final boolean encodingExternallySpecified = (encoding != null);
        Boolean isBigEndian = null;

        // create reader
        InputStream stream = null;
        Reader reader = xmlInputSource.getCharacterStream();

        // First chance checking strict URI
        String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
        if (baseSystemId == null) {
            baseSystemId = expandedSystemId;
        }
        if (reader == null) {
            stream = xmlInputSource.getByteStream();
            if (stream == null) {
                // neither a character stream nor a byte stream was supplied:
                // open the resource named by the expanded system identifier
                URL location = new URL(expandedSystemId);
                URLConnection connect = location.openConnection();
                if (!(connect instanceof HttpURLConnection)) {
                    stream = connect.getInputStream();
                }
                else {
                    boolean followRedirects = true;

                    // setup URLConnection if we have an HTTPInputSource
                    if (xmlInputSource instanceof HTTPInputSource) {
                        final HttpURLConnection urlConnection = (HttpURLConnection) connect;
                        final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;

                        // set request properties
                        Iterator propIter = httpInputSource.getHTTPRequestProperties();
                        while (propIter.hasNext()) {
                            Map.Entry entry = (Map.Entry) propIter.next();
                            urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
                        }

                        // set preference for redirection
                        followRedirects = httpInputSource.getFollowHTTPRedirects();
                        if (!followRedirects) {
                            setInstanceFollowRedirects(urlConnection, followRedirects);
                        }
                    }

                    stream = connect.getInputStream();

                    // REVISIT: If the URLConnection has external encoding
                    // information, we should be reading it here. It's located
                    // in the charset parameter of Content-Type. -- mrglavas

                    if (followRedirects) {
                        String redirect = connect.getURL().toString();
                        // E43: Check if the URL was redirected, and then
                        // update literal and expanded system IDs if needed.
                        if (!redirect.equals(expandedSystemId)) {
                            literalSystemId = redirect;
                            expandedSystemId = redirect;
                        }
                    }
                }
            }

            // wrap this stream in RewindableInputStream
            // NOTE(review): the reset() calls below presumably rewind to the
            // start of the stream; RewindableInputStream is defined outside
            // this view -- confirm.
            stream = new RewindableInputStream(stream);

            // perform auto-detect of encoding if necessary
            if (encoding == null) {
                // read first four bytes and determine encoding
                final byte[] b4 = new byte[4];
                int count = 0;
                // NOTE(review): this loop has no end-of-stream check, so a
                // read() result of -1 is stored as (byte)-1 and count is
                // always 4 afterwards; the else branch below is never taken.
                for (; count<4; count++ ) {
                    b4[count] = (byte)stream.read();
                }
                if (count == 4) {
                    Object [] encodingDesc = getEncodingName(b4, count);
                    encoding = (String)(encodingDesc[0]);
                    isBigEndian = (Boolean)(encodingDesc[1]);

                    stream.reset();
                    // Special case UTF-8 files with BOM created by Microsoft
                    // tools. It's more efficient to consume the BOM than make
                    // the reader perform extra checks. -Ac
                    if (count > 2 && encoding.equals("UTF-8")) {
                        int b0 = b4[0] & 0xFF;
                        int b1 = b4[1] & 0xFF;
                        int b2 = b4[2] & 0xFF;
                        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                            // ignore first three bytes...
                            stream.skip(3);
                        }
                    }
                    reader = createReader(stream, encoding, isBigEndian);
                } else {
                    reader = createReader(stream, encoding, isBigEndian);
                }
            }

            // use specified encoding
            else {
                encoding = encoding.toUpperCase(Locale.ENGLISH);

                // If encoding is UTF-8, consume BOM if one is present.
                if (encoding.equals("UTF-8")) {
                    final int[] b3 = new int[3];
                    int count = 0;
                    for (; count < 3; ++count) {
                        b3[count] = stream.read();
                        if (b3[count] == -1)
                            break;
                    }
                    if (count == 3) {
                        if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
                            // First three bytes are not BOM, so reset.
                            stream.reset();
                        }
                        // otherwise the three BOM bytes stay consumed
                    } else {
                        // fewer than three bytes were available
                        stream.reset();
                    }
                }
                // If encoding is UTF-16, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("UTF-16")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    String utf16Encoding = "UTF-16";
                    if (count >= 2) {
                        final int b0 = b4[0];
                        final int b1 = b4[1];
                        if (b0 == 0xFE && b1 == 0xFF) {
                            // UTF-16, big-endian
                            utf16Encoding = "UTF-16BE";
                            isBigEndian = Boolean.TRUE;
                        }
                        else if (b0 == 0xFF && b1 == 0xFE) {
                            // UTF-16, little-endian
                            utf16Encoding = "UTF-16LE";
                            isBigEndian = Boolean.FALSE;
                        }
                        else if (count == 4) {
                            final int b2 = b4[2];
                            final int b3 = b4[3];
                            if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                // UTF-16, big-endian, no BOM
                                utf16Encoding = "UTF-16BE";
                                isBigEndian = Boolean.TRUE;
                            }
                            if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                // UTF-16, little-endian, no BOM
                                utf16Encoding = "UTF-16LE";
                                isBigEndian = Boolean.FALSE;
                            }
                        }
                    }
                    // NOTE(review): this reader is replaced by the
                    // createReader(stream, encoding, isBigEndian) call that
                    // follows the if/else chain, so utf16Encoding itself is
                    // not what the final reader is created with; only
                    // isBigEndian carries over. Confirm whether intended.
                    reader = createReader(stream, utf16Encoding, isBigEndian);
                }
                // If encoding is UCS-4, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-4")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    // Ignore unusual octet order for now.
                    if (count == 4) {
                        // UCS-4, big endian (1234)
                        if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-4, little endian (4321)
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }
                // If encoding is UCS-2, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-2")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    if (count == 4) {
                        // UCS-2, big endian
                        if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-2, little endian
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }

                // runs for every externally specified encoding, including
                // the UTF-16 case above
                reader = createReader(stream, encoding, isBigEndian);
            }

            // read one character at a time so we don't jump too far
            // ahead, converting characters from the byte stream in
            // the wrong encoding
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ no longer wrapping reader in OneCharReader");
            }
            //reader = new OneCharReader(reader);
        }

        // We've seen a new Reader.
        // Push it on the stack so we can close it later.
        //fOwnReaders.add(reader);

        // push entity on stack
        if (fCurrentEntity != null) {
            fEntityStack.push(fCurrentEntity);
        }

        // create entity
        /* if encoding is specified externally, 'encoding' information present
         * in the prolog of the XML document is not considered. Hence, prolog can
         * be read in Chunks of data instead of byte by byte.
         */
        fCurrentEntity = new com.sun.xml.internal.stream.Entity.ScannedEntity(name,new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
        fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
        fEntityScanner.setCurrentEntity(fCurrentEntity);
        // keep the shared resource identifier in step with the new entity
        fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
        return encoding;
    } //setupCurrentEntity(String, XMLInputSource, boolean, boolean):  String


    /**
     * Checks whether an entity given by name is external.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity is external, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isExternalEntity(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isExternal();
    }

    /**
     * Checks whether the declaration of an entity given by name is
     * in the external subset.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity was declared in the external subset, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isEntityDeclInExternalSubset(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isEntityDeclInExternalSubset();
    }



    //
    // Public methods
    //

    /**
     * Sets whether the document entity is standalone.
     *
     * @param standalone True if document entity is standalone.
     */
    public void setStandalone(boolean standalone) {
        fStandalone = standalone;
    }
    // setStandalone(boolean)

    /** Returns true if the document entity is standalone.
*/ 883 public boolean isStandalone() { 884 return fStandalone; 885 } //isStandalone():boolean 886 887 public boolean isDeclaredEntity(String entityName) { 888 889 Entity entity = (Entity)fEntities.get(entityName); 890 return entity != null; 891 } 892 893 public boolean isUnparsedEntity(String entityName) { 894 895 Entity entity = (Entity)fEntities.get(entityName); 896 if (entity == null) { 897 return false; 898 } 899 return entity.isUnparsed(); 900 } 901 902 903 904 // this simply returns the fResourceIdentifier object; 905 // this should only be used with caution by callers that 906 // carefully manage the entity manager's behaviour, so that 907 // this doesn't returning meaningless or misleading data. 908 // @return a reference to the current fResourceIdentifier object 909 public XMLResourceIdentifier getCurrentResourceIdentifier() { 910 return fResourceIdentifier; 911 } 912 913 /** 914 * Sets the entity handler. When an entity starts and ends, the 915 * entity handler is notified of the change. 916 * 917 * @param entityHandler The new entity handler. 918 */ 919 920 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 921 fEntityHandler = (XMLEntityHandler) entityHandler; 922 } // setEntityHandler(XMLEntityHandler) 923 924 //this function returns StaxXMLInputSource 925 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 926 927 if(resourceIdentifier == null ) return null; 928 929 String publicId = resourceIdentifier.getPublicId(); 930 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 931 String baseSystemId = resourceIdentifier.getBaseSystemId(); 932 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 933 // if no base systemId given, assume that it's relative 934 // to the systemId of the current scanned entity 935 // Sometimes the system id is not (properly) expanded. 
936 // We need to expand the system id if: 937 // a. the expanded one was null; or 938 // b. the base system id was null, but becomes non-null from the current entity. 939 boolean needExpand = (expandedSystemId == null); 940 // REVISIT: why would the baseSystemId ever be null? if we 941 // didn't have to make this check we wouldn't have to reuse the 942 // fXMLResourceIdentifier object... 943 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 944 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 945 if (baseSystemId != null) 946 needExpand = true; 947 } 948 if (needExpand) 949 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 950 951 // give the entity resolver a chance 952 StaxXMLInputSource staxInputSource = null; 953 XMLInputSource xmlInputSource = null; 954 955 XMLResourceIdentifierImpl ri = null; 956 957 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 958 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 959 } else { 960 fResourceIdentifier.clear(); 961 ri = fResourceIdentifier; 962 } 963 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 964 if(DEBUG_RESOLVER){ 965 System.out.println("BEFORE Calling resolveEntity") ; 966 } 967 968 //either of Stax or Xerces would be null 969 if(fStaxEntityResolver != null){ 970 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 971 } 972 973 if(fEntityResolver != null){ 974 xmlInputSource = fEntityResolver.resolveEntity(ri); 975 } 976 977 if(xmlInputSource != null){ 978 //wrap this XMLInputSource to StaxInputSource 979 staxInputSource = new StaxXMLInputSource(xmlInputSource); 980 } 981 982 // do default resolution 983 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 984 if (staxInputSource == null) { 985 // REVISIT: when systemId is null, I think we should return null. 986 // is this the right solution? 
-SG 987 //if (systemId != null) 988 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 989 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 990 //Waiting for the clarification from EG. - nb 991 } 992 993 if (DEBUG_RESOLVER) { 994 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 995 System.err.println(" = " + xmlInputSource); 996 } 997 998 return staxInputSource; 999 1000 } 1001 1002 /** 1003 * Resolves the specified public and system identifiers. This 1004 * method first attempts to resolve the entity based on the 1005 * EntityResolver registered by the application. If no entity 1006 * resolver is registered or if the registered entity handler 1007 * is unable to resolve the entity, then default entity 1008 * resolution will occur. 1009 * 1010 * @param publicId The public identifier of the entity. 1011 * @param systemId The system identifier of the entity. 1012 * @param baseSystemId The base system identifier of the entity. 1013 * This is the system identifier of the current 1014 * entity and is used to expand the system 1015 * identifier when the system identifier is a 1016 * relative URI. 1017 * 1018 * @return Returns an input source that wraps the resolved entity. 1019 * This method will never return null. 1020 * 1021 * @throws IOException Thrown on i/o error. 1022 * @throws XNIException Thrown by entity resolver to signal an error. 
1023 */ 1024 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1025 if(resourceIdentifier == null ) return null; 1026 String publicId = resourceIdentifier.getPublicId(); 1027 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1028 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1029 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1030 String namespace = resourceIdentifier.getNamespace(); 1031 1032 // if no base systemId given, assume that it's relative 1033 // to the systemId of the current scanned entity 1034 // Sometimes the system id is not (properly) expanded. 1035 // We need to expand the system id if: 1036 // a. the expanded one was null; or 1037 // b. the base system id was null, but becomes non-null from the current entity. 1038 boolean needExpand = (expandedSystemId == null); 1039 // REVISIT: why would the baseSystemId ever be null? if we 1040 // didn't have to make this check we wouldn't have to reuse the 1041 // fXMLResourceIdentifier object... 1042 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1043 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1044 if (baseSystemId != null) 1045 needExpand = true; 1046 } 1047 if (needExpand) 1048 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1049 1050 // give the entity resolver a chance 1051 XMLInputSource xmlInputSource = null; 1052 1053 if (fEntityResolver != null) { 1054 resourceIdentifier.setBaseSystemId(baseSystemId); 1055 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1056 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1057 } 1058 1059 // do default resolution 1060 // REVISIT: what's the correct behavior if the user provided an entity 1061 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1062 // an input source (xmlInputSource == null)? 
1063 // do we do default resolution, or do we just return null? -SG 1064 if (xmlInputSource == null) { 1065 // REVISIT: when systemId is null, I think we should return null. 1066 // is this the right solution? -SG 1067 //if (systemId != null) 1068 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1069 } 1070 1071 if (DEBUG_RESOLVER) { 1072 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1073 System.err.println(" = " + xmlInputSource); 1074 } 1075 1076 return xmlInputSource; 1077 1078 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1079 1080 /** 1081 * Starts a named entity. 1082 * 1083 * @param entityName The name of the entity to start. 1084 * @param literal True if this entity is started within a literal 1085 * value. 1086 * 1087 * @throws IOException Thrown on i/o error. 1088 * @throws XNIException Thrown by entity handler to signal an error. 1089 */ 1090 public void startEntity(String entityName, boolean literal) 1091 throws IOException, XNIException { 1092 1093 // was entity declared? 1094 Entity entity = (Entity)fEntityStorage.getEntity(entityName); 1095 if (entity == null) { 1096 if (fEntityHandler != null) { 1097 String encoding = null; 1098 fResourceIdentifier.clear(); 1099 fEntityAugs.removeAllItems(); 1100 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1101 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1102 fEntityAugs.removeAllItems(); 1103 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1104 fEntityHandler.endEntity(entityName, fEntityAugs); 1105 } 1106 return; 1107 } 1108 1109 // should we skip external entities? 
1110 boolean external = entity.isExternal(); 1111 if (external) { 1112 boolean unparsed = entity.isUnparsed(); 1113 boolean parameter = entityName.startsWith("%"); 1114 boolean general = !parameter; 1115 if (unparsed || (general && !fExternalGeneralEntities) || 1116 (parameter && !fExternalParameterEntities)) { 1117 1118 if (fEntityHandler != null) { 1119 fResourceIdentifier.clear(); 1120 final String encoding = null; 1121 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1122 //REVISIT: since we're storing expandedSystemId in the 1123 // externalEntity, how could this have got here if it wasn't already 1124 // expanded??? - neilg 1125 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1126 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1127 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1128 fResourceIdentifier.setValues( 1129 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1130 extLitSysId, extBaseSysId, expandedSystemId); 1131 fEntityAugs.removeAllItems(); 1132 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1133 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1134 fEntityAugs.removeAllItems(); 1135 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1136 fEntityHandler.endEntity(entityName, fEntityAugs); 1137 } 1138 return; 1139 } 1140 } 1141 1142 // is entity recursive? 1143 int size = fEntityStack.size(); 1144 for (int i = size; i >= 0; i--) { 1145 Entity activeEntity = i == size 1146 ? 
fCurrentEntity 1147 : (Entity)fEntityStack.elementAt(i); 1148 if (activeEntity.name == entityName) { 1149 String path = entityName; 1150 for (int j = i + 1; j < size; j++) { 1151 activeEntity = (Entity)fEntityStack.elementAt(j); 1152 path = path + " -> " + activeEntity.name; 1153 } 1154 path = path + " -> " + fCurrentEntity.name; 1155 path = path + " -> " + entityName; 1156 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1157 "RecursiveReference", 1158 new Object[] { entityName, path }, 1159 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1160 1161 if (fEntityHandler != null) { 1162 fResourceIdentifier.clear(); 1163 final String encoding = null; 1164 if (external) { 1165 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1166 // REVISIT: for the same reason above... 1167 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1168 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1169 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1170 fResourceIdentifier.setValues( 1171 (externalEntity.entityLocation != null ? 
externalEntity.entityLocation.getPublicId() : null), 1172 extLitSysId, extBaseSysId, expandedSystemId); 1173 } 1174 fEntityAugs.removeAllItems(); 1175 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1176 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1177 fEntityAugs.removeAllItems(); 1178 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1179 fEntityHandler.endEntity(entityName, fEntityAugs); 1180 } 1181 1182 return; 1183 } 1184 } 1185 1186 // resolve external entity 1187 StaxXMLInputSource staxInputSource = null; 1188 XMLInputSource xmlInputSource = null ; 1189 1190 if (external) { 1191 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1192 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1193 /** xxx: Waiting from the EG 1194 * //simply return if there was entity resolver registered and application 1195 * //returns either XMLStreamReader or XMLEventReader. 1196 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1197 */ 1198 xmlInputSource = staxInputSource.getXMLInputSource() ; 1199 } 1200 // wrap internal entity 1201 else { 1202 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1203 Reader reader = new StringReader(internalEntity.text); 1204 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1205 } 1206 1207 // start the entity 1208 startEntity(entityName, xmlInputSource, literal, external); 1209 1210 } // startEntity(String,boolean) 1211 1212 /** 1213 * Starts the document entity. The document entity has the "[xml]" 1214 * pseudo-name. 1215 * 1216 * @param xmlInputSource The input source of the document entity. 1217 * 1218 * @throws IOException Thrown on i/o error. 1219 * @throws XNIException Thrown by entity handler to signal an error. 
1220 */ 1221 public void startDocumentEntity(XMLInputSource xmlInputSource) 1222 throws IOException, XNIException { 1223 startEntity(XMLEntity, xmlInputSource, false, true); 1224 } // startDocumentEntity(XMLInputSource) 1225 1226 //xxx these methods are not required. 1227 /** 1228 * Starts the DTD entity. The DTD entity has the "[dtd]" 1229 * pseudo-name. 1230 * 1231 * @param xmlInputSource The input source of the DTD entity. 1232 * 1233 * @throws IOException Thrown on i/o error. 1234 * @throws XNIException Thrown by entity handler to signal an error. 1235 */ 1236 public void startDTDEntity(XMLInputSource xmlInputSource) 1237 throws IOException, XNIException { 1238 startEntity(DTDEntity, xmlInputSource, false, true); 1239 } // startDTDEntity(XMLInputSource) 1240 1241 // indicate start of external subset so that 1242 // location of entity decls can be tracked 1243 public void startExternalSubset() { 1244 fInExternalSubset = true; 1245 } 1246 1247 public void endExternalSubset() { 1248 fInExternalSubset = false; 1249 } 1250 1251 /** 1252 * Starts an entity. 1253 * <p> 1254 * This method can be used to insert an application defined XML 1255 * entity stream into the parsing stream. 1256 * 1257 * @param name The name of the entity. 1258 * @param xmlInputSource The input source of the entity. 1259 * @param literal True if this entity is started within a 1260 * literal value. 1261 * @param isExternal whether this entity should be treated as an internal or external entity. 1262 * 1263 * @throws IOException Thrown on i/o error. 1264 * @throws XNIException Thrown by entity handler to signal an error. 
1265 */ 1266 public void startEntity(String name, 1267 XMLInputSource xmlInputSource, 1268 boolean literal, boolean isExternal) 1269 throws IOException, XNIException { 1270 1271 String encoding = setupCurrentEntity(name, xmlInputSource, literal, isExternal); 1272 1273 //when entity expansion limit is set by the Application, we need to 1274 //check for the entity expansion limit set by the parser, if number of entity 1275 //expansions exceeds the entity expansion limit, parser will throw fatal error. 1276 // Note that this represents the nesting level of open entities. 1277 fEntityExpansionCount++; 1278 if( fSecurityManager != null && fEntityExpansionCount > fEntityExpansionLimit ){ 1279 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1280 "EntityExpansionLimitExceeded", 1281 new Object[]{new Integer(fEntityExpansionLimit) }, 1282 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1283 // is there anything better to do than reset the counter? 1284 // at least one can envision debugging applications where this might 1285 // be useful... 1286 fEntityExpansionCount = 0; 1287 } 1288 1289 // call handler 1290 if (fEntityHandler != null) { 1291 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null); 1292 } 1293 1294 } // startEntity(String,XMLInputSource) 1295 1296 /** 1297 * Return the current entity being scanned. Current entity is SET using startEntity function. 1298 * @return Entity.ScannedEntity 1299 */ 1300 1301 public Entity.ScannedEntity getCurrentEntity(){ 1302 return fCurrentEntity ; 1303 } 1304 1305 /** 1306 * Return the top level entity handled by this manager, or null 1307 * if no entity was added. 1308 */ 1309 public Entity.ScannedEntity getTopLevelEntity() { 1310 return (Entity.ScannedEntity) 1311 (fEntityStack.empty() ? null : fEntityStack.elementAt(0)); 1312 } 1313 1314 1315 /** 1316 * Close all opened InputStreams and Readers opened by this parser. 
1317 */ 1318 public void closeReaders() { 1319 /** this call actually does nothing, readers are closed in the endEntity method 1320 * through the current entity. 1321 * The change seems to have happened during the jdk6 development with the 1322 * addition of StAX 1323 **/ 1324 } 1325 1326 public void endEntity() throws IOException, XNIException { 1327 1328 // call handler 1329 if (DEBUG_BUFFER) { 1330 System.out.print("(endEntity: "); 1331 print(); 1332 System.out.println(); 1333 } 1334 //pop the entity from the stack 1335 Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ; 1336 1337 /** need to close the reader first since the program can end 1338 * prematurely (e.g. fEntityHandler.endEntity may throw exception) 1339 * leaving the reader open 1340 */ 1341 //close the reader 1342 if(fCurrentEntity != null){ 1343 //close the reader 1344 try{ 1345 fCurrentEntity.close(); 1346 }catch(IOException ex){ 1347 throw new XNIException(ex); 1348 } 1349 } 1350 1351 if (fEntityHandler != null) { 1352 //so this is the last opened entity, signal it to current fEntityHandler using Augmentation 1353 if(entity == null){ 1354 fEntityAugs.removeAllItems(); 1355 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE); 1356 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs); 1357 fEntityAugs.removeAllItems(); 1358 }else{ 1359 fEntityHandler.endEntity(fCurrentEntity.name, null); 1360 } 1361 } 1362 //check if it is a document entity 1363 boolean documentEntity = fCurrentEntity.name == XMLEntity; 1364 1365 //set popped entity as current entity 1366 fCurrentEntity = entity; 1367 fEntityScanner.setCurrentEntity(fCurrentEntity); 1368 1369 //check if there are any entity left in the stack -- if there are 1370 //no entries EOF has been reached. 
1371 // throw exception when it is the last entity but it is not a document entity 1372 1373 if(fCurrentEntity == null & !documentEntity){ 1374 throw new EOFException() ; 1375 } 1376 1377 if (DEBUG_BUFFER) { 1378 System.out.print(")endEntity: "); 1379 print(); 1380 System.out.println(); 1381 } 1382 1383 } // endEntity() 1384 1385 1386 // 1387 // XMLComponent methods 1388 // 1389 public void reset(PropertyManager propertyManager){ 1390 //reset fEntityStorage 1391 fEntityStorage.reset(propertyManager); 1392 //reset XMLEntityReaderImpl 1393 fEntityScanner.reset(propertyManager); 1394 // xerces properties 1395 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 1396 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 1397 try { 1398 fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER); 1399 } catch (XMLConfigurationException e) { 1400 fStaxEntityResolver = null; 1401 } 1402 1403 // initialize state 1404 //fStandalone = false; 1405 fEntities.clear(); 1406 fEntityStack.removeAllElements(); 1407 fCurrentEntity = null; 1408 fValidation = false; 1409 fExternalGeneralEntities = true; 1410 fExternalParameterEntities = true; 1411 fAllowJavaEncodings = true ; 1412 1413 //test(); 1414 } 1415 1416 /** 1417 * Resets the component. The component can query the component manager 1418 * about any features and properties that affect the operation of the 1419 * component. 1420 * 1421 * @param componentManager The component manager. 1422 * 1423 * @throws SAXException Thrown by component on initialization error. 1424 * For example, if a feature or property is 1425 * required for the operation of the component, the 1426 * component manager may throw a 1427 * SAXNotRecognizedException or a 1428 * SAXNotSupportedException. 
1429 */ 1430 public void reset(XMLComponentManager componentManager) 1431 throws XMLConfigurationException { 1432 1433 boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true); 1434 1435 if (!parser_settings) { 1436 // parser settings have not been changed 1437 reset(); 1438 if(fEntityScanner != null){ 1439 fEntityScanner.reset(componentManager); 1440 } 1441 if(fEntityStorage != null){ 1442 fEntityStorage.reset(componentManager); 1443 } 1444 return; 1445 } 1446 1447 // sax features 1448 fValidation = componentManager.getFeature(VALIDATION, false); 1449 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true); 1450 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true); 1451 1452 // xerces features 1453 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 1454 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false); 1455 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 1456 1457 // xerces properties 1458 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 1459 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 1460 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null); 1461 fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null); 1462 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 1463 fSecurityManager = (SecurityManager)componentManager.getProperty(SECURITY_MANAGER, null); 1464 1465 //reset general state 1466 reset(); 1467 1468 fEntityScanner.reset(componentManager); 1469 fEntityStorage.reset(componentManager); 1470 1471 } // reset(XMLComponentManager) 1472 1473 // reset general state. 
Should not be called other than by 1474 // a class acting as a component manager but not 1475 // implementing that interface for whatever reason. 1476 public void reset() { 1477 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0; 1478 1479 // initialize state 1480 fStandalone = false; 1481 fEntities.clear(); 1482 fEntityStack.removeAllElements(); 1483 fEntityExpansionCount = 0; 1484 1485 fCurrentEntity = null; 1486 // reset scanner 1487 if(fXML10EntityScanner != null){ 1488 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1489 } 1490 if(fXML11EntityScanner != null) { 1491 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1492 } 1493 1494 // DEBUG 1495 if (DEBUG_ENTITIES) { 1496 addInternalEntity("text", "Hello, World."); 1497 addInternalEntity("empty-element", "<foo/>"); 1498 addInternalEntity("balanced-element", "<foo></foo>"); 1499 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1500 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1501 addInternalEntity("unbalanced-entity", "<foo>"); 1502 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1503 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1504 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1505 try { 1506 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1507 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1508 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1509 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1510 } 1511 catch (IOException ex) { 1512 // should never happen 1513 } 1514 } 1515 1516 fEntityHandler = null; 1517 1518 // reset scanner 1519 //if(fEntityScanner!=null) 1520 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1521 1522 } 1523 
/** 1524 * Returns a list of feature identifiers that are recognized by 1525 * this component. This method may return null if no features 1526 * are recognized by this component. 1527 */ 1528 public String[] getRecognizedFeatures() { 1529 return (String[])(RECOGNIZED_FEATURES.clone()); 1530 } // getRecognizedFeatures():String[] 1531 1532 /** 1533 * Sets the state of a feature. This method is called by the component 1534 * manager any time after reset when a feature changes state. 1535 * <p> 1536 * <strong>Note:</strong> Components should silently ignore features 1537 * that do not affect the operation of the component. 1538 * 1539 * @param featureId The feature identifier. 1540 * @param state The state of the feature. 1541 * 1542 * @throws SAXNotRecognizedException The component should not throw 1543 * this exception. 1544 * @throws SAXNotSupportedException The component should not throw 1545 * this exception. 1546 */ 1547 public void setFeature(String featureId, boolean state) 1548 throws XMLConfigurationException { 1549 1550 // xerces features 1551 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1552 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1553 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1554 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1555 fAllowJavaEncodings = state; 1556 } 1557 } 1558 1559 } // setFeature(String,boolean) 1560 1561 /** 1562 * Sets the value of a property. This method is called by the component 1563 * manager any time after reset when a property changes value. 1564 * <p> 1565 * <strong>Note:</strong> Components should silently ignore properties 1566 * that do not affect the operation of the component. 1567 * 1568 * @param propertyId The property identifier. 1569 * @param value The value of the property. 1570 * 1571 * @throws SAXNotRecognizedException The component should not throw 1572 * this exception. 
1573 * @throws SAXNotSupportedException The component should not throw 1574 * this exception. 1575 */ 1576 public void setProperty(String propertyId, Object value){ 1577 // Xerces properties 1578 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1579 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1580 1581 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1582 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1583 fSymbolTable = (SymbolTable)value; 1584 return; 1585 } 1586 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1587 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1588 fErrorReporter = (XMLErrorReporter)value; 1589 return; 1590 } 1591 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1592 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1593 fEntityResolver = (XMLEntityResolver)value; 1594 return; 1595 } 1596 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1597 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1598 Integer bufferSize = (Integer)value; 1599 if (bufferSize != null && 1600 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1601 fBufferSize = bufferSize.intValue(); 1602 fEntityScanner.setBufferSize(fBufferSize); 1603 fBufferPool.setExternalBufferSize(fBufferSize); 1604 } 1605 } 1606 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1607 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1608 fSecurityManager = (SecurityManager)value; 1609 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0; 1610 } 1611 } 1612 1613 } 1614 /** 1615 * Returns a list of property identifiers that are recognized by 1616 * this component. This method may return null if no properties 1617 * are recognized by this component. 
1618 */ 1619 public String[] getRecognizedProperties() { 1620 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1621 } // getRecognizedProperties():String[] 1622 /** 1623 * Returns the default state for a feature, or null if this 1624 * component does not want to report a default value for this 1625 * feature. 1626 * 1627 * @param featureId The feature identifier. 1628 * 1629 * @since Xerces 2.2.0 1630 */ 1631 public Boolean getFeatureDefault(String featureId) { 1632 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1633 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1634 return FEATURE_DEFAULTS[i]; 1635 } 1636 } 1637 return null; 1638 } // getFeatureDefault(String):Boolean 1639 1640 /** 1641 * Returns the default state for a property, or null if this 1642 * component does not want to report a default value for this 1643 * property. 1644 * 1645 * @param propertyId The property identifier. 1646 * 1647 * @since Xerces 2.2.0 1648 */ 1649 public Object getPropertyDefault(String propertyId) { 1650 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1651 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1652 return PROPERTY_DEFAULTS[i]; 1653 } 1654 } 1655 return null; 1656 } // getPropertyDefault(String):Object 1657 1658 // 1659 // Public static methods 1660 // 1661 1662 /** 1663 * Expands a system id and returns the system id as a URI, if 1664 * it can be expanded. A return value of null means that the 1665 * identifier is already expanded. An exception thrown 1666 * indicates a failure to expand the id. 1667 * 1668 * @param systemId The systemId to be expanded. 1669 * 1670 * @return Returns the URI string representing the expanded system 1671 * identifier. A null value indicates that the given 1672 * system identifier is already expanded. 
*
     */
    public static String expandSystemId(String systemId) {
        final String noBase = null;
        return expandSystemId(systemId, noBase);
    } // expandSystemId(String):String

    //
    // State used to escape the "user.dir" system property
    //

    // current value of the "user.dir" property
    private static String gUserDir;
    // cached URI object for the current value of the escaped "user.dir" property stored as a URI
    private static URI gUserDirURI;
    // which ASCII characters need to be escaped
    private static boolean gNeedEscaping[] = new boolean[128];
    // the first hex character if a character needs to be escaped
    private static char gAfterEscaping1[] = new char[128];
    // the second hex character if a character needs to be escaped
    private static char gAfterEscaping2[] = new char[128];
    private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    // fill in the three escaping tables above
    static {
        // control characters 0x00..0x1F
        for (int code = 0; code <= 0x1f; code++) {
            gNeedEscaping[code] = true;
            gAfterEscaping1[code] = gHexChs[code >> 4];
            gAfterEscaping2[code] = gHexChs[code & 0xf];
        }
        // DEL
        gNeedEscaping[0x7f] = true;
        gAfterEscaping1[0x7f] = '7';
        gAfterEscaping2[0x7f] = 'F';
        // printable characters that are escaped as well
        final char[] escapedPrintables = {' ', '<', '>', '#', '%', '"', '{', '}',
                                          '|', '\\', '^', '~', '[', ']', '`'};
        for (int k = 0; k < escapedPrintables.length; k++) {
            final char printable = escapedPrintables[k];
            gNeedEscaping[printable] = true;
            gAfterEscaping1[printable] = gHexChs[printable >> 4];
            gAfterEscaping2[printable] = gHexChs[printable & 0xf];
        }
    }

    // To escape the "user.dir" system property, by using %HH to represent
    // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%',
    // '"', '{', '}', '|', '\', '^', '~', '[', ']' and '`'.
    // It's a static method, so needs to be synchronized.
    // this method looks heavy, but since the system property isn't expected
    // to change often, so in most cases, we only need to return the URI
    // that was escaped before.
1723 // According to the URI spec, non-ASCII characters (whose value >= 128) 1724 // need to be escaped too. 1725 // REVISIT: don't know how to escape non-ASCII characters, especially 1726 // which encoding to use. Leave them for now. 1727 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1728 // get the user.dir property 1729 String userDir = ""; 1730 try { 1731 userDir = SecuritySupport.getSystemProperty("user.dir"); 1732 } 1733 catch (SecurityException se) { 1734 } 1735 1736 // return empty string if property value is empty string. 1737 if (userDir.length() == 0) 1738 return new URI("file", "", "", null, null); 1739 // compute the new escaped value if the new property value doesn't 1740 // match the previous one 1741 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1742 return gUserDirURI; 1743 } 1744 1745 // record the new value as the global property value 1746 gUserDir = userDir; 1747 1748 char separator = java.io.File.separatorChar; 1749 userDir = userDir.replace(separator, '/'); 1750 1751 int len = userDir.length(), ch; 1752 StringBuffer buffer = new StringBuffer(len*3); 1753 // change C:/blah to /C:/blah 1754 if (len >= 2 && userDir.charAt(1) == ':') { 1755 ch = Character.toUpperCase(userDir.charAt(0)); 1756 if (ch >= 'A' && ch <= 'Z') { 1757 buffer.append('/'); 1758 } 1759 } 1760 1761 // for each character in the path 1762 int i = 0; 1763 for (; i < len; i++) { 1764 ch = userDir.charAt(i); 1765 // if it's not an ASCII character, break here, and use UTF-8 encoding 1766 if (ch >= 128) 1767 break; 1768 if (gNeedEscaping[ch]) { 1769 buffer.append('%'); 1770 buffer.append(gAfterEscaping1[ch]); 1771 buffer.append(gAfterEscaping2[ch]); 1772 // record the fact that it's escaped 1773 } 1774 else { 1775 buffer.append((char)ch); 1776 } 1777 } 1778 1779 // we saw some non-ascii character 1780 if (i < len) { 1781 // get UTF-8 bytes for the remaining sub-string 1782 byte[] bytes = null; 1783 byte b; 1784 try { 1785 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1786 } catch (java.io.UnsupportedEncodingException e) { 1787 // should never happen 1788 return new URI("file", "", userDir, null, null); 1789 } 1790 len = bytes.length; 1791 1792 // for each byte 1793 for (i = 0; i < len; i++) { 1794 b = bytes[i]; 1795 // for non-ascii character: make it positive, then escape 1796 if (b < 0) { 1797 ch = b + 256; 1798 buffer.append('%'); 1799 buffer.append(gHexChs[ch >> 4]); 1800 buffer.append(gHexChs[ch & 0xf]); 1801 } 1802 else if (gNeedEscaping[b]) { 1803 buffer.append('%'); 1804 buffer.append(gAfterEscaping1[b]); 1805 buffer.append(gAfterEscaping2[b]); 1806 } 1807 else { 1808 buffer.append((char)b); 1809 } 1810 } 1811 } 1812 1813 // change blah/blah to blah/blah/ 1814 if (!userDir.endsWith("/")) 1815 buffer.append('/'); 1816 1817 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1818 1819 return gUserDirURI; 1820 } 1821 1822 /** 1823 * Absolutizes a URI using the current value 1824 * of the "user.dir" property as the base URI. If 1825 * the URI is already absolute, this is a no-op. 1826 * 1827 * @param uri the URI to absolutize 1828 */ 1829 public static void absolutizeAgainstUserDir(URI uri) 1830 throws URI.MalformedURIException { 1831 uri.absolutize(getUserDir()); 1832 } 1833 1834 /** 1835 * Expands a system id and returns the system id as a URI, if 1836 * it can be expanded. A return value of null means that the 1837 * identifier is already expanded. An exception thrown 1838 * indicates a failure to expand the id. 1839 * 1840 * @param systemId The systemId to be expanded. 1841 * 1842 * @return Returns the URI string representing the expanded system 1843 * identifier. A null value indicates that the given 1844 * system identifier is already expanded. 
1845 * 1846 */ 1847 public static String expandSystemId(String systemId, String baseSystemId) { 1848 1849 // check for bad parameters id 1850 if (systemId == null || systemId.length() == 0) { 1851 return systemId; 1852 } 1853 // if id already expanded, return 1854 try { 1855 URI uri = new URI(systemId); 1856 if (uri != null) { 1857 return systemId; 1858 } 1859 } catch (URI.MalformedURIException e) { 1860 // continue on... 1861 } 1862 // normalize id 1863 String id = fixURI(systemId); 1864 1865 // normalize base 1866 URI base = null; 1867 URI uri = null; 1868 try { 1869 if (baseSystemId == null || baseSystemId.length() == 0 || 1870 baseSystemId.equals(systemId)) { 1871 String dir = getUserDir().toString(); 1872 base = new URI("file", "", dir, null, null); 1873 } else { 1874 try { 1875 base = new URI(fixURI(baseSystemId)); 1876 } catch (URI.MalformedURIException e) { 1877 if (baseSystemId.indexOf(':') != -1) { 1878 // for xml schemas we might have baseURI with 1879 // a specified drive 1880 base = new URI("file", "", fixURI(baseSystemId), null, null); 1881 } else { 1882 String dir = getUserDir().toString(); 1883 dir = dir + fixURI(baseSystemId); 1884 base = new URI("file", "", dir, null, null); 1885 } 1886 } 1887 } 1888 // expand id 1889 uri = new URI(base, id); 1890 } catch (Exception e) { 1891 // let it go through 1892 1893 } 1894 1895 if (uri == null) { 1896 return systemId; 1897 } 1898 return uri.toString(); 1899 1900 } // expandSystemId(String,String):String 1901 1902 /** 1903 * Expands a system id and returns the system id as a URI, if 1904 * it can be expanded. A return value of null means that the 1905 * identifier is already expanded. An exception thrown 1906 * indicates a failure to expand the id. 1907 * 1908 * @param systemId The systemId to be expanded. 1909 * 1910 * @return Returns the URI string representing the expanded system 1911 * identifier. A null value indicates that the given 1912 * system identifier is already expanded. 
1913 * 1914 */ 1915 public static String expandSystemId(String systemId, String baseSystemId, 1916 boolean strict) 1917 throws URI.MalformedURIException { 1918 1919 // check if there is a system id before 1920 // trying to expand it. 1921 if (systemId == null) { 1922 return null; 1923 } 1924 1925 // system id has to be a valid URI 1926 if (strict) { 1927 1928 1929 // check if there is a system id before 1930 // trying to expand it. 1931 if (systemId == null) { 1932 return null; 1933 } 1934 1935 try { 1936 // if it's already an absolute one, return it 1937 new URI(systemId); 1938 return systemId; 1939 } 1940 catch (URI.MalformedURIException ex) { 1941 } 1942 URI base = null; 1943 // if there isn't a base uri, use the working directory 1944 if (baseSystemId == null || baseSystemId.length() == 0) { 1945 base = new URI("file", "", getUserDir().toString(), null, null); 1946 } 1947 // otherwise, use the base uri 1948 else { 1949 try { 1950 base = new URI(baseSystemId); 1951 } 1952 catch (URI.MalformedURIException e) { 1953 // assume "base" is also a relative uri 1954 String dir = getUserDir().toString(); 1955 dir = dir + baseSystemId; 1956 base = new URI("file", "", dir, null, null); 1957 } 1958 } 1959 // absolutize the system id using the base 1960 URI uri = new URI(base, systemId); 1961 // return the string rep of the new uri (an absolute one) 1962 return uri.toString(); 1963 1964 // if any exception is thrown, it'll get thrown to the caller. 1965 } 1966 1967 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 
1968 try { 1969 return expandSystemIdStrictOff(systemId, baseSystemId); 1970 } 1971 catch (URI.MalformedURIException e) { 1972 /** Xerces URI rejects unicode, try java.net.URI 1973 * this is not ideal solution, but it covers known cases which either 1974 * Xerces URI or java.net.URI can handle alone 1975 * will file bug against java.net.URI 1976 */ 1977 try { 1978 return expandSystemIdStrictOff1(systemId, baseSystemId); 1979 } catch (URISyntaxException ex) { 1980 // continue on... 1981 } 1982 } 1983 // check for bad parameters id 1984 if (systemId.length() == 0) { 1985 return systemId; 1986 } 1987 1988 // normalize id 1989 String id = fixURI(systemId); 1990 1991 // normalize base 1992 URI base = null; 1993 URI uri = null; 1994 try { 1995 if (baseSystemId == null || baseSystemId.length() == 0 || 1996 baseSystemId.equals(systemId)) { 1997 base = getUserDir(); 1998 } 1999 else { 2000 try { 2001 base = new URI(fixURI(baseSystemId).trim()); 2002 } 2003 catch (URI.MalformedURIException e) { 2004 if (baseSystemId.indexOf(':') != -1) { 2005 // for xml schemas we might have baseURI with 2006 // a specified drive 2007 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2008 } 2009 else { 2010 base = new URI(getUserDir(), fixURI(baseSystemId)); 2011 } 2012 } 2013 } 2014 // expand id 2015 uri = new URI(base, id.trim()); 2016 } 2017 catch (Exception e) { 2018 // let it go through 2019 2020 } 2021 2022 if (uri == null) { 2023 return systemId; 2024 } 2025 return uri.toString(); 2026 2027 } // expandSystemId(String,String,boolean):String 2028 2029 /** 2030 * Helper method for expandSystemId(String,String,boolean):String 2031 */ 2032 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2033 throws URI.MalformedURIException { 2034 2035 URI systemURI = new URI(systemId, true); 2036 // If it's already an absolute one, return it 2037 if (systemURI.isAbsoluteURI()) { 2038 return systemId; 2039 } 2040 2041 // If there isn't a base URI, use 
the working directory 2042 URI baseURI = null; 2043 if (baseSystemId == null || baseSystemId.length() == 0) { 2044 baseURI = getUserDir(); 2045 } 2046 else { 2047 baseURI = new URI(baseSystemId, true); 2048 if (!baseURI.isAbsoluteURI()) { 2049 // assume "base" is also a relative uri 2050 baseURI.absolutize(getUserDir()); 2051 } 2052 } 2053 2054 // absolutize the system identifier using the base URI 2055 systemURI.absolutize(baseURI); 2056 2057 // return the string rep of the new uri (an absolute one) 2058 return systemURI.toString(); 2059 2060 // if any exception is thrown, it'll get thrown to the caller. 2061 2062 } // expandSystemIdStrictOn(String,String):String 2063 2064 /** 2065 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>. 2066 * This may fail on earlier JDKs which do not support setting this preference. 2067 */ 2068 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) { 2069 try { 2070 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE}); 2071 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE}); 2072 } 2073 // setInstanceFollowRedirects doesn't exist. 2074 catch (Exception exc) {} 2075 } 2076 2077 2078 /** 2079 * Helper method for expandSystemId(String,String,boolean):String 2080 */ 2081 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2082 throws URI.MalformedURIException { 2083 2084 URI systemURI = new URI(systemId, true); 2085 // If it's already an absolute one, return it 2086 if (systemURI.isAbsoluteURI()) { 2087 if (systemURI.getScheme().length() > 1) { 2088 return systemId; 2089 } 2090 /** 2091 * If the scheme's length is only one character, 2092 * it's likely that this was intended as a file 2093 * path. Fixing this up in expandSystemId to 2094 * maintain backwards compatibility. 
2095 */ 2096 throw new URI.MalformedURIException(); 2097 } 2098 2099 // If there isn't a base URI, use the working directory 2100 URI baseURI = null; 2101 if (baseSystemId == null || baseSystemId.length() == 0) { 2102 baseURI = getUserDir(); 2103 } 2104 else { 2105 baseURI = new URI(baseSystemId, true); 2106 if (!baseURI.isAbsoluteURI()) { 2107 // assume "base" is also a relative uri 2108 baseURI.absolutize(getUserDir()); 2109 } 2110 } 2111 2112 // absolutize the system identifier using the base URI 2113 systemURI.absolutize(baseURI); 2114 2115 // return the string rep of the new uri (an absolute one) 2116 return systemURI.toString(); 2117 2118 // if any exception is thrown, it'll get thrown to the caller. 2119 2120 } // expandSystemIdStrictOff(String,String):String 2121 2122 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2123 throws URISyntaxException, URI.MalformedURIException { 2124 2125 java.net.URI systemURI = new java.net.URI(systemId); 2126 // If it's already an absolute one, return it 2127 if (systemURI.isAbsolute()) { 2128 if (systemURI.getScheme().length() > 1) { 2129 return systemId; 2130 } 2131 /** 2132 * If the scheme's length is only one character, 2133 * it's likely that this was intended as a file 2134 * path. Fixing this up in expandSystemId to 2135 * maintain backwards compatibility. 
2136 */ 2137 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2138 } 2139 2140 // If there isn't a base URI, use the working directory 2141 URI baseURI = null; 2142 if (baseSystemId == null || baseSystemId.length() == 0) { 2143 baseURI = getUserDir(); 2144 } 2145 else { 2146 baseURI = new URI(baseSystemId, true); 2147 if (!baseURI.isAbsoluteURI()) { 2148 // assume "base" is also a relative uri 2149 baseURI.absolutize(getUserDir()); 2150 } 2151 } 2152 2153 // absolutize the system identifier using the base URI 2154 // systemURI.absolutize(baseURI); 2155 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2156 2157 // return the string rep of the new uri (an absolute one) 2158 return systemURI.toString(); 2159 2160 // if any exception is thrown, it'll get thrown to the caller. 2161 2162 } // expandSystemIdStrictOff(String,String):String 2163 2164 // 2165 // Protected methods 2166 // 2167 2168 2169 /** 2170 * Returns the IANA encoding name that is auto-detected from 2171 * the bytes specified, with the endian-ness of that encoding where appropriate. 2172 * 2173 * @param b4 The first four bytes of the input. 2174 * @param count The number of bytes actually read. 2175 * @return a 2-element array: the first element, an IANA-encoding string, 2176 * the second element a Boolean which is true iff the document is big endian, false 2177 * if it's little-endian, and null if the distinction isn't relevant. 
2178 */ 2179 protected Object[] getEncodingName(byte[] b4, int count) { 2180 2181 if (count < 2) { 2182 return defaultEncoding; 2183 } 2184 2185 // UTF-16, with BOM 2186 int b0 = b4[0] & 0xFF; 2187 int b1 = b4[1] & 0xFF; 2188 if (b0 == 0xFE && b1 == 0xFF) { 2189 // UTF-16, big-endian 2190 return new Object [] {"UTF-16BE", new Boolean(true)}; 2191 } 2192 if (b0 == 0xFF && b1 == 0xFE) { 2193 // UTF-16, little-endian 2194 return new Object [] {"UTF-16LE", new Boolean(false)}; 2195 } 2196 2197 // default to UTF-8 if we don't have enough bytes to make a 2198 // good determination of the encoding 2199 if (count < 3) { 2200 return defaultEncoding; 2201 } 2202 2203 // UTF-8 with a BOM 2204 int b2 = b4[2] & 0xFF; 2205 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2206 return defaultEncoding; 2207 } 2208 2209 // default to UTF-8 if we don't have enough bytes to make a 2210 // good determination of the encoding 2211 if (count < 4) { 2212 return defaultEncoding; 2213 } 2214 2215 // other encodings 2216 int b3 = b4[3] & 0xFF; 2217 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2218 // UCS-4, big endian (1234) 2219 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2220 } 2221 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2222 // UCS-4, little endian (4321) 2223 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2224 } 2225 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2226 // UCS-4, unusual octet order (2143) 2227 // REVISIT: What should this be? 2228 return new Object [] {"ISO-10646-UCS-4", null}; 2229 } 2230 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2231 // UCS-4, unusual octect order (3412) 2232 // REVISIT: What should this be? 2233 return new Object [] {"ISO-10646-UCS-4", null}; 2234 } 2235 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2236 // UTF-16, big-endian, no BOM 2237 // (or could turn out to be UCS-2... 2238 // REVISIT: What should this be? 
2239 return new Object [] {"UTF-16BE", new Boolean(true)}; 2240 } 2241 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2242 // UTF-16, little-endian, no BOM 2243 // (or could turn out to be UCS-2... 2244 return new Object [] {"UTF-16LE", new Boolean(false)}; 2245 } 2246 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2247 // EBCDIC 2248 // a la xerces1, return CP037 instead of EBCDIC here 2249 return new Object [] {"CP037", null}; 2250 } 2251 2252 return defaultEncoding; 2253 2254 } // getEncodingName(byte[],int):Object[] 2255 2256 /** 2257 * Creates a reader capable of reading the given input stream in 2258 * the specified encoding. 2259 * 2260 * @param inputStream The input stream. 2261 * @param encoding The encoding name that the input stream is 2262 * encoded using. If the user has specified that 2263 * Java encoding names are allowed, then the 2264 * encoding name may be a Java encoding name; 2265 * otherwise, it is an ianaEncoding name. 2266 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2267 * specify a byte order, this tells whether the order is bigEndian. null menas 2268 * unknown or not relevant. 2269 * 2270 * @return Returns a reader. 
*/
    protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
    throws IOException {

        // normalize encoding name: a missing name is treated as UTF-8
        if (encoding == null) {
            encoding = "UTF-8";
        }

        // try to use an optimized reader; the upper-cased copy is used for
        // all name comparisons below
        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
        if (ENCODING.equals("UTF-8")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating UTF8Reader");
            }
            return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
        }
        if (ENCODING.equals("US-ASCII")) {
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ creating ASCIIReader");
            }
            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
        }
        if(ENCODING.equals("ISO-10646-UCS-4")) {
            if(isBigEndian != null) {
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS4BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS4LE);
                }
            } else {
                // byte order unknown: report a fatal error; if reportError
                // returns, execution continues with the checks below
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }
        if(ENCODING.equals("ISO-10646-UCS-2")) {
            if(isBigEndian != null) { // should never happen with this encoding...
                boolean isBE = isBigEndian.booleanValue();
                if(isBE) {
                    return new UCSReader(inputStream, UCSReader.UCS2BE);
                } else {
                    return new UCSReader(inputStream, UCSReader.UCS2LE);
                }
            } else {
                // same handling as for UCS-4 above
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingByteOrderUnsupported",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }

        // check for valid name
        boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
        boolean validJava = XMLChar.isValidJavaEncoding(encoding);
        if (!validIANA || (fAllowJavaEncodings && !validJava)) {
            fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                    "EncodingDeclInvalid",
                    new Object[] { encoding },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
            //       because every byte is a valid ISO Latin 1 character.
            //       It may not translate correctly but if we failed on
            //       the encoding anyway, then we're expecting the content
            //       of the document to be bad. This will just prevent an
            //       invalid UTF-8 sequence to be detected. This is only
            //       important when continue-after-fatal-error is turned
            //       on. -Ac
            encoding = "ISO-8859-1";
        }

        // try to use a Java reader
        // NOTE(review): the lookup uses ENCODING, which was derived from the
        // name before the ISO-8859-1 fallback above -- confirm intended.
        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
        if (javaEncoding == null) {
            if(fAllowJavaEncodings) {
                // no IANA mapping: use the name as a Java encoding name
                javaEncoding = encoding;
            } else {
                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                        "EncodingDeclInvalid",
                        new Object[] { encoding },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                // see comment above.
                javaEncoding = "ISO8859_1";
            }
        }
        if (DEBUG_ENCODINGS) {
            System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
            // reference comparison: true only when javaEncoding was assigned
            // from encoding a few lines above
            if (javaEncoding == encoding) {
                System.out.print(" (IANA encoding)");
            }
            System.out.println();
        }
        return new BufferedReader( new InputStreamReader(inputStream, javaEncoding));

    } // createReader(InputStream,String, Boolean): Reader


    /**
     * Return the public identifier for the current document event.
     * <p>
     * The return value is the public identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.
     *
     * @return A string containing the public identifier, or
     *         null if none is available (there is no current entity, or
     *         the current entity has no entity location).
     */
    public String getPublicId() {
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
    } // getPublicId():String

    /**
     * Return the expanded system identifier for the current document event.
     * <p>
     * The return value is the expanded system identifier of the document
     * entity or of the external parsed entity in which the markup
     * triggering the event appears.
     * <p>
     * If the system identifier is a URL, the parser must resolve it
     * fully before passing it to the application.
     *
     * @return A string containing the expanded system identifier, or null
     *         if none is available.
2396 */ 2397 public String getExpandedSystemId() { 2398 if (fCurrentEntity != null) { 2399 if (fCurrentEntity.entityLocation != null && 2400 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2401 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2402 } else { 2403 // search for the first external entity on the stack 2404 int size = fEntityStack.size(); 2405 for (int i = size - 1; i >= 0 ; i--) { 2406 Entity.ScannedEntity externalEntity = 2407 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2408 2409 if (externalEntity.entityLocation != null && 2410 externalEntity.entityLocation.getExpandedSystemId() != null) { 2411 return externalEntity.entityLocation.getExpandedSystemId(); 2412 } 2413 } 2414 } 2415 } 2416 return null; 2417 } // getExpandedSystemId():String 2418 2419 /** 2420 * Return the literal system identifier for the current document event. 2421 * <p> 2422 * The return value is the literal system identifier of the document 2423 * entity or of the external parsed entity in which the markup 2424 * triggering the event appears. 2425 * <p> 2426 * @return A string containing the literal system identifier, or null 2427 * if none is available. 
2428 */ 2429 public String getLiteralSystemId() { 2430 if (fCurrentEntity != null) { 2431 if (fCurrentEntity.entityLocation != null && 2432 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2433 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2434 } else { 2435 // search for the first external entity on the stack 2436 int size = fEntityStack.size(); 2437 for (int i = size - 1; i >= 0 ; i--) { 2438 Entity.ScannedEntity externalEntity = 2439 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2440 2441 if (externalEntity.entityLocation != null && 2442 externalEntity.entityLocation.getLiteralSystemId() != null) { 2443 return externalEntity.entityLocation.getLiteralSystemId(); 2444 } 2445 } 2446 } 2447 } 2448 return null; 2449 } // getLiteralSystemId():String 2450 2451 /** 2452 * Return the line number where the current document event ends. 2453 * <p> 2454 * <strong>Warning:</strong> The return value from the method 2455 * is intended only as an approximation for the sake of error 2456 * reporting; it is not intended to provide sufficient information 2457 * to edit the character content of the original XML document. 2458 * <p> 2459 * The return value is an approximation of the line number 2460 * in the document entity or external parsed entity where the 2461 * markup triggering the event appears. 2462 * <p> 2463 * If possible, the SAX driver should provide the line position 2464 * of the first character after the text associated with the document 2465 * event. The first line in the document is line 1. 2466 * 2467 * @return The line number, or -1 if none is available. 
2468 */ 2469 public int getLineNumber() { 2470 if (fCurrentEntity != null) { 2471 if (fCurrentEntity.isExternal()) { 2472 return fCurrentEntity.lineNumber; 2473 } else { 2474 // search for the first external entity on the stack 2475 int size = fEntityStack.size(); 2476 for (int i=size-1; i>0 ; i--) { 2477 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2478 if (firstExternalEntity.isExternal()) { 2479 return firstExternalEntity.lineNumber; 2480 } 2481 } 2482 } 2483 } 2484 2485 return -1; 2486 2487 } // getLineNumber():int 2488 2489 /** 2490 * Return the column number where the current document event ends. 2491 * <p> 2492 * <strong>Warning:</strong> The return value from the method 2493 * is intended only as an approximation for the sake of error 2494 * reporting; it is not intended to provide sufficient information 2495 * to edit the character content of the original XML document. 2496 * <p> 2497 * The return value is an approximation of the column number 2498 * in the document entity or external parsed entity where the 2499 * markup triggering the event appears. 2500 * <p> 2501 * If possible, the SAX driver should provide the line position 2502 * of the first character after the text associated with the document 2503 * event. 2504 * <p> 2505 * If possible, the SAX driver should provide the line position 2506 * of the first character after the text associated with the document 2507 * event. The first column in each line is column 1. 2508 * 2509 * @return The column number, or -1 if none is available. 
2510 */ 2511 public int getColumnNumber() { 2512 if (fCurrentEntity != null) { 2513 if (fCurrentEntity.isExternal()) { 2514 return fCurrentEntity.columnNumber; 2515 } else { 2516 // search for the first external entity on the stack 2517 int size = fEntityStack.size(); 2518 for (int i=size-1; i>0 ; i--) { 2519 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2520 if (firstExternalEntity.isExternal()) { 2521 return firstExternalEntity.columnNumber; 2522 } 2523 } 2524 } 2525 } 2526 2527 return -1; 2528 } // getColumnNumber():int 2529 2530 2531 // 2532 // Protected static methods 2533 // 2534 2535 /** 2536 * Fixes a platform dependent filename to standard URI form. 2537 * 2538 * @param str The string to fix. 2539 * 2540 * @return Returns the fixed URI string. 2541 */ 2542 protected static String fixURI(String str) { 2543 2544 // handle platform dependent strings 2545 str = str.replace(java.io.File.separatorChar, '/'); 2546 2547 // Windows fix 2548 if (str.length() >= 2) { 2549 char ch1 = str.charAt(1); 2550 // change "C:blah" to "/C:blah" 2551 if (ch1 == ':') { 2552 char ch0 = Character.toUpperCase(str.charAt(0)); 2553 if (ch0 >= 'A' && ch0 <= 'Z') { 2554 str = "/" + str; 2555 } 2556 } 2557 // change "//blah" to "file://blah" 2558 else if (ch1 == '/' && str.charAt(0) == '/') { 2559 str = "file:" + str; 2560 } 2561 } 2562 2563 // replace spaces in file names with %20. 2564 // Original comment from JDK5: the following algorithm might not be 2565 // very performant, but people who want to use invalid URI's have to 2566 // pay the price. 2567 int pos = str.indexOf(' '); 2568 if (pos >= 0) { 2569 StringBuilder sb = new StringBuilder(str.length()); 2570 // put characters before ' ' into the string builder 2571 for (int i = 0; i < pos; i++) 2572 sb.append(str.charAt(i)); 2573 // and %20 for the space 2574 sb.append("%20"); 2575 // for the remamining part, also convert ' ' to "%20". 
2576 for (int i = pos+1; i < str.length(); i++) { 2577 if (str.charAt(i) == ' ') 2578 sb.append("%20"); 2579 else 2580 sb.append(str.charAt(i)); 2581 } 2582 str = sb.toString(); 2583 } 2584 2585 // done 2586 return str; 2587 2588 } // fixURI(String):String 2589 2590 2591 // 2592 // Package visible methods 2593 // 2594 /** Prints the contents of the buffer. */ 2595 final void print() { 2596 if (DEBUG_BUFFER) { 2597 if (fCurrentEntity != null) { 2598 System.out.print('['); 2599 System.out.print(fCurrentEntity.count); 2600 System.out.print(' '); 2601 System.out.print(fCurrentEntity.position); 2602 if (fCurrentEntity.count > 0) { 2603 System.out.print(" \""); 2604 for (int i = 0; i < fCurrentEntity.count; i++) { 2605 if (i == fCurrentEntity.position) { 2606 System.out.print('^'); 2607 } 2608 char c = fCurrentEntity.ch[i]; 2609 switch (c) { 2610 case '\n': { 2611 System.out.print("\\n"); 2612 break; 2613 } 2614 case '\r': { 2615 System.out.print("\\r"); 2616 break; 2617 } 2618 case '\t': { 2619 System.out.print("\\t"); 2620 break; 2621 } 2622 case '\\': { 2623 System.out.print("\\\\"); 2624 break; 2625 } 2626 default: { 2627 System.out.print(c); 2628 } 2629 } 2630 } 2631 if (fCurrentEntity.position == fCurrentEntity.count) { 2632 System.out.print('^'); 2633 } 2634 System.out.print('"'); 2635 } 2636 System.out.print(']'); 2637 System.out.print(" @ "); 2638 System.out.print(fCurrentEntity.lineNumber); 2639 System.out.print(','); 2640 System.out.print(fCurrentEntity.columnNumber); 2641 } else { 2642 System.out.print("*NO CURRENT ENTITY*"); 2643 } 2644 } 2645 } // print() 2646 2647 /** 2648 * Buffer used in entity manager to reuse character arrays instead 2649 * of creating new ones every time. 
2650 * 2651 * @xerces.internal 2652 * 2653 * @author Ankit Pasricha, IBM 2654 */ 2655 private static class CharacterBuffer { 2656 2657 /** character buffer */ 2658 private char[] ch; 2659 2660 /** whether the buffer is for an external or internal scanned entity */ 2661 private boolean isExternal; 2662 2663 public CharacterBuffer(boolean isExternal, int size) { 2664 this.isExternal = isExternal; 2665 ch = new char[size]; 2666 } 2667 } 2668 2669 2670 /** 2671 * Stores a number of character buffers and provides it to the entity 2672 * manager to use when an entity is seen. 2673 * 2674 * @xerces.internal 2675 * 2676 * @author Ankit Pasricha, IBM 2677 */ 2678 private static class CharacterBufferPool { 2679 2680 private static final int DEFAULT_POOL_SIZE = 3; 2681 2682 private CharacterBuffer[] fInternalBufferPool; 2683 private CharacterBuffer[] fExternalBufferPool; 2684 2685 private int fExternalBufferSize; 2686 private int fInternalBufferSize; 2687 private int poolSize; 2688 2689 private int fInternalTop; 2690 private int fExternalTop; 2691 2692 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2693 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2694 } 2695 2696 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2697 fExternalBufferSize = externalBufferSize; 2698 fInternalBufferSize = internalBufferSize; 2699 this.poolSize = poolSize; 2700 init(); 2701 } 2702 2703 /** Initializes buffer pool. **/ 2704 private void init() { 2705 fInternalBufferPool = new CharacterBuffer[poolSize]; 2706 fExternalBufferPool = new CharacterBuffer[poolSize]; 2707 fInternalTop = -1; 2708 fExternalTop = -1; 2709 } 2710 2711 /** Retrieves buffer from pool. 
**/ 2712 public CharacterBuffer getBuffer(boolean external) { 2713 if (external) { 2714 if (fExternalTop > -1) { 2715 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2716 } 2717 else { 2718 return new CharacterBuffer(true, fExternalBufferSize); 2719 } 2720 } 2721 else { 2722 if (fInternalTop > -1) { 2723 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2724 } 2725 else { 2726 return new CharacterBuffer(false, fInternalBufferSize); 2727 } 2728 } 2729 } 2730 2731 /** Returns buffer to pool. **/ 2732 public void returnToPool(CharacterBuffer buffer) { 2733 if (buffer.isExternal) { 2734 if (fExternalTop < fExternalBufferPool.length - 1) { 2735 fExternalBufferPool[++fExternalTop] = buffer; 2736 } 2737 } 2738 else if (fInternalTop < fInternalBufferPool.length - 1) { 2739 fInternalBufferPool[++fInternalTop] = buffer; 2740 } 2741 } 2742 2743 /** Sets the size of external buffers and dumps the old pool. **/ 2744 public void setExternalBufferSize(int bufferSize) { 2745 fExternalBufferSize = bufferSize; 2746 fExternalBufferPool = new CharacterBuffer[poolSize]; 2747 fExternalTop = -1; 2748 } 2749 } 2750 2751 /** 2752 * This class wraps the byte inputstreams we're presented with. 2753 * We need it because java.io.InputStreams don't provide 2754 * functionality to reread processed bytes, and they have a habit 2755 * of reading more than one character when you call their read() 2756 * methods. This means that, once we discover the true (declared) 2757 * encoding of a document, we can neither backtrack to read the 2758 * whole doc again nor start reading where we are with a new 2759 * reader. 2760 * 2761 * This class allows rewinding an inputStream by allowing a mark 2762 * to be set, and the stream reset to that position. 
<strong>The
     * class assumes that it needs to read one character per
     * invocation when its read() method is invoked, but uses the
     * underlying InputStream's read(char[], offset length) method--it
     * won't buffer data read this way!</strong>
     *
     * @xerces.internal
     *
     * @author Neil Graham, IBM
     * @author Glenn Marcy, IBM
     */

    protected final class RewindableInputStream extends InputStream {

        /** the wrapped stream; set to null by close() */
        private InputStream fInputStream;
        /** bytes read one at a time through read(), kept for re-reading */
        private byte[] fData;
        /** offset that rewind() goes back to */
        private int fStartOffset;
        /** offset at which end of stream was seen, -1 until then */
        private int fEndOffset;
        /** current read position within fData */
        private int fOffset;
        /** number of valid bytes in fData */
        private int fLength;
        /** offset recorded by mark() */
        private int fMark;

        /** Wraps the given stream with an initially empty buffer. */
        public RewindableInputStream(InputStream is) {
            fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
            fInputStream = is;
            fStartOffset = 0;
            fEndOffset = -1;
            fOffset = 0;
            fLength = 0;
            fMark = 0;
        }

        /** Sets the offset that rewind() returns to. */
        public void setStartOffset(int offset) {
            fStartOffset = offset;
        }

        /** Moves the read position back to the start offset. */
        public void rewind() {
            fOffset = fStartOffset;
        }

        /**
         * Reads one byte, serving it from the buffer when the read position
         * is behind the buffered data and otherwise reading it from the
         * wrapped stream and appending it to the buffer.
         *
         * @return the byte as a value from 0 to 255, or -1 at end of stream.
         */
        public int read() throws IOException {
            int b = 0;
            // replay a byte that was buffered earlier
            if (fOffset < fLength) {
                return fData[fOffset++] & 0xff;
            }
            // end of stream was already seen at this position
            if (fOffset == fEndOffset) {
                return -1;
            }
            // buffer is full: double its size
            if (fOffset == fData.length) {
                byte[] newData = new byte[fOffset << 1];
                System.arraycopy(fData, 0, newData, 0, fOffset);
                fData = newData;
            }
            b = fInputStream.read();
            if (b == -1) {
                // remember where the stream ended
                fEndOffset = fOffset;
                return -1;
            }
            fData[fLength++] = (byte)b;
            fOffset++;
            return b & 0xff;
        }

        /**
         * Reads bytes into <code>b</code>. Buffered bytes are served first.
         * Once the buffer is exhausted the request is either passed to the
         * wrapped stream (such bytes are not buffered) or reduced to a
         * single buffered byte, depending on the flags of the current entity.
         *
         * @return the number of bytes read, or -1 at end of stream.
         */
        public int read(byte[] b, int off, int len) throws IOException {
            int bytesLeft = fLength - fOffset;
            if (bytesLeft == 0) {
                if (fOffset == fEndOffset) {
                    return -1;
                }

                // better get some more for the voracious reader...

                if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {

                    if (!fCurrentEntity.xmlDeclChunkRead)
                    {
                        // first chunk read for this entity: the requested
                        // length is replaced by the XMLDecl buffer size.
                        // NOTE(review): this ignores the caller's len;
                        // confirm b always has room for that many bytes.
                        fCurrentEntity.xmlDeclChunkRead = true;
                        len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
                    }
                    return fInputStream.read(b, off, len);
                }

                // chunked reads not allowed: hand out a single buffered byte
                int returnedVal = read();
                if(returnedVal == -1) {
                    fEndOffset = fOffset;
                    return -1;
                }
                b[off] = (byte)returnedVal;
                return 1;

            }
            // serve from the buffer, at most what is left in it
            if (len < bytesLeft) {
                if (len <= 0) {
                    return 0;
                }
            } else {
                len = bytesLeft;
            }
            if (b != null) {
                System.arraycopy(fData, fOffset, b, off, len);
            }
            fOffset += len;
            return len;
        }

        /**
         * Skips up to <code>n</code> bytes, first within the buffered data
         * and then in the wrapped stream.
         *
         * @return the number of bytes skipped.
         */
        public long skip(long n)
        throws IOException {
            int bytesLeft;
            if (n <= 0) {
                return 0;
            }
            bytesLeft = fLength - fOffset;
            if (bytesLeft == 0) {
                if (fOffset == fEndOffset) {
                    return 0;
                }
                return fInputStream.skip(n);
            }
            // the whole skip fits inside the buffered data
            if (n <= bytesLeft) {
                fOffset += n;
                return n;
            }
            fOffset += bytesLeft;
            if (fOffset == fEndOffset) {
                return bytesLeft;
            }
            n -= bytesLeft;
            // skip the remainder in the wrapped stream
            return fInputStream.skip(n) + bytesLeft;
        }

        /**
         * In a manner of speaking, when this class isn't permitting more
         * than one byte at a time to be read, it is "blocking". The
         * available() method should indicate how much can be read without
         * blocking, so while we're in this mode, it should only indicate
         * that bytes in its buffer are available; otherwise, the result of
         * available() on the underlying InputStream is appropriate.
         * <p>
         * NOTE(review): returns -1 once end of stream has been reached,
         * whereas java.io.InputStream.available() documents 0 for that
         * case -- confirm callers rely on -1.
         */
        public int available() throws IOException {
            int bytesLeft = fLength - fOffset;
            if (bytesLeft == 0) {
                if (fOffset == fEndOffset) {
                    return -1;
                }
                return fCurrentEntity.mayReadChunks ? fInputStream.available()
                        : 0;
            }
            return bytesLeft;
        }

        /** Records the current offset for reset(); the argument is ignored. */
        public void mark(int howMuch) {
            fMark = fOffset;
        }

        /** Moves the read position back to the offset recorded by mark(). */
        public void reset() {
            fOffset = fMark;
        }

        /** Mark and reset are supported. */
        public boolean markSupported() {
            return true;
        }

        /** Closes the wrapped stream once and drops the reference to it. */
        public void close() throws IOException {
            if (fInputStream != null) {
                fInputStream.close();
                fInputStream = null;
            }
        }
    } // end of RewindableInputStream class

    /**
     * Registers a fixed set of sample entities (one external, the rest
     * internal) with the entity storage.
     * NOTE(review): looks like leftover test scaffolding with hard-coded
     * paths -- confirm whether anything still calls it.
     */
    public void test(){
        //Usecase1
        fEntityStorage.addExternalEntity("entityUsecase1",null,
                "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt",
                "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml");

        //Usecase2
        fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>");
        fEntityStorage.addInternalEntity("entityUsecase3","value3");
        fEntityStorage.addInternalEntity("text", "Hello World.");
        fEntityStorage.addInternalEntity("empty-element", "<foo/>");
        fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>");
        fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>");
        fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>");
        fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>");
        // three entities that reference each other in a cycle
        fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>");
        fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>");
        fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>");
        fEntityStorage.addInternalEntity("ch","©");
        fEntityStorage.addInternalEntity("ch1","T");
        fEntityStorage.addInternalEntity("% ch2","param");
    }

} // class XMLEntityManager