1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl ; 22 23 import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 24 import com.sun.xml.internal.stream.StaxXMLInputSource; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.*; 27 import java.io.BufferedReader; 28 import java.util.*; 29 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.io.InputStreamReader; 33 import java.io.Reader; 34 import java.io.StringReader; 35 import java.lang.reflect.Method; 36 import java.net.HttpURLConnection; 37 import java.net.URL; 38 import java.net.URLConnection; 39 import java.net.URISyntaxException; 40 import java.util.Hashtable; 41 import java.util.Iterator; 42 import java.util.Locale; 43 import java.util.Map; 44 import java.util.Stack; 45 46 47 import com.sun.org.apache.xerces.internal.impl.io.*; 48 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 49 import com.sun.org.apache.xerces.internal.util.*; 50 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 51 import com.sun.org.apache.xerces.internal.xni.XNIException; 52 import com.sun.org.apache.xerces.internal.xni.parser.*; 53 import com.sun.org.apache.xerces.internal.impl.Constants; 54 import 
com.sun.xml.internal.stream.Entity; 55 import com.sun.org.apache.xerces.internal.xni.Augmentations; 56 57 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 58 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 59 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 60 import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 61 import com.sun.org.apache.xerces.internal.util.HTTPInputSource; 62 import com.sun.org.apache.xerces.internal.xinclude.XIncludeHandler; 63 64 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 65 import com.sun.org.apache.xerces.internal.util.SecurityManager; 66 import com.sun.org.apache.xerces.internal.util.URI; 67 68 69 /** 70 * Will keep track of current entity. 71 * 72 * The entity manager handles the registration of general and parameter 73 * entities; resolves entities; and starts entities. The entity manager 74 * is a central component in a standard parser configuration and this 75 * class works directly with the entity scanner to manage the underlying 76 * xni. 
* <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 *  <li>http://xml.org/sax/features/validation</li>
 *  <li>http://xml.org/sax/features/external-general-entities</li>
 *  <li>http://xml.org/sax/features/external-parameter-entities</li>
 *  <li>http://apache.org/xml/features/allow-java-encodings</li>
 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
 *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
 * </ul>
 *
 *
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal SUN Microsystems
 * @author Neeraj Bajaj SUN Microsystems
 * @author Sunitha Reddy SUN Microsystems
 * @version $Id: XMLEntityManager.java,v 1.17 2010-11-01 04:39:41 joehw Exp $
 */
public class XMLEntityManager implements XMLComponent, XMLEntityResolver {

    //
    // Constants
    //

    /** Default buffer size (8192). */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Default buffer size before we've finished with the XMLDecl (64). */
    public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;

    /** Default internal entity buffer size (1024). */
    public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024;

    // feature identifiers

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /**
     * Standard URI conformant (strict URI). This feature identifier is:
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;


    /** Feature identifier: external general entities. */
    protected static final String EXTERNAL_GENERAL_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;

    /** Feature identifier: external parameter entities. */
    protected static final String EXTERNAL_PARAMETER_ENTITIES =
            Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Feature identifier: warn on duplicate entity definition. */
    protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
            Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;

    // property identifiers

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: standard URI conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: StAX entity resolver. */
    protected static final String STAX_ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY;

    /** Property identifier: validation manager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: buffer size. */
    protected static final String BUFFER_SIZE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;

    /** Property identifier: security manager.
*/ 172 protected static final String SECURITY_MANAGER = 173 Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY; 174 175 protected static final String PARSER_SETTINGS = 176 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS; 177 // recognized features and properties 178 179 /** Recognized features. */ 180 private static final String[] RECOGNIZED_FEATURES = { 181 VALIDATION, 182 EXTERNAL_GENERAL_ENTITIES, 183 EXTERNAL_PARAMETER_ENTITIES, 184 ALLOW_JAVA_ENCODINGS, 185 WARN_ON_DUPLICATE_ENTITYDEF, 186 STANDARD_URI_CONFORMANT 187 }; 188 189 /** Feature defaults. */ 190 private static final Boolean[] FEATURE_DEFAULTS = { 191 null, 192 Boolean.TRUE, 193 Boolean.TRUE, 194 Boolean.TRUE, 195 Boolean.FALSE, 196 Boolean.FALSE 197 }; 198 199 /** Recognized properties. */ 200 private static final String[] RECOGNIZED_PROPERTIES = { 201 SYMBOL_TABLE, 202 ERROR_REPORTER, 203 ENTITY_RESOLVER, 204 VALIDATION_MANAGER, 205 BUFFER_SIZE, 206 SECURITY_MANAGER, 207 208 }; 209 210 /** Property defaults. */ 211 private static final Object[] PROPERTY_DEFAULTS = { 212 null, 213 null, 214 null, 215 null, 216 new Integer(DEFAULT_BUFFER_SIZE), 217 null 218 }; 219 220 private static final String XMLEntity = "[xml]".intern(); 221 private static final String DTDEntity = "[dtd]".intern(); 222 223 // debugging 224 225 /** 226 * Debug printing of buffer. This debugging flag works best when you 227 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 228 * 64 characters. 229 */ 230 private static final boolean DEBUG_BUFFER = false; 231 232 /** warn on duplicate Entity declaration. 233 * http://apache.org/xml/features/warn-on-duplicate-entitydef 234 */ 235 protected boolean fWarnDuplicateEntityDef; 236 237 /** Debug some basic entities. */ 238 private static final boolean DEBUG_ENTITIES = false; 239 240 /** Debug switching readers for encodings. 
*/
    private static final boolean DEBUG_ENCODINGS = false;

    /** Whether trace messages about entity resolution should be displayed. */
    private static final boolean DEBUG_RESOLVER = false ;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation;

    /**
     * External general entities. This feature identifier is:
     * http://xml.org/sax/features/external-general-entities
     */
    protected boolean fExternalGeneralEntities;

    /**
     * External parameter entities. This feature identifier is:
     * http://xml.org/sax/features/external-parameter-entities
     */
    protected boolean fExternalParameterEntities;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings = true ;


    // properties

    /**
     * Symbol table. This property identifier is:
     * http://apache.org/xml/properties/internal/symbol-table
     */
    protected SymbolTable fSymbolTable;

    /**
     * Error reporter. This property identifier is:
     * http://apache.org/xml/properties/internal/error-reporter
     */
    protected XMLErrorReporter fErrorReporter;

    /**
     * Entity resolver. This property identifier is:
     * http://apache.org/xml/properties/internal/entity-resolver
     */
    protected XMLEntityResolver fEntityResolver;

    /** StAX entity resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER. */
    protected StaxEntityResolverWrapper fStaxEntityResolver;

    /** Property manager. This is used from StAX. */
    protected PropertyManager fPropertyManager ;


    // settings

    /**
     * Validation manager. This property identifier is:
     * http://apache.org/xml/properties/internal/validation-manager
     */
    protected ValidationManager fValidationManager;

    /**
     * Buffer size. We get this value from a property. The default size
     * is used if the input buffer size property is not specified.
     * REVISIT: do we need a property for internal entity buffer size?
     */
    protected int fBufferSize = DEFAULT_BUFFER_SIZE;

    /**
     * Stores defaults for the entity expansion limit if it has
     * been set on the configuration.
     */
    protected SecurityManager fSecurityManager = null;

    /**
     * True if the document entity is standalone. This should really
     * only be set by the document source (e.g. XMLDocumentScanner).
     */
    protected boolean fStandalone;

    // Are the entities being parsed in the external subset?
    // NOTE: this *is not* the same as whether they're external entities!
    protected boolean fInExternalSubset = false;


    // handlers

    /** Entity handler. */
    protected XMLEntityHandler fEntityHandler;

    /** Current entity scanner. */
    protected XMLEntityScanner fEntityScanner ;

    /** XML 1.0 entity scanner. */
    protected XMLEntityScanner fXML10EntityScanner;

    /** XML 1.1 entity scanner. */
    protected XMLEntityScanner fXML11EntityScanner;

    /**
     * Entity expansion limit (contains useful data if and only if
     * fSecurityManager is non-null).
     */
    protected int fEntityExpansionLimit = 0;

    /** Count of entities expanded. */
    protected int fEntityExpansionCount = 0;

    // entities

    /** Entities, keyed by entity name. */
    protected Hashtable fEntities = new Hashtable();

    /** Entity stack. */
    protected Stack fEntityStack = new Stack();

    /** Current entity.
*/
    protected Entity.ScannedEntity fCurrentEntity = null;

    // shared context

    /** Entity storage created for this manager by the constructors. */
    protected XMLEntityStorage fEntityStorage ;

    /** Default encoding descriptor: the name "UTF-8" paired with a null second element. */
    protected final Object [] defaultEncoding = new Object[]{"UTF-8", null};


    // temp vars

    /** Resource identifier. */
    private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();

    /** Augmentations for entities. */
    private final Augmentations fEntityAugs = new AugmentationsImpl();

    /** Pool of character buffers. */
    private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);

    //
    // Constructors
    //

    /**
     * Creates an entity manager with its own entity storage and selects the
     * XML 1.0 entity scanner.
     * <p>
     * If this constructor is used to create the object, reset() should be
     * invoked on this object before use: the symbol table and error reporter
     * fields have not been assigned yet when the scanner is reset here.
     */
    public XMLEntityManager() {
        fEntityStorage = new XMLEntityStorage(this) ;
        setScannerVersion(Constants.XML_VERSION_1_0);
    } // <init>()

    /**
     * Creates an entity manager configured from a property manager: the
     * entity storage and the entity scanner are created first, then
     * reset(propertyManager) is invoked.
     *
     * @param propertyManager the property manager kept in fPropertyManager
     *                        and handed to the scanner and to reset
     */
    public XMLEntityManager(PropertyManager propertyManager) {
        fPropertyManager = propertyManager ;
        //pass a reference to current entity being scanned
        //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ;
        fEntityStorage = new XMLEntityStorage(this) ;
        fEntityScanner = new XMLEntityScanner(propertyManager, this) ;
        reset(propertyManager);
    } // <init>()

    /**
     * Adds an internal entity declaration.
     * <p>
     * <strong>Note:</strong> This method ignores subsequent entity
     * declarations.
     * <p>
     * <strong>Note:</strong> The name should be a unique symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @param name The name of the entity.
     * @param text The text of the entity.
419 * 420 * @see SymbolTable 421 */ 422 public void addInternalEntity(String name, String text) { 423 if (!fEntities.containsKey(name)) { 424 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 425 fEntities.put(name, entity); 426 } else{ 427 if(fWarnDuplicateEntityDef){ 428 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 429 "MSG_DUPLICATE_ENTITY_DEFINITION", 430 new Object[]{ name }, 431 XMLErrorReporter.SEVERITY_WARNING ); 432 } 433 } 434 435 } // addInternalEntity(String,String) 436 437 /** 438 * Adds an external entity declaration. 439 * <p> 440 * <strong>Note:</strong> This method ignores subsequent entity 441 * declarations. 442 * <p> 443 * <strong>Note:</strong> The name should be a unique symbol. The 444 * SymbolTable can be used for this purpose. 445 * 446 * @param name The name of the entity. 447 * @param publicId The public identifier of the entity. 448 * @param literalSystemId The system identifier of the entity. 449 * @param baseSystemId The base system identifier of the entity. 450 * This is the system identifier of the entity 451 * where <em>the entity being added</em> and 452 * is used to expand the system identifier when 453 * the system identifier is a relative URI. 454 * When null the system identifier of the first 455 * external entity on the stack is used instead. 
456 * 457 * @see SymbolTable 458 */ 459 public void addExternalEntity(String name, 460 String publicId, String literalSystemId, 461 String baseSystemId) throws IOException { 462 if (!fEntities.containsKey(name)) { 463 if (baseSystemId == null) { 464 // search for the first external entity on the stack 465 int size = fEntityStack.size(); 466 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 467 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 468 } 469 for (int i = size - 1; i >= 0 ; i--) { 470 Entity.ScannedEntity externalEntity = 471 (Entity.ScannedEntity)fEntityStack.elementAt(i); 472 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 473 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 474 break; 475 } 476 } 477 } 478 Entity entity = new Entity.ExternalEntity(name, 479 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 480 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 481 fEntities.put(name, entity); 482 } else{ 483 if(fWarnDuplicateEntityDef){ 484 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 485 "MSG_DUPLICATE_ENTITY_DEFINITION", 486 new Object[]{ name }, 487 XMLErrorReporter.SEVERITY_WARNING ); 488 } 489 } 490 491 } // addExternalEntity(String,String,String,String) 492 493 494 /** 495 * Adds an unparsed entity declaration. 496 * <p> 497 * <strong>Note:</strong> This method ignores subsequent entity 498 * declarations. 499 * <p> 500 * <strong>Note:</strong> The name should be a unique symbol. The 501 * SymbolTable can be used for this purpose. 502 * 503 * @param name The name of the entity. 504 * @param publicId The public identifier of the entity. 505 * @param systemId The system identifier of the entity. 506 * @param notation The name of the notation. 
507 * 508 * @see SymbolTable 509 */ 510 public void addUnparsedEntity(String name, 511 String publicId, String systemId, 512 String baseSystemId, String notation) { 513 if (!fEntities.containsKey(name)) { 514 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 515 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 516 notation, fInExternalSubset); 517 fEntities.put(name, entity); 518 } else{ 519 if(fWarnDuplicateEntityDef){ 520 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 521 "MSG_DUPLICATE_ENTITY_DEFINITION", 522 new Object[]{ name }, 523 XMLErrorReporter.SEVERITY_WARNING ); 524 } 525 } 526 } // addUnparsedEntity(String,String,String,String) 527 528 529 /** get the entity storage object from entity manager */ 530 public XMLEntityStorage getEntityStore(){ 531 return fEntityStorage ; 532 } 533 534 /** return the entity responsible for reading the entity */ 535 public XMLEntityScanner getEntityScanner(){ 536 if(fEntityScanner == null) { 537 // default to 1.0 538 if(fXML10EntityScanner == null) { 539 fXML10EntityScanner = new XMLEntityScanner(); 540 } 541 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 542 fEntityScanner = fXML10EntityScanner; 543 } 544 return fEntityScanner; 545 546 } 547 548 public void setScannerVersion(short version) { 549 550 if(version == Constants.XML_VERSION_1_0) { 551 if(fXML10EntityScanner == null) { 552 fXML10EntityScanner = new XMLEntityScanner(); 553 } 554 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 555 fEntityScanner = fXML10EntityScanner; 556 fEntityScanner.setCurrentEntity(fCurrentEntity); 557 } else { 558 if(fXML11EntityScanner == null) { 559 fXML11EntityScanner = new XML11EntityScanner(); 560 } 561 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 562 fEntityScanner = fXML11EntityScanner; 563 fEntityScanner.setCurrentEntity(fCurrentEntity); 564 } 565 566 } 567 568 /** 569 * This method uses the passed-in XMLInputSource to make 570 * 
* fCurrentEntity usable for reading.
     * <p>
     * When the input source supplies no character stream, a byte stream is
     * taken from the source or, failing that, opened from the expanded system
     * identifier as a URL; it is wrapped in a RewindableInputStream and a
     * reader is created for it. If the source names no encoding, the first
     * four bytes are handed to getEncodingName to choose one; otherwise the
     * supplied name is upper-cased and, for UTF-8, UTF-16, ISO-10646-UCS-4
     * and ISO-10646-UCS-2, the leading bytes are inspected for a byte order
     * mark or byte order. The previous current entity, if any, is pushed on
     * the entity stack before the new one is installed on the scanner.
     *
     * @param name name of the entity (presumably the XMLEntity constant when
     *             it is the document entity -- confirm against callers)
     * @param xmlInputSource the input source, with sufficient information
     *      to begin scanning characters.
     * @param literal        True if this entity is started within a
     *                       literal value.
     * @param isExternal    whether this entity should be treated as an internal or external entity.
     * @throws IOException  if anything can't be read
     * @throws XNIException if anything parser-specific goes wrong.
     * @return the encoding recorded for the new entity; when a character
     *         stream was supplied no detection is performed and the input
     *         source's own encoding, possibly null, is returned
     */
    public String setupCurrentEntity(String name, XMLInputSource xmlInputSource,
            boolean literal, boolean isExternal)
            throws IOException, XNIException {
        // get information

        final String publicId = xmlInputSource.getPublicId();
        String literalSystemId = xmlInputSource.getSystemId();
        String baseSystemId = xmlInputSource.getBaseSystemId();
        String encoding = xmlInputSource.getEncoding();
        final boolean encodingExternallySpecified = (encoding != null);
        Boolean isBigEndian = null;

        // create reader
        InputStream stream = null;
        Reader reader = xmlInputSource.getCharacterStream();

        // First chance checking strict URI
        String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
        if (baseSystemId == null) {
            baseSystemId = expandedSystemId;
        }
        if (reader == null) {
            stream = xmlInputSource.getByteStream();
            if (stream == null) {
                // Neither a reader nor a byte stream was supplied: open the
                // expanded system identifier as a URL.
                URL location = new URL(escapeNonUSAscii(expandedSystemId));
                URLConnection connect = location.openConnection();
                if (!(connect instanceof HttpURLConnection)) {
                    stream = connect.getInputStream();
                }
                else {
                    boolean followRedirects = true;

                    // setup URLConnection if we have an HTTPInputSource
                    if (xmlInputSource instanceof HTTPInputSource) {
                        final HttpURLConnection urlConnection = (HttpURLConnection) connect;
                        final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;

                        // set request properties
                        Iterator propIter = httpInputSource.getHTTPRequestProperties();
                        while (propIter.hasNext()) {
                            Map.Entry entry = (Map.Entry) propIter.next();
                            urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
                        }

                        // set preference for redirection
                        followRedirects = httpInputSource.getFollowHTTPRedirects();
                        if (!followRedirects) {
                            setInstanceFollowRedirects(urlConnection, followRedirects);
                        }
                    }

                    stream = connect.getInputStream();

                    // REVISIT: If the URLConnection has external encoding
                    // information, we should be reading it here. It's located
                    // in the charset parameter of Content-Type. -- mrglavas

                    if (followRedirects) {
                        String redirect = connect.getURL().toString();
                        // E43: Check if the URL was redirected, and then
                        // update literal and expanded system IDs if needed.
                        if (!redirect.equals(expandedSystemId)) {
                            literalSystemId = redirect;
                            expandedSystemId = redirect;
                        }
                    }
                }
            }

            // wrap this stream in RewindableInputStream
            stream = new RewindableInputStream(stream);

            // perform auto-detect of encoding if necessary
            if (encoding == null) {
                // read first four bytes and determine encoding
                // NOTE(review): this loop does not stop at end of stream (a
                // -1 result is stored as the byte 0xFF), so count is always 4
                // afterwards and the else branch below is never taken.
                final byte[] b4 = new byte[4];
                int count = 0;
                for (; count<4; count++ ) {
                    b4[count] = (byte)stream.read();
                }
                if (count == 4) {
                    Object [] encodingDesc = getEncodingName(b4, count);
                    encoding = (String)(encodingDesc[0]);
                    isBigEndian = (Boolean)(encodingDesc[1]);

                    stream.reset();
                    // Special case UTF-8 files with BOM created by Microsoft
                    // tools. It's more efficient to consume the BOM than make
                    // the reader perform extra checks. -Ac
                    if (count > 2 && encoding.equals("UTF-8")) {
                        int b0 = b4[0] & 0xFF;
                        int b1 = b4[1] & 0xFF;
                        int b2 = b4[2] & 0xFF;
                        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
                            // ignore first three bytes...
                            stream.skip(3);
                        }
                    }
                    reader = createReader(stream, encoding, isBigEndian);
                } else {
                    reader = createReader(stream, encoding, isBigEndian);
                }
            }

            // use specified encoding
            else {
                encoding = encoding.toUpperCase(Locale.ENGLISH);

                // If encoding is UTF-8, consume BOM if one is present.
                if (encoding.equals("UTF-8")) {
                    final int[] b3 = new int[3];
                    int count = 0;
                    for (; count < 3; ++count) {
                        b3[count] = stream.read();
                        if (b3[count] == -1)
                            break;
                    }
                    if (count == 3) {
                        if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
                            // First three bytes are not BOM, so reset.
                            stream.reset();
                        }
                    } else {
                        // Fewer than three bytes were available: rewind.
                        stream.reset();
                    }
                }
                // If encoding is UTF-16, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("UTF-16")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    String utf16Encoding = "UTF-16";
                    if (count >= 2) {
                        final int b0 = b4[0];
                        final int b1 = b4[1];
                        if (b0 == 0xFE && b1 == 0xFF) {
                            // UTF-16, big-endian
                            utf16Encoding = "UTF-16BE";
                            isBigEndian = Boolean.TRUE;
                        }
                        else if (b0 == 0xFF && b1 == 0xFE) {
                            // UTF-16, little-endian
                            utf16Encoding = "UTF-16LE";
                            isBigEndian = Boolean.FALSE;
                        }
                        else if (count == 4) {
                            final int b2 = b4[2];
                            final int b3 = b4[3];
                            if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                // UTF-16, big-endian, no BOM
                                utf16Encoding = "UTF-16BE";
                                isBigEndian = Boolean.TRUE;
                            }
                            if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                // UTF-16, little-endian, no BOM
                                utf16Encoding = "UTF-16LE";
                                isBigEndian = Boolean.FALSE;
                            }
                        }
                    }
                    // NOTE(review): the reader created here is replaced by the
                    // createReader call at the end of this else branch, which
                    // passes encoding ("UTF-16") rather than utf16Encoding;
                    // only isBigEndian carries the detected byte order forward.
                    reader = createReader(stream, utf16Encoding, isBigEndian);
                }
                // If encoding is UCS-4, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-4")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    // Ignore unusual octet order for now.
                    if (count == 4) {
                        // UCS-4, big endian (1234)
                        if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-4, little endian (4321)
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }
                // If encoding is UCS-2, we still need to read the first four bytes
                // in order to discover the byte order.
                else if (encoding.equals("ISO-10646-UCS-2")) {
                    final int[] b4 = new int[4];
                    int count = 0;
                    for (; count < 4; ++count) {
                        b4[count] = stream.read();
                        if (b4[count] == -1)
                            break;
                    }
                    stream.reset();

                    if (count == 4) {
                        // UCS-2, big endian
                        if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
                            isBigEndian = Boolean.TRUE;
                        }
                        // UCS-2, little endian
                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
                            isBigEndian = Boolean.FALSE;
                        }
                    }
                }

                // Reached for every externally specified encoding, including
                // the UTF-16 case above.
                reader = createReader(stream, encoding, isBigEndian);
            }

            // read one character at a time so we don't jump too far
            // ahead, converting characters from the byte stream in
            // the wrong encoding
            if (DEBUG_ENCODINGS) {
                System.out.println("$$$ no longer wrapping reader in OneCharReader");
            }
            //reader = new OneCharReader(reader);
        }

        // We've seen a new Reader.
        // Push it on the stack so we can close it later.
        //fOwnReaders.add(reader);

        // push entity on stack
        if (fCurrentEntity != null) {
            fEntityStack.push(fCurrentEntity);
        }

        // create entity
        /* if encoding is specified externally, 'encoding' information present
         * in the prolog of the XML document is not considered. Hence, prolog can
         * be read in Chunks of data instead of byte by byte.
         */
        fCurrentEntity = new com.sun.xml.internal.stream.Entity.ScannedEntity(name,new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),stream, reader, encoding, literal, encodingExternallySpecified, isExternal);
        // The same flag was already passed to the constructor above.
        fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
        fEntityScanner.setCurrentEntity(fCurrentEntity);
        fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
        return encoding;
    } //setupCurrentEntity(String, XMLInputSource, boolean, boolean):  String


    /**
     * Checks whether an entity given by name is external.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity is external, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isExternalEntity(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isExternal();
    }

    /**
     * Checks whether the declaration of an entity given by name is
     * in the external subset.
     *
     * @param entityName The name of the entity to check.
     * @return True if the entity was declared in the external subset, false otherwise
     *         (including when the entity is not declared).
     */
    public boolean isEntityDeclInExternalSubset(String entityName) {

        Entity entity = (Entity)fEntities.get(entityName);
        if (entity == null) {
            return false;
        }
        return entity.isEntityDeclInExternalSubset();
    }



    //
    // Public methods
    //

    /**
     * Sets whether the document entity is standalone.
     *
     * @param standalone True if document entity is standalone.
     */
    public void setStandalone(boolean standalone) {
        fStandalone = standalone;
    }
    // setStandalone(boolean)

    /** Returns true if the document entity is standalone.
*/ 882 public boolean isStandalone() { 883 return fStandalone; 884 } //isStandalone():boolean 885 886 public boolean isDeclaredEntity(String entityName) { 887 888 Entity entity = (Entity)fEntities.get(entityName); 889 return entity != null; 890 } 891 892 public boolean isUnparsedEntity(String entityName) { 893 894 Entity entity = (Entity)fEntities.get(entityName); 895 if (entity == null) { 896 return false; 897 } 898 return entity.isUnparsed(); 899 } 900 901 902 903 // this simply returns the fResourceIdentifier object; 904 // this should only be used with caution by callers that 905 // carefully manage the entity manager's behaviour, so that 906 // this doesn't returning meaningless or misleading data. 907 // @return a reference to the current fResourceIdentifier object 908 public XMLResourceIdentifier getCurrentResourceIdentifier() { 909 return fResourceIdentifier; 910 } 911 912 /** 913 * Sets the entity handler. When an entity starts and ends, the 914 * entity handler is notified of the change. 915 * 916 * @param entityHandler The new entity handler. 917 */ 918 919 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 920 fEntityHandler = (XMLEntityHandler) entityHandler; 921 } // setEntityHandler(XMLEntityHandler) 922 923 //this function returns StaxXMLInputSource 924 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 925 926 if(resourceIdentifier == null ) return null; 927 928 String publicId = resourceIdentifier.getPublicId(); 929 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 930 String baseSystemId = resourceIdentifier.getBaseSystemId(); 931 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 932 // if no base systemId given, assume that it's relative 933 // to the systemId of the current scanned entity 934 // Sometimes the system id is not (properly) expanded. 
935 // We need to expand the system id if: 936 // a. the expanded one was null; or 937 // b. the base system id was null, but becomes non-null from the current entity. 938 boolean needExpand = (expandedSystemId == null); 939 // REVISIT: why would the baseSystemId ever be null? if we 940 // didn't have to make this check we wouldn't have to reuse the 941 // fXMLResourceIdentifier object... 942 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 943 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 944 if (baseSystemId != null) 945 needExpand = true; 946 } 947 if (needExpand) 948 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 949 950 // give the entity resolver a chance 951 StaxXMLInputSource staxInputSource = null; 952 XMLInputSource xmlInputSource = null; 953 954 XMLResourceIdentifierImpl ri = null; 955 956 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 957 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 958 } else { 959 fResourceIdentifier.clear(); 960 ri = fResourceIdentifier; 961 } 962 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 963 if(DEBUG_RESOLVER){ 964 System.out.println("BEFORE Calling resolveEntity") ; 965 } 966 967 //either of Stax or Xerces would be null 968 if(fStaxEntityResolver != null){ 969 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 970 } 971 972 if(fEntityResolver != null){ 973 xmlInputSource = fEntityResolver.resolveEntity(ri); 974 } 975 976 if(xmlInputSource != null){ 977 //wrap this XMLInputSource to StaxInputSource 978 staxInputSource = new StaxXMLInputSource(xmlInputSource); 979 } 980 981 // do default resolution 982 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 983 if (staxInputSource == null) { 984 // REVISIT: when systemId is null, I think we should return null. 985 // is this the right solution? 
-SG 986 //if (systemId != null) 987 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 988 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 989 //Waiting for the clarification from EG. - nb 990 } 991 992 if (DEBUG_RESOLVER) { 993 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 994 System.err.println(" = " + xmlInputSource); 995 } 996 997 return staxInputSource; 998 999 } 1000 1001 /** 1002 * Resolves the specified public and system identifiers. This 1003 * method first attempts to resolve the entity based on the 1004 * EntityResolver registered by the application. If no entity 1005 * resolver is registered or if the registered entity handler 1006 * is unable to resolve the entity, then default entity 1007 * resolution will occur. 1008 * 1009 * @param publicId The public identifier of the entity. 1010 * @param systemId The system identifier of the entity. 1011 * @param baseSystemId The base system identifier of the entity. 1012 * This is the system identifier of the current 1013 * entity and is used to expand the system 1014 * identifier when the system identifier is a 1015 * relative URI. 1016 * 1017 * @return Returns an input source that wraps the resolved entity. 1018 * This method will never return null. 1019 * 1020 * @throws IOException Thrown on i/o error. 1021 * @throws XNIException Thrown by entity resolver to signal an error. 
1022 */ 1023 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1024 if(resourceIdentifier == null ) return null; 1025 String publicId = resourceIdentifier.getPublicId(); 1026 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1027 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1028 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1029 String namespace = resourceIdentifier.getNamespace(); 1030 1031 // if no base systemId given, assume that it's relative 1032 // to the systemId of the current scanned entity 1033 // Sometimes the system id is not (properly) expanded. 1034 // We need to expand the system id if: 1035 // a. the expanded one was null; or 1036 // b. the base system id was null, but becomes non-null from the current entity. 1037 boolean needExpand = (expandedSystemId == null); 1038 // REVISIT: why would the baseSystemId ever be null? if we 1039 // didn't have to make this check we wouldn't have to reuse the 1040 // fXMLResourceIdentifier object... 1041 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1042 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1043 if (baseSystemId != null) 1044 needExpand = true; 1045 } 1046 if (needExpand) 1047 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1048 1049 // give the entity resolver a chance 1050 XMLInputSource xmlInputSource = null; 1051 1052 if (fEntityResolver != null) { 1053 resourceIdentifier.setBaseSystemId(baseSystemId); 1054 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1055 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1056 } 1057 1058 // do default resolution 1059 // REVISIT: what's the correct behavior if the user provided an entity 1060 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1061 // an input source (xmlInputSource == null)? 
1062 // do we do default resolution, or do we just return null? -SG 1063 if (xmlInputSource == null) { 1064 // REVISIT: when systemId is null, I think we should return null. 1065 // is this the right solution? -SG 1066 //if (systemId != null) 1067 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1068 } 1069 1070 if (DEBUG_RESOLVER) { 1071 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1072 System.err.println(" = " + xmlInputSource); 1073 } 1074 1075 return xmlInputSource; 1076 1077 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1078 1079 /** 1080 * Starts a named entity. 1081 * 1082 * @param entityName The name of the entity to start. 1083 * @param literal True if this entity is started within a literal 1084 * value. 1085 * 1086 * @throws IOException Thrown on i/o error. 1087 * @throws XNIException Thrown by entity handler to signal an error. 1088 */ 1089 public void startEntity(String entityName, boolean literal) 1090 throws IOException, XNIException { 1091 1092 // was entity declared? 1093 Entity entity = (Entity)fEntityStorage.getEntity(entityName); 1094 if (entity == null) { 1095 if (fEntityHandler != null) { 1096 String encoding = null; 1097 fResourceIdentifier.clear(); 1098 fEntityAugs.removeAllItems(); 1099 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1100 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1101 fEntityAugs.removeAllItems(); 1102 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1103 fEntityHandler.endEntity(entityName, fEntityAugs); 1104 } 1105 return; 1106 } 1107 1108 // should we skip external entities? 
1109 boolean external = entity.isExternal(); 1110 if (external) { 1111 boolean unparsed = entity.isUnparsed(); 1112 boolean parameter = entityName.startsWith("%"); 1113 boolean general = !parameter; 1114 if (unparsed || (general && !fExternalGeneralEntities) || 1115 (parameter && !fExternalParameterEntities)) { 1116 1117 if (fEntityHandler != null) { 1118 fResourceIdentifier.clear(); 1119 final String encoding = null; 1120 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1121 //REVISIT: since we're storing expandedSystemId in the 1122 // externalEntity, how could this have got here if it wasn't already 1123 // expanded??? - neilg 1124 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1125 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1126 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1127 fResourceIdentifier.setValues( 1128 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1129 extLitSysId, extBaseSysId, expandedSystemId); 1130 fEntityAugs.removeAllItems(); 1131 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1132 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1133 fEntityAugs.removeAllItems(); 1134 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1135 fEntityHandler.endEntity(entityName, fEntityAugs); 1136 } 1137 return; 1138 } 1139 } 1140 1141 // is entity recursive? 1142 int size = fEntityStack.size(); 1143 for (int i = size; i >= 0; i--) { 1144 Entity activeEntity = i == size 1145 ? 
fCurrentEntity 1146 : (Entity)fEntityStack.elementAt(i); 1147 if (activeEntity.name == entityName) { 1148 String path = entityName; 1149 for (int j = i + 1; j < size; j++) { 1150 activeEntity = (Entity)fEntityStack.elementAt(j); 1151 path = path + " -> " + activeEntity.name; 1152 } 1153 path = path + " -> " + fCurrentEntity.name; 1154 path = path + " -> " + entityName; 1155 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1156 "RecursiveReference", 1157 new Object[] { entityName, path }, 1158 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1159 1160 if (fEntityHandler != null) { 1161 fResourceIdentifier.clear(); 1162 final String encoding = null; 1163 if (external) { 1164 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1165 // REVISIT: for the same reason above... 1166 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null); 1167 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1168 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1169 fResourceIdentifier.setValues( 1170 (externalEntity.entityLocation != null ? 
externalEntity.entityLocation.getPublicId() : null), 1171 extLitSysId, extBaseSysId, expandedSystemId); 1172 } 1173 fEntityAugs.removeAllItems(); 1174 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1175 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1176 fEntityAugs.removeAllItems(); 1177 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1178 fEntityHandler.endEntity(entityName, fEntityAugs); 1179 } 1180 1181 return; 1182 } 1183 } 1184 1185 // resolve external entity 1186 StaxXMLInputSource staxInputSource = null; 1187 XMLInputSource xmlInputSource = null ; 1188 1189 if (external) { 1190 Entity.ExternalEntity externalEntity = (Entity.ExternalEntity)entity; 1191 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1192 /** xxx: Waiting from the EG 1193 * //simply return if there was entity resolver registered and application 1194 * //returns either XMLStreamReader or XMLEventReader. 1195 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1196 */ 1197 xmlInputSource = staxInputSource.getXMLInputSource() ; 1198 } 1199 // wrap internal entity 1200 else { 1201 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1202 Reader reader = new StringReader(internalEntity.text); 1203 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1204 } 1205 1206 // start the entity 1207 startEntity(entityName, xmlInputSource, literal, external); 1208 1209 } // startEntity(String,boolean) 1210 1211 /** 1212 * Starts the document entity. The document entity has the "[xml]" 1213 * pseudo-name. 1214 * 1215 * @param xmlInputSource The input source of the document entity. 1216 * 1217 * @throws IOException Thrown on i/o error. 1218 * @throws XNIException Thrown by entity handler to signal an error. 
1219 */ 1220 public void startDocumentEntity(XMLInputSource xmlInputSource) 1221 throws IOException, XNIException { 1222 startEntity(XMLEntity, xmlInputSource, false, true); 1223 } // startDocumentEntity(XMLInputSource) 1224 1225 //xxx these methods are not required. 1226 /** 1227 * Starts the DTD entity. The DTD entity has the "[dtd]" 1228 * pseudo-name. 1229 * 1230 * @param xmlInputSource The input source of the DTD entity. 1231 * 1232 * @throws IOException Thrown on i/o error. 1233 * @throws XNIException Thrown by entity handler to signal an error. 1234 */ 1235 public void startDTDEntity(XMLInputSource xmlInputSource) 1236 throws IOException, XNIException { 1237 startEntity(DTDEntity, xmlInputSource, false, true); 1238 } // startDTDEntity(XMLInputSource) 1239 1240 // indicate start of external subset so that 1241 // location of entity decls can be tracked 1242 public void startExternalSubset() { 1243 fInExternalSubset = true; 1244 } 1245 1246 public void endExternalSubset() { 1247 fInExternalSubset = false; 1248 } 1249 1250 /** 1251 * Starts an entity. 1252 * <p> 1253 * This method can be used to insert an application defined XML 1254 * entity stream into the parsing stream. 1255 * 1256 * @param name The name of the entity. 1257 * @param xmlInputSource The input source of the entity. 1258 * @param literal True if this entity is started within a 1259 * literal value. 1260 * @param isExternal whether this entity should be treated as an internal or external entity. 1261 * 1262 * @throws IOException Thrown on i/o error. 1263 * @throws XNIException Thrown by entity handler to signal an error. 
1264 */ 1265 public void startEntity(String name, 1266 XMLInputSource xmlInputSource, 1267 boolean literal, boolean isExternal) 1268 throws IOException, XNIException { 1269 1270 String encoding = setupCurrentEntity(name, xmlInputSource, literal, isExternal); 1271 1272 //when entity expansion limit is set by the Application, we need to 1273 //check for the entity expansion limit set by the parser, if number of entity 1274 //expansions exceeds the entity expansion limit, parser will throw fatal error. 1275 // Note that this represents the nesting level of open entities. 1276 fEntityExpansionCount++; 1277 if( fSecurityManager != null && fEntityExpansionCount > fEntityExpansionLimit ){ 1278 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1279 "EntityExpansionLimitExceeded", 1280 new Object[]{new Integer(fEntityExpansionLimit) }, 1281 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1282 // is there anything better to do than reset the counter? 1283 // at least one can envision debugging applications where this might 1284 // be useful... 1285 fEntityExpansionCount = 0; 1286 } 1287 1288 // call handler 1289 if (fEntityHandler != null) { 1290 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null); 1291 } 1292 1293 } // startEntity(String,XMLInputSource) 1294 1295 /** 1296 * Return the current entity being scanned. Current entity is SET using startEntity function. 1297 * @return Entity.ScannedEntity 1298 */ 1299 1300 public Entity.ScannedEntity getCurrentEntity(){ 1301 return fCurrentEntity ; 1302 } 1303 1304 /** 1305 * Return the top level entity handled by this manager, or null 1306 * if no entity was added. 1307 */ 1308 public Entity.ScannedEntity getTopLevelEntity() { 1309 return (Entity.ScannedEntity) 1310 (fEntityStack.empty() ? null : fEntityStack.elementAt(0)); 1311 } 1312 1313 1314 /** 1315 * Close all opened InputStreams and Readers opened by this parser. 
1316 */ 1317 public void closeReaders() { 1318 /** this call actually does nothing, readers are closed in the endEntity method 1319 * through the current entity. 1320 * The change seems to have happened during the jdk6 development with the 1321 * addition of StAX 1322 **/ 1323 } 1324 1325 public void endEntity() throws IOException, XNIException { 1326 1327 // call handler 1328 if (DEBUG_BUFFER) { 1329 System.out.print("(endEntity: "); 1330 print(); 1331 System.out.println(); 1332 } 1333 //pop the entity from the stack 1334 Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ; 1335 1336 /** need to close the reader first since the program can end 1337 * prematurely (e.g. fEntityHandler.endEntity may throw exception) 1338 * leaving the reader open 1339 */ 1340 //close the reader 1341 if(fCurrentEntity != null){ 1342 //close the reader 1343 try{ 1344 fCurrentEntity.close(); 1345 }catch(IOException ex){ 1346 throw new XNIException(ex); 1347 } 1348 } 1349 1350 if (fEntityHandler != null) { 1351 //so this is the last opened entity, signal it to current fEntityHandler using Augmentation 1352 if(entity == null){ 1353 fEntityAugs.removeAllItems(); 1354 fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE); 1355 fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs); 1356 fEntityAugs.removeAllItems(); 1357 }else{ 1358 fEntityHandler.endEntity(fCurrentEntity.name, null); 1359 } 1360 } 1361 //check if it is a document entity 1362 boolean documentEntity = fCurrentEntity.name == XMLEntity; 1363 1364 //set popped entity as current entity 1365 fCurrentEntity = entity; 1366 fEntityScanner.setCurrentEntity(fCurrentEntity); 1367 1368 //check if there are any entity left in the stack -- if there are 1369 //no entries EOF has been reached. 
1370 // throw exception when it is the last entity but it is not a document entity 1371 1372 if(fCurrentEntity == null & !documentEntity){ 1373 throw new EOFException() ; 1374 } 1375 1376 if (DEBUG_BUFFER) { 1377 System.out.print(")endEntity: "); 1378 print(); 1379 System.out.println(); 1380 } 1381 1382 } // endEntity() 1383 1384 1385 // 1386 // XMLComponent methods 1387 // 1388 public void reset(PropertyManager propertyManager){ 1389 //reset fEntityStorage 1390 fEntityStorage.reset(propertyManager); 1391 //reset XMLEntityReaderImpl 1392 fEntityScanner.reset(propertyManager); 1393 // xerces properties 1394 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 1395 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 1396 try { 1397 fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER); 1398 } catch (XMLConfigurationException e) { 1399 fStaxEntityResolver = null; 1400 } 1401 1402 // initialize state 1403 //fStandalone = false; 1404 fEntities.clear(); 1405 fEntityStack.removeAllElements(); 1406 fCurrentEntity = null; 1407 fValidation = false; 1408 fExternalGeneralEntities = true; 1409 fExternalParameterEntities = true; 1410 fAllowJavaEncodings = true ; 1411 1412 //test(); 1413 } 1414 1415 /** 1416 * Resets the component. The component can query the component manager 1417 * about any features and properties that affect the operation of the 1418 * component. 1419 * 1420 * @param componentManager The component manager. 1421 * 1422 * @throws SAXException Thrown by component on initialization error. 1423 * For example, if a feature or property is 1424 * required for the operation of the component, the 1425 * component manager may throw a 1426 * SAXNotRecognizedException or a 1427 * SAXNotSupportedException. 
1428 */ 1429 public void reset(XMLComponentManager componentManager) 1430 throws XMLConfigurationException { 1431 1432 boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true); 1433 1434 if (!parser_settings) { 1435 // parser settings have not been changed 1436 reset(); 1437 if(fEntityScanner != null){ 1438 fEntityScanner.reset(componentManager); 1439 } 1440 if(fEntityStorage != null){ 1441 fEntityStorage.reset(componentManager); 1442 } 1443 return; 1444 } 1445 1446 // sax features 1447 fValidation = componentManager.getFeature(VALIDATION, false); 1448 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true); 1449 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true); 1450 1451 // xerces features 1452 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 1453 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false); 1454 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 1455 1456 // xerces properties 1457 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 1458 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 1459 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null); 1460 fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null); 1461 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 1462 fSecurityManager = (SecurityManager)componentManager.getProperty(SECURITY_MANAGER, null); 1463 1464 //reset general state 1465 reset(); 1466 1467 fEntityScanner.reset(componentManager); 1468 fEntityStorage.reset(componentManager); 1469 1470 } // reset(XMLComponentManager) 1471 1472 // reset general state. 
Should not be called other than by 1473 // a class acting as a component manager but not 1474 // implementing that interface for whatever reason. 1475 public void reset() { 1476 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0; 1477 1478 // initialize state 1479 fStandalone = false; 1480 fEntities.clear(); 1481 fEntityStack.removeAllElements(); 1482 fEntityExpansionCount = 0; 1483 1484 fCurrentEntity = null; 1485 // reset scanner 1486 if(fXML10EntityScanner != null){ 1487 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1488 } 1489 if(fXML11EntityScanner != null) { 1490 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1491 } 1492 1493 // DEBUG 1494 if (DEBUG_ENTITIES) { 1495 addInternalEntity("text", "Hello, World."); 1496 addInternalEntity("empty-element", "<foo/>"); 1497 addInternalEntity("balanced-element", "<foo></foo>"); 1498 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1499 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1500 addInternalEntity("unbalanced-entity", "<foo>"); 1501 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1502 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1503 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1504 try { 1505 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1506 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1507 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1508 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1509 } 1510 catch (IOException ex) { 1511 // should never happen 1512 } 1513 } 1514 1515 fEntityHandler = null; 1516 1517 // reset scanner 1518 //if(fEntityScanner!=null) 1519 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1520 1521 } 1522 
/** 1523 * Returns a list of feature identifiers that are recognized by 1524 * this component. This method may return null if no features 1525 * are recognized by this component. 1526 */ 1527 public String[] getRecognizedFeatures() { 1528 return (String[])(RECOGNIZED_FEATURES.clone()); 1529 } // getRecognizedFeatures():String[] 1530 1531 /** 1532 * Sets the state of a feature. This method is called by the component 1533 * manager any time after reset when a feature changes state. 1534 * <p> 1535 * <strong>Note:</strong> Components should silently ignore features 1536 * that do not affect the operation of the component. 1537 * 1538 * @param featureId The feature identifier. 1539 * @param state The state of the feature. 1540 * 1541 * @throws SAXNotRecognizedException The component should not throw 1542 * this exception. 1543 * @throws SAXNotSupportedException The component should not throw 1544 * this exception. 1545 */ 1546 public void setFeature(String featureId, boolean state) 1547 throws XMLConfigurationException { 1548 1549 // xerces features 1550 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1551 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1552 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1553 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1554 fAllowJavaEncodings = state; 1555 } 1556 } 1557 1558 } // setFeature(String,boolean) 1559 1560 /** 1561 * Sets the value of a property. This method is called by the component 1562 * manager any time after reset when a property changes value. 1563 * <p> 1564 * <strong>Note:</strong> Components should silently ignore properties 1565 * that do not affect the operation of the component. 1566 * 1567 * @param propertyId The property identifier. 1568 * @param value The value of the property. 1569 * 1570 * @throws SAXNotRecognizedException The component should not throw 1571 * this exception. 
1572 * @throws SAXNotSupportedException The component should not throw 1573 * this exception. 1574 */ 1575 public void setProperty(String propertyId, Object value){ 1576 // Xerces properties 1577 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1578 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1579 1580 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1581 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1582 fSymbolTable = (SymbolTable)value; 1583 return; 1584 } 1585 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1586 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1587 fErrorReporter = (XMLErrorReporter)value; 1588 return; 1589 } 1590 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1591 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1592 fEntityResolver = (XMLEntityResolver)value; 1593 return; 1594 } 1595 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1596 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1597 Integer bufferSize = (Integer)value; 1598 if (bufferSize != null && 1599 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1600 fBufferSize = bufferSize.intValue(); 1601 fEntityScanner.setBufferSize(fBufferSize); 1602 fBufferPool.setExternalBufferSize(fBufferSize); 1603 } 1604 } 1605 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1606 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1607 fSecurityManager = (SecurityManager)value; 1608 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0; 1609 } 1610 } 1611 1612 } 1613 /** 1614 * Returns a list of property identifiers that are recognized by 1615 * this component. This method may return null if no properties 1616 * are recognized by this component. 
1617 */ 1618 public String[] getRecognizedProperties() { 1619 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1620 } // getRecognizedProperties():String[] 1621 /** 1622 * Returns the default state for a feature, or null if this 1623 * component does not want to report a default value for this 1624 * feature. 1625 * 1626 * @param featureId The feature identifier. 1627 * 1628 * @since Xerces 2.2.0 1629 */ 1630 public Boolean getFeatureDefault(String featureId) { 1631 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1632 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1633 return FEATURE_DEFAULTS[i]; 1634 } 1635 } 1636 return null; 1637 } // getFeatureDefault(String):Boolean 1638 1639 /** 1640 * Returns the default state for a property, or null if this 1641 * component does not want to report a default value for this 1642 * property. 1643 * 1644 * @param propertyId The property identifier. 1645 * 1646 * @since Xerces 2.2.0 1647 */ 1648 public Object getPropertyDefault(String propertyId) { 1649 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1650 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1651 return PROPERTY_DEFAULTS[i]; 1652 } 1653 } 1654 return null; 1655 } // getPropertyDefault(String):Object 1656 1657 // 1658 // Public static methods 1659 // 1660 1661 /** 1662 * Expands a system id and returns the system id as a URI, if 1663 * it can be expanded. A return value of null means that the 1664 * identifier is already expanded. An exception thrown 1665 * indicates a failure to expand the id. 1666 * 1667 * @param systemId The systemId to be expanded. 1668 * 1669 * @return Returns the URI string representing the expanded system 1670 * identifier. A null value indicates that the given 1671 * system identifier is already expanded. 
*/
public static String expandSystemId(String systemId) {
    final String noBaseSystemId = null;
    return expandSystemId(systemId, noBaseSystemId);
} // expandSystemId(String):String

//
// Public static methods
//

// current value of the "user.dir" property
private static String gUserDir;
// cached URI object for the current value of the escaped "user.dir" property stored as a URI
private static URI gUserDirURI;
// which ASCII characters need to be escaped
private static boolean[] gNeedEscaping = new boolean[128];
// the first hex character if a character needs to be escaped
private static char[] gAfterEscaping1 = new char[128];
// the second hex character if a character needs to be escaped
private static char[] gAfterEscaping2 = new char[128];
// upper-case hexadecimal digits, indexed by value
private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
// Initialize the above 3 arrays: the control characters 0x00-0x1F,
// DEL (0x7F) and a fixed set of punctuation characters are marked for
// escaping, together with the two hex digits of their code.
static {
    final String escapedPunctuation = " <>#%\"{}|\\^~[]`";
    for (int code = 0; code < 128; code++) {
        final boolean escape = code <= 0x1f
                || code == 0x7f
                || escapedPunctuation.indexOf(code) >= 0;
        if (escape) {
            gNeedEscaping[code] = true;
            gAfterEscaping1[code] = gHexChs[code >> 4];
            gAfterEscaping2[code] = gHexChs[code & 0xf];
        }
    }
}

// To escape the "user.dir" system property, by using %HH to represent
// the special ASCII characters marked in gNeedEscaping (0x00~0x1F, 0x7F,
// ' ', '<', '>', '#', '%', '"' and others).
// It's a static method, so needs to be synchronized.
// This method looks heavy, but since the system property isn't expected
// to change often, in most cases we only need to return the URI
// that was escaped before.
1722 // According to the URI spec, non-ASCII characters (whose value >= 128) 1723 // need to be escaped too. 1724 // REVISIT: don't know how to escape non-ASCII characters, especially 1725 // which encoding to use. Leave them for now. 1726 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1727 // get the user.dir property 1728 String userDir = ""; 1729 try { 1730 userDir = System.getProperty("user.dir"); 1731 } 1732 catch (SecurityException se) { 1733 } 1734 1735 // return empty string if property value is empty string. 1736 if (userDir.length() == 0) 1737 return new URI("file", "", "", null, null); 1738 // compute the new escaped value if the new property value doesn't 1739 // match the previous one 1740 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1741 return gUserDirURI; 1742 } 1743 1744 // record the new value as the global property value 1745 gUserDir = userDir; 1746 1747 char separator = java.io.File.separatorChar; 1748 userDir = userDir.replace(separator, '/'); 1749 1750 int len = userDir.length(), ch; 1751 StringBuffer buffer = new StringBuffer(len*3); 1752 // change C:/blah to /C:/blah 1753 if (len >= 2 && userDir.charAt(1) == ':') { 1754 ch = Character.toUpperCase(userDir.charAt(0)); 1755 if (ch >= 'A' && ch <= 'Z') { 1756 buffer.append('/'); 1757 } 1758 } 1759 1760 // for each character in the path 1761 int i = 0; 1762 for (; i < len; i++) { 1763 ch = userDir.charAt(i); 1764 // if it's not an ASCII character, break here, and use UTF-8 encoding 1765 if (ch >= 128) 1766 break; 1767 if (gNeedEscaping[ch]) { 1768 buffer.append('%'); 1769 buffer.append(gAfterEscaping1[ch]); 1770 buffer.append(gAfterEscaping2[ch]); 1771 // record the fact that it's escaped 1772 } 1773 else { 1774 buffer.append((char)ch); 1775 } 1776 } 1777 1778 // we saw some non-ascii character 1779 if (i < len) { 1780 // get UTF-8 bytes for the remaining sub-string 1781 byte[] bytes = null; 1782 byte b; 1783 try { 1784 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1785 } catch (java.io.UnsupportedEncodingException e) { 1786 // should never happen 1787 return new URI("file", "", userDir, null, null); 1788 } 1789 len = bytes.length; 1790 1791 // for each byte 1792 for (i = 0; i < len; i++) { 1793 b = bytes[i]; 1794 // for non-ascii character: make it positive, then escape 1795 if (b < 0) { 1796 ch = b + 256; 1797 buffer.append('%'); 1798 buffer.append(gHexChs[ch >> 4]); 1799 buffer.append(gHexChs[ch & 0xf]); 1800 } 1801 else if (gNeedEscaping[b]) { 1802 buffer.append('%'); 1803 buffer.append(gAfterEscaping1[b]); 1804 buffer.append(gAfterEscaping2[b]); 1805 } 1806 else { 1807 buffer.append((char)b); 1808 } 1809 } 1810 } 1811 1812 // change blah/blah to blah/blah/ 1813 if (!userDir.endsWith("/")) 1814 buffer.append('/'); 1815 1816 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1817 1818 return gUserDirURI; 1819 } 1820 1821 /** 1822 * Absolutizes a URI using the current value 1823 * of the "user.dir" property as the base URI. If 1824 * the URI is already absolute, this is a no-op. 1825 * 1826 * @param uri the URI to absolutize 1827 */ 1828 public static void absolutizeAgainstUserDir(URI uri) 1829 throws URI.MalformedURIException { 1830 uri.absolutize(getUserDir()); 1831 } 1832 1833 /** 1834 * Expands a system id and returns the system id as a URI, if 1835 * it can be expanded. A return value of null means that the 1836 * identifier is already expanded. An exception thrown 1837 * indicates a failure to expand the id. 1838 * 1839 * @param systemId The systemId to be expanded. 1840 * 1841 * @return Returns the URI string representing the expanded system 1842 * identifier. A null value indicates that the given 1843 * system identifier is already expanded. 
1844 * 1845 */ 1846 public static String expandSystemId(String systemId, String baseSystemId) { 1847 1848 // check for bad parameters id 1849 if (systemId == null || systemId.length() == 0) { 1850 return systemId; 1851 } 1852 // if id already expanded, return 1853 try { 1854 URI uri = new URI(systemId); 1855 if (uri != null) { 1856 return systemId; 1857 } 1858 } catch (URI.MalformedURIException e) { 1859 // continue on... 1860 } 1861 // normalize id 1862 String id = fixURI(systemId); 1863 1864 // normalize base 1865 URI base = null; 1866 URI uri = null; 1867 try { 1868 if (baseSystemId == null || baseSystemId.length() == 0 || 1869 baseSystemId.equals(systemId)) { 1870 String dir = getUserDir().toString(); 1871 base = new URI("file", "", dir, null, null); 1872 } else { 1873 try { 1874 base = new URI(fixURI(baseSystemId)); 1875 } catch (URI.MalformedURIException e) { 1876 if (baseSystemId.indexOf(':') != -1) { 1877 // for xml schemas we might have baseURI with 1878 // a specified drive 1879 base = new URI("file", "", fixURI(baseSystemId), null, null); 1880 } else { 1881 String dir = getUserDir().toString(); 1882 dir = dir + fixURI(baseSystemId); 1883 base = new URI("file", "", dir, null, null); 1884 } 1885 } 1886 } 1887 // expand id 1888 uri = new URI(base, id); 1889 } catch (Exception e) { 1890 // let it go through 1891 1892 } 1893 1894 if (uri == null) { 1895 return systemId; 1896 } 1897 return uri.toString(); 1898 1899 } // expandSystemId(String,String):String 1900 1901 /** 1902 * Expands a system id and returns the system id as a URI, if 1903 * it can be expanded. A return value of null means that the 1904 * identifier is already expanded. An exception thrown 1905 * indicates a failure to expand the id. 1906 * 1907 * @param systemId The systemId to be expanded. 1908 * 1909 * @return Returns the URI string representing the expanded system 1910 * identifier. A null value indicates that the given 1911 * system identifier is already expanded. 
1912 * 1913 */ 1914 public static String expandSystemId(String systemId, String baseSystemId, 1915 boolean strict) 1916 throws URI.MalformedURIException { 1917 1918 // check if there is a system id before 1919 // trying to expand it. 1920 if (systemId == null) { 1921 return null; 1922 } 1923 1924 // system id has to be a valid URI 1925 if (strict) { 1926 1927 1928 // check if there is a system id before 1929 // trying to expand it. 1930 if (systemId == null) { 1931 return null; 1932 } 1933 1934 try { 1935 // if it's already an absolute one, return it 1936 new URI(systemId); 1937 return systemId; 1938 } 1939 catch (URI.MalformedURIException ex) { 1940 } 1941 URI base = null; 1942 // if there isn't a base uri, use the working directory 1943 if (baseSystemId == null || baseSystemId.length() == 0) { 1944 base = new URI("file", "", getUserDir().toString(), null, null); 1945 } 1946 // otherwise, use the base uri 1947 else { 1948 try { 1949 base = new URI(baseSystemId); 1950 } 1951 catch (URI.MalformedURIException e) { 1952 // assume "base" is also a relative uri 1953 String dir = getUserDir().toString(); 1954 dir = dir + baseSystemId; 1955 base = new URI("file", "", dir, null, null); 1956 } 1957 } 1958 // absolutize the system id using the base 1959 URI uri = new URI(base, systemId); 1960 // return the string rep of the new uri (an absolute one) 1961 return uri.toString(); 1962 1963 // if any exception is thrown, it'll get thrown to the caller. 1964 } 1965 1966 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 
1967 try { 1968 return expandSystemIdStrictOff(systemId, baseSystemId); 1969 } 1970 catch (URI.MalformedURIException e) { 1971 /** Xerces URI rejects unicode, try java.net.URI 1972 * this is not ideal solution, but it covers known cases which either 1973 * Xerces URI or java.net.URI can handle alone 1974 * will file bug against java.net.URI 1975 */ 1976 try { 1977 return expandSystemIdStrictOff1(systemId, baseSystemId); 1978 } catch (URISyntaxException ex) { 1979 // continue on... 1980 } 1981 } 1982 // check for bad parameters id 1983 if (systemId.length() == 0) { 1984 return systemId; 1985 } 1986 1987 // normalize id 1988 String id = fixURI(systemId); 1989 1990 // normalize base 1991 URI base = null; 1992 URI uri = null; 1993 try { 1994 if (baseSystemId == null || baseSystemId.length() == 0 || 1995 baseSystemId.equals(systemId)) { 1996 base = getUserDir(); 1997 } 1998 else { 1999 try { 2000 base = new URI(fixURI(baseSystemId).trim()); 2001 } 2002 catch (URI.MalformedURIException e) { 2003 if (baseSystemId.indexOf(':') != -1) { 2004 // for xml schemas we might have baseURI with 2005 // a specified drive 2006 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2007 } 2008 else { 2009 base = new URI(getUserDir(), fixURI(baseSystemId)); 2010 } 2011 } 2012 } 2013 // expand id 2014 uri = new URI(base, id.trim()); 2015 } 2016 catch (Exception e) { 2017 // let it go through 2018 2019 } 2020 2021 if (uri == null) { 2022 return systemId; 2023 } 2024 return uri.toString(); 2025 2026 } // expandSystemId(String,String,boolean):String 2027 2028 /** 2029 * Helper method for expandSystemId(String,String,boolean):String 2030 */ 2031 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2032 throws URI.MalformedURIException { 2033 2034 URI systemURI = new URI(systemId, true); 2035 // If it's already an absolute one, return it 2036 if (systemURI.isAbsoluteURI()) { 2037 return systemId; 2038 } 2039 2040 // If there isn't a base URI, use 
the working directory 2041 URI baseURI = null; 2042 if (baseSystemId == null || baseSystemId.length() == 0) { 2043 baseURI = getUserDir(); 2044 } 2045 else { 2046 baseURI = new URI(baseSystemId, true); 2047 if (!baseURI.isAbsoluteURI()) { 2048 // assume "base" is also a relative uri 2049 baseURI.absolutize(getUserDir()); 2050 } 2051 } 2052 2053 // absolutize the system identifier using the base URI 2054 systemURI.absolutize(baseURI); 2055 2056 // return the string rep of the new uri (an absolute one) 2057 return systemURI.toString(); 2058 2059 // if any exception is thrown, it'll get thrown to the caller. 2060 2061 } // expandSystemIdStrictOn(String,String):String 2062 2063 /** 2064 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>. 2065 * This may fail on earlier JDKs which do not support setting this preference. 2066 */ 2067 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) { 2068 try { 2069 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE}); 2070 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE}); 2071 } 2072 // setInstanceFollowRedirects doesn't exist. 2073 catch (Exception exc) {} 2074 } 2075 2076 2077 /** 2078 * Helper method for expandSystemId(String,String,boolean):String 2079 */ 2080 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2081 throws URI.MalformedURIException { 2082 2083 URI systemURI = new URI(systemId, true); 2084 // If it's already an absolute one, return it 2085 if (systemURI.isAbsoluteURI()) { 2086 if (systemURI.getScheme().length() > 1) { 2087 return systemId; 2088 } 2089 /** 2090 * If the scheme's length is only one character, 2091 * it's likely that this was intended as a file 2092 * path. Fixing this up in expandSystemId to 2093 * maintain backwards compatibility. 
2094 */ 2095 throw new URI.MalformedURIException(); 2096 } 2097 2098 // If there isn't a base URI, use the working directory 2099 URI baseURI = null; 2100 if (baseSystemId == null || baseSystemId.length() == 0) { 2101 baseURI = getUserDir(); 2102 } 2103 else { 2104 baseURI = new URI(baseSystemId, true); 2105 if (!baseURI.isAbsoluteURI()) { 2106 // assume "base" is also a relative uri 2107 baseURI.absolutize(getUserDir()); 2108 } 2109 } 2110 2111 // absolutize the system identifier using the base URI 2112 systemURI.absolutize(baseURI); 2113 2114 // return the string rep of the new uri (an absolute one) 2115 return systemURI.toString(); 2116 2117 // if any exception is thrown, it'll get thrown to the caller. 2118 2119 } // expandSystemIdStrictOff(String,String):String 2120 2121 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2122 throws URISyntaxException, URI.MalformedURIException { 2123 2124 java.net.URI systemURI = new java.net.URI(systemId); 2125 // If it's already an absolute one, return it 2126 if (systemURI.isAbsolute()) { 2127 if (systemURI.getScheme().length() > 1) { 2128 return systemId; 2129 } 2130 /** 2131 * If the scheme's length is only one character, 2132 * it's likely that this was intended as a file 2133 * path. Fixing this up in expandSystemId to 2134 * maintain backwards compatibility. 
2135 */ 2136 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2137 } 2138 2139 // If there isn't a base URI, use the working directory 2140 URI baseURI = null; 2141 if (baseSystemId == null || baseSystemId.length() == 0) { 2142 baseURI = getUserDir(); 2143 } 2144 else { 2145 baseURI = new URI(baseSystemId, true); 2146 if (!baseURI.isAbsoluteURI()) { 2147 // assume "base" is also a relative uri 2148 baseURI.absolutize(getUserDir()); 2149 } 2150 } 2151 2152 // absolutize the system identifier using the base URI 2153 // systemURI.absolutize(baseURI); 2154 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2155 2156 // return the string rep of the new uri (an absolute one) 2157 return systemURI.toString(); 2158 2159 // if any exception is thrown, it'll get thrown to the caller. 2160 2161 } // expandSystemIdStrictOff(String,String):String 2162 2163 // 2164 // Protected methods 2165 // 2166 2167 2168 /** 2169 * Returns the IANA encoding name that is auto-detected from 2170 * the bytes specified, with the endian-ness of that encoding where appropriate. 2171 * 2172 * @param b4 The first four bytes of the input. 2173 * @param count The number of bytes actually read. 2174 * @return a 2-element array: the first element, an IANA-encoding string, 2175 * the second element a Boolean which is true iff the document is big endian, false 2176 * if it's little-endian, and null if the distinction isn't relevant. 
2177 */ 2178 protected Object[] getEncodingName(byte[] b4, int count) { 2179 2180 if (count < 2) { 2181 return defaultEncoding; 2182 } 2183 2184 // UTF-16, with BOM 2185 int b0 = b4[0] & 0xFF; 2186 int b1 = b4[1] & 0xFF; 2187 if (b0 == 0xFE && b1 == 0xFF) { 2188 // UTF-16, big-endian 2189 return new Object [] {"UTF-16BE", new Boolean(true)}; 2190 } 2191 if (b0 == 0xFF && b1 == 0xFE) { 2192 // UTF-16, little-endian 2193 return new Object [] {"UTF-16LE", new Boolean(false)}; 2194 } 2195 2196 // default to UTF-8 if we don't have enough bytes to make a 2197 // good determination of the encoding 2198 if (count < 3) { 2199 return defaultEncoding; 2200 } 2201 2202 // UTF-8 with a BOM 2203 int b2 = b4[2] & 0xFF; 2204 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2205 return defaultEncoding; 2206 } 2207 2208 // default to UTF-8 if we don't have enough bytes to make a 2209 // good determination of the encoding 2210 if (count < 4) { 2211 return defaultEncoding; 2212 } 2213 2214 // other encodings 2215 int b3 = b4[3] & 0xFF; 2216 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2217 // UCS-4, big endian (1234) 2218 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2219 } 2220 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2221 // UCS-4, little endian (4321) 2222 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2223 } 2224 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2225 // UCS-4, unusual octet order (2143) 2226 // REVISIT: What should this be? 2227 return new Object [] {"ISO-10646-UCS-4", null}; 2228 } 2229 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2230 // UCS-4, unusual octect order (3412) 2231 // REVISIT: What should this be? 2232 return new Object [] {"ISO-10646-UCS-4", null}; 2233 } 2234 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2235 // UTF-16, big-endian, no BOM 2236 // (or could turn out to be UCS-2... 2237 // REVISIT: What should this be? 
2238 return new Object [] {"UTF-16BE", new Boolean(true)}; 2239 } 2240 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2241 // UTF-16, little-endian, no BOM 2242 // (or could turn out to be UCS-2... 2243 return new Object [] {"UTF-16LE", new Boolean(false)}; 2244 } 2245 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2246 // EBCDIC 2247 // a la xerces1, return CP037 instead of EBCDIC here 2248 return new Object [] {"CP037", null}; 2249 } 2250 2251 return defaultEncoding; 2252 2253 } // getEncodingName(byte[],int):Object[] 2254 2255 /** 2256 * Creates a reader capable of reading the given input stream in 2257 * the specified encoding. 2258 * 2259 * @param inputStream The input stream. 2260 * @param encoding The encoding name that the input stream is 2261 * encoded using. If the user has specified that 2262 * Java encoding names are allowed, then the 2263 * encoding name may be a Java encoding name; 2264 * otherwise, it is an ianaEncoding name. 2265 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2266 * specify a byte order, this tells whether the order is bigEndian. null menas 2267 * unknown or not relevant. 2268 * 2269 * @return Returns a reader. 
2270 */ 2271 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 2272 throws IOException { 2273 2274 // normalize encoding name 2275 if (encoding == null) { 2276 encoding = "UTF-8"; 2277 } 2278 2279 // try to use an optimized reader 2280 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 2281 if (ENCODING.equals("UTF-8")) { 2282 if (DEBUG_ENCODINGS) { 2283 System.out.println("$$$ creating UTF8Reader"); 2284 } 2285 return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 2286 } 2287 if (ENCODING.equals("US-ASCII")) { 2288 if (DEBUG_ENCODINGS) { 2289 System.out.println("$$$ creating ASCIIReader"); 2290 } 2291 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2292 } 2293 if(ENCODING.equals("ISO-10646-UCS-4")) { 2294 if(isBigEndian != null) { 2295 boolean isBE = isBigEndian.booleanValue(); 2296 if(isBE) { 2297 return new UCSReader(inputStream, UCSReader.UCS4BE); 2298 } else { 2299 return new UCSReader(inputStream, UCSReader.UCS4LE); 2300 } 2301 } else { 2302 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2303 "EncodingByteOrderUnsupported", 2304 new Object[] { encoding }, 2305 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2306 } 2307 } 2308 if(ENCODING.equals("ISO-10646-UCS-2")) { 2309 if(isBigEndian != null) { // sould never happen with this encoding... 
2310 boolean isBE = isBigEndian.booleanValue(); 2311 if(isBE) { 2312 return new UCSReader(inputStream, UCSReader.UCS2BE); 2313 } else { 2314 return new UCSReader(inputStream, UCSReader.UCS2LE); 2315 } 2316 } else { 2317 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2318 "EncodingByteOrderUnsupported", 2319 new Object[] { encoding }, 2320 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2321 } 2322 } 2323 2324 // check for valid name 2325 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2326 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2327 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2328 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2329 "EncodingDeclInvalid", 2330 new Object[] { encoding }, 2331 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2332 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2333 // because every byte is a valid ISO Latin 1 character. 2334 // It may not translate correctly but if we failed on 2335 // the encoding anyway, then we're expecting the content 2336 // of the document to be bad. This will just prevent an 2337 // invalid UTF-8 sequence to be detected. This is only 2338 // important when continue-after-fatal-error is turned 2339 // on. -Ac 2340 encoding = "ISO-8859-1"; 2341 } 2342 2343 // try to use a Java reader 2344 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2345 if (javaEncoding == null) { 2346 if(fAllowJavaEncodings) { 2347 javaEncoding = encoding; 2348 } else { 2349 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2350 "EncodingDeclInvalid", 2351 new Object[] { encoding }, 2352 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2353 // see comment above. 
2354 javaEncoding = "ISO8859_1"; 2355 } 2356 } 2357 if (DEBUG_ENCODINGS) { 2358 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2359 if (javaEncoding == encoding) { 2360 System.out.print(" (IANA encoding)"); 2361 } 2362 System.out.println(); 2363 } 2364 return new BufferedReader( new InputStreamReader(inputStream, javaEncoding)); 2365 2366 } // createReader(InputStream,String, Boolean): Reader 2367 2368 2369 /** 2370 * Return the public identifier for the current document event. 2371 * <p> 2372 * The return value is the public identifier of the document 2373 * entity or of the external parsed entity in which the markup 2374 * triggering the event appears. 2375 * 2376 * @return A string containing the public identifier, or 2377 * null if none is available. 2378 */ 2379 public String getPublicId() { 2380 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 2381 } // getPublicId():String 2382 2383 /** 2384 * Return the expanded system identifier for the current document event. 2385 * <p> 2386 * The return value is the expanded system identifier of the document 2387 * entity or of the external parsed entity in which the markup 2388 * triggering the event appears. 2389 * <p> 2390 * If the system identifier is a URL, the parser must resolve it 2391 * fully before passing it to the application. 2392 * 2393 * @return A string containing the expanded system identifier, or null 2394 * if none is available. 
2395 */ 2396 public String getExpandedSystemId() { 2397 if (fCurrentEntity != null) { 2398 if (fCurrentEntity.entityLocation != null && 2399 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2400 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2401 } else { 2402 // search for the first external entity on the stack 2403 int size = fEntityStack.size(); 2404 for (int i = size - 1; i >= 0 ; i--) { 2405 Entity.ScannedEntity externalEntity = 2406 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2407 2408 if (externalEntity.entityLocation != null && 2409 externalEntity.entityLocation.getExpandedSystemId() != null) { 2410 return externalEntity.entityLocation.getExpandedSystemId(); 2411 } 2412 } 2413 } 2414 } 2415 return null; 2416 } // getExpandedSystemId():String 2417 2418 /** 2419 * Return the literal system identifier for the current document event. 2420 * <p> 2421 * The return value is the literal system identifier of the document 2422 * entity or of the external parsed entity in which the markup 2423 * triggering the event appears. 2424 * <p> 2425 * @return A string containing the literal system identifier, or null 2426 * if none is available. 
2427 */ 2428 public String getLiteralSystemId() { 2429 if (fCurrentEntity != null) { 2430 if (fCurrentEntity.entityLocation != null && 2431 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2432 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2433 } else { 2434 // search for the first external entity on the stack 2435 int size = fEntityStack.size(); 2436 for (int i = size - 1; i >= 0 ; i--) { 2437 Entity.ScannedEntity externalEntity = 2438 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2439 2440 if (externalEntity.entityLocation != null && 2441 externalEntity.entityLocation.getLiteralSystemId() != null) { 2442 return externalEntity.entityLocation.getLiteralSystemId(); 2443 } 2444 } 2445 } 2446 } 2447 return null; 2448 } // getLiteralSystemId():String 2449 2450 /** 2451 * Return the line number where the current document event ends. 2452 * <p> 2453 * <strong>Warning:</strong> The return value from the method 2454 * is intended only as an approximation for the sake of error 2455 * reporting; it is not intended to provide sufficient information 2456 * to edit the character content of the original XML document. 2457 * <p> 2458 * The return value is an approximation of the line number 2459 * in the document entity or external parsed entity where the 2460 * markup triggering the event appears. 2461 * <p> 2462 * If possible, the SAX driver should provide the line position 2463 * of the first character after the text associated with the document 2464 * event. The first line in the document is line 1. 2465 * 2466 * @return The line number, or -1 if none is available. 
2467 */ 2468 public int getLineNumber() { 2469 if (fCurrentEntity != null) { 2470 if (fCurrentEntity.isExternal()) { 2471 return fCurrentEntity.lineNumber; 2472 } else { 2473 // search for the first external entity on the stack 2474 int size = fEntityStack.size(); 2475 for (int i=size-1; i>0 ; i--) { 2476 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2477 if (firstExternalEntity.isExternal()) { 2478 return firstExternalEntity.lineNumber; 2479 } 2480 } 2481 } 2482 } 2483 2484 return -1; 2485 2486 } // getLineNumber():int 2487 2488 /** 2489 * Return the column number where the current document event ends. 2490 * <p> 2491 * <strong>Warning:</strong> The return value from the method 2492 * is intended only as an approximation for the sake of error 2493 * reporting; it is not intended to provide sufficient information 2494 * to edit the character content of the original XML document. 2495 * <p> 2496 * The return value is an approximation of the column number 2497 * in the document entity or external parsed entity where the 2498 * markup triggering the event appears. 2499 * <p> 2500 * If possible, the SAX driver should provide the line position 2501 * of the first character after the text associated with the document 2502 * event. 2503 * <p> 2504 * If possible, the SAX driver should provide the line position 2505 * of the first character after the text associated with the document 2506 * event. The first column in each line is column 1. 2507 * 2508 * @return The column number, or -1 if none is available. 
2509 */ 2510 public int getColumnNumber() { 2511 if (fCurrentEntity != null) { 2512 if (fCurrentEntity.isExternal()) { 2513 return fCurrentEntity.columnNumber; 2514 } else { 2515 // search for the first external entity on the stack 2516 int size = fEntityStack.size(); 2517 for (int i=size-1; i>0 ; i--) { 2518 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2519 if (firstExternalEntity.isExternal()) { 2520 return firstExternalEntity.columnNumber; 2521 } 2522 } 2523 } 2524 } 2525 2526 return -1; 2527 } // getColumnNumber():int 2528 2529 2530 // 2531 // Protected static methods 2532 // 2533 2534 /** 2535 * Fixes a platform dependent filename to standard URI form. 2536 * 2537 * @param str The string to fix. 2538 * 2539 * @return Returns the fixed URI string. 2540 */ 2541 protected static String fixURI(String str) { 2542 2543 // handle platform dependent strings 2544 str = str.replace(java.io.File.separatorChar, '/'); 2545 2546 // Windows fix 2547 if (str.length() >= 2) { 2548 char ch1 = str.charAt(1); 2549 // change "C:blah" to "/C:blah" 2550 if (ch1 == ':') { 2551 char ch0 = Character.toUpperCase(str.charAt(0)); 2552 if (ch0 >= 'A' && ch0 <= 'Z') { 2553 str = "/" + str; 2554 } 2555 } 2556 // change "//blah" to "file://blah" 2557 else if (ch1 == '/' && str.charAt(0) == '/') { 2558 str = "file:" + str; 2559 } 2560 } 2561 2562 // replace spaces in file names with %20. 2563 // Original comment from JDK5: the following algorithm might not be 2564 // very performant, but people who want to use invalid URI's have to 2565 // pay the price. 2566 int pos = str.indexOf(' '); 2567 if (pos >= 0) { 2568 StringBuilder sb = new StringBuilder(str.length()); 2569 // put characters before ' ' into the string builder 2570 for (int i = 0; i < pos; i++) 2571 sb.append(str.charAt(i)); 2572 // and %20 for the space 2573 sb.append("%20"); 2574 // for the remamining part, also convert ' ' to "%20". 
2575 for (int i = pos+1; i < str.length(); i++) { 2576 if (str.charAt(i) == ' ') 2577 sb.append("%20"); 2578 else 2579 sb.append(str.charAt(i)); 2580 } 2581 str = sb.toString(); 2582 } 2583 2584 // done 2585 return str; 2586 2587 } // fixURI(String):String 2588 2589 /** 2590 * Escape invalid URI characters. 2591 * 2592 * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like), 2593 * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of 2594 * %-encoded UTF-8 octets). 2595 * 2596 * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that 2597 * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case, 2598 * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter, 2599 * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment 2600 * identifier or it might be an invalid '#'. 2601 * 2602 * Given that the former is vastly more likely than the latter in each case (most users are familiar with 2603 * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses 2604 * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit. 2605 * 2606 * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI. 
2607 */ 2608 protected static String escapeNonUSAscii(String str) { 2609 if (str == null) { 2610 return str; 2611 } 2612 2613 // get UTF-8 bytes for the string 2614 StringBuffer buffer = new StringBuffer(); 2615 byte[] bytes = null; 2616 byte b; 2617 try { 2618 bytes = str.getBytes("UTF-8"); 2619 } catch (java.io.UnsupportedEncodingException e) { 2620 // should never happen 2621 return str; 2622 } 2623 int len = bytes.length; 2624 int ch; 2625 2626 // for each byte 2627 for (int i = 0; i < len; i++) { 2628 b = bytes[i]; 2629 // for non-ascii character: make it positive, then escape 2630 if (b < 0) { 2631 ch = b + 256; 2632 buffer.append('%'); 2633 buffer.append(gHexChs[ch >> 4]); 2634 buffer.append(gHexChs[ch & 0xf]); 2635 } 2636 else if (b != '%' && b != '#' && gNeedEscaping[b]) { 2637 buffer.append('%'); 2638 buffer.append(gAfterEscaping1[b]); 2639 buffer.append(gAfterEscaping2[b]); 2640 } 2641 else { 2642 buffer.append((char)b); 2643 } 2644 } 2645 return buffer.toString(); 2646 } 2647 2648 // 2649 // Package visible methods 2650 // 2651 /** Prints the contents of the buffer. 
*/ 2652 final void print() { 2653 if (DEBUG_BUFFER) { 2654 if (fCurrentEntity != null) { 2655 System.out.print('['); 2656 System.out.print(fCurrentEntity.count); 2657 System.out.print(' '); 2658 System.out.print(fCurrentEntity.position); 2659 if (fCurrentEntity.count > 0) { 2660 System.out.print(" \""); 2661 for (int i = 0; i < fCurrentEntity.count; i++) { 2662 if (i == fCurrentEntity.position) { 2663 System.out.print('^'); 2664 } 2665 char c = fCurrentEntity.ch[i]; 2666 switch (c) { 2667 case '\n': { 2668 System.out.print("\\n"); 2669 break; 2670 } 2671 case '\r': { 2672 System.out.print("\\r"); 2673 break; 2674 } 2675 case '\t': { 2676 System.out.print("\\t"); 2677 break; 2678 } 2679 case '\\': { 2680 System.out.print("\\\\"); 2681 break; 2682 } 2683 default: { 2684 System.out.print(c); 2685 } 2686 } 2687 } 2688 if (fCurrentEntity.position == fCurrentEntity.count) { 2689 System.out.print('^'); 2690 } 2691 System.out.print('"'); 2692 } 2693 System.out.print(']'); 2694 System.out.print(" @ "); 2695 System.out.print(fCurrentEntity.lineNumber); 2696 System.out.print(','); 2697 System.out.print(fCurrentEntity.columnNumber); 2698 } else { 2699 System.out.print("*NO CURRENT ENTITY*"); 2700 } 2701 } 2702 } // print() 2703 2704 /** 2705 * Buffer used in entity manager to reuse character arrays instead 2706 * of creating new ones every time. 2707 * 2708 * @xerces.internal 2709 * 2710 * @author Ankit Pasricha, IBM 2711 */ 2712 private static class CharacterBuffer { 2713 2714 /** character buffer */ 2715 private char[] ch; 2716 2717 /** whether the buffer is for an external or internal scanned entity */ 2718 private boolean isExternal; 2719 2720 public CharacterBuffer(boolean isExternal, int size) { 2721 this.isExternal = isExternal; 2722 ch = new char[size]; 2723 } 2724 } 2725 2726 2727 /** 2728 * Stores a number of character buffers and provides it to the entity 2729 * manager to use when an entity is seen. 
2730 * 2731 * @xerces.internal 2732 * 2733 * @author Ankit Pasricha, IBM 2734 */ 2735 private static class CharacterBufferPool { 2736 2737 private static final int DEFAULT_POOL_SIZE = 3; 2738 2739 private CharacterBuffer[] fInternalBufferPool; 2740 private CharacterBuffer[] fExternalBufferPool; 2741 2742 private int fExternalBufferSize; 2743 private int fInternalBufferSize; 2744 private int poolSize; 2745 2746 private int fInternalTop; 2747 private int fExternalTop; 2748 2749 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2750 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2751 } 2752 2753 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2754 fExternalBufferSize = externalBufferSize; 2755 fInternalBufferSize = internalBufferSize; 2756 this.poolSize = poolSize; 2757 init(); 2758 } 2759 2760 /** Initializes buffer pool. **/ 2761 private void init() { 2762 fInternalBufferPool = new CharacterBuffer[poolSize]; 2763 fExternalBufferPool = new CharacterBuffer[poolSize]; 2764 fInternalTop = -1; 2765 fExternalTop = -1; 2766 } 2767 2768 /** Retrieves buffer from pool. **/ 2769 public CharacterBuffer getBuffer(boolean external) { 2770 if (external) { 2771 if (fExternalTop > -1) { 2772 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2773 } 2774 else { 2775 return new CharacterBuffer(true, fExternalBufferSize); 2776 } 2777 } 2778 else { 2779 if (fInternalTop > -1) { 2780 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2781 } 2782 else { 2783 return new CharacterBuffer(false, fInternalBufferSize); 2784 } 2785 } 2786 } 2787 2788 /** Returns buffer to pool. 
**/ 2789 public void returnToPool(CharacterBuffer buffer) { 2790 if (buffer.isExternal) { 2791 if (fExternalTop < fExternalBufferPool.length - 1) { 2792 fExternalBufferPool[++fExternalTop] = buffer; 2793 } 2794 } 2795 else if (fInternalTop < fInternalBufferPool.length - 1) { 2796 fInternalBufferPool[++fInternalTop] = buffer; 2797 } 2798 } 2799 2800 /** Sets the size of external buffers and dumps the old pool. **/ 2801 public void setExternalBufferSize(int bufferSize) { 2802 fExternalBufferSize = bufferSize; 2803 fExternalBufferPool = new CharacterBuffer[poolSize]; 2804 fExternalTop = -1; 2805 } 2806 } 2807 2808 /** 2809 * This class wraps the byte inputstreams we're presented with. 2810 * We need it because java.io.InputStreams don't provide 2811 * functionality to reread processed bytes, and they have a habit 2812 * of reading more than one character when you call their read() 2813 * methods. This means that, once we discover the true (declared) 2814 * encoding of a document, we can neither backtrack to read the 2815 * whole doc again nor start reading where we are with a new 2816 * reader. 2817 * 2818 * This class allows rewinding an inputStream by allowing a mark 2819 * to be set, and the stream reset to that position. 
<strong>The 2820 * class assumes that it needs to read one character per 2821 * invocation when it's read() method is inovked, but uses the 2822 * underlying InputStream's read(char[], offset length) method--it 2823 * won't buffer data read this way!</strong> 2824 * 2825 * @xerces.internal 2826 * 2827 * @author Neil Graham, IBM 2828 * @author Glenn Marcy, IBM 2829 */ 2830 2831 protected final class RewindableInputStream extends InputStream { 2832 2833 private InputStream fInputStream; 2834 private byte[] fData; 2835 private int fStartOffset; 2836 private int fEndOffset; 2837 private int fOffset; 2838 private int fLength; 2839 private int fMark; 2840 2841 public RewindableInputStream(InputStream is) { 2842 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2843 fInputStream = is; 2844 fStartOffset = 0; 2845 fEndOffset = -1; 2846 fOffset = 0; 2847 fLength = 0; 2848 fMark = 0; 2849 } 2850 2851 public void setStartOffset(int offset) { 2852 fStartOffset = offset; 2853 } 2854 2855 public void rewind() { 2856 fOffset = fStartOffset; 2857 } 2858 2859 public int read() throws IOException { 2860 int b = 0; 2861 if (fOffset < fLength) { 2862 return fData[fOffset++] & 0xff; 2863 } 2864 if (fOffset == fEndOffset) { 2865 return -1; 2866 } 2867 if (fOffset == fData.length) { 2868 byte[] newData = new byte[fOffset << 1]; 2869 System.arraycopy(fData, 0, newData, 0, fOffset); 2870 fData = newData; 2871 } 2872 b = fInputStream.read(); 2873 if (b == -1) { 2874 fEndOffset = fOffset; 2875 return -1; 2876 } 2877 fData[fLength++] = (byte)b; 2878 fOffset++; 2879 return b & 0xff; 2880 } 2881 2882 public int read(byte[] b, int off, int len) throws IOException { 2883 int bytesLeft = fLength - fOffset; 2884 if (bytesLeft == 0) { 2885 if (fOffset == fEndOffset) { 2886 return -1; 2887 } 2888 2889 /** 2890 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2891 * //System.out.println("fInputStream = " + fInputStream ); 2892 * // better get some more for the voracious reader... 
*/ 2893 2894 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2895 2896 if (!fCurrentEntity.xmlDeclChunkRead) 2897 { 2898 fCurrentEntity.xmlDeclChunkRead = true; 2899 len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2900 } 2901 return fInputStream.read(b, off, len); 2902 } 2903 2904 int returnedVal = read(); 2905 if(returnedVal == -1) { 2906 fEndOffset = fOffset; 2907 return -1; 2908 } 2909 b[off] = (byte)returnedVal; 2910 return 1; 2911 2912 } 2913 if (len < bytesLeft) { 2914 if (len <= 0) { 2915 return 0; 2916 } 2917 } else { 2918 len = bytesLeft; 2919 } 2920 if (b != null) { 2921 System.arraycopy(fData, fOffset, b, off, len); 2922 } 2923 fOffset += len; 2924 return len; 2925 } 2926 2927 public long skip(long n) 2928 throws IOException { 2929 int bytesLeft; 2930 if (n <= 0) { 2931 return 0; 2932 } 2933 bytesLeft = fLength - fOffset; 2934 if (bytesLeft == 0) { 2935 if (fOffset == fEndOffset) { 2936 return 0; 2937 } 2938 return fInputStream.skip(n); 2939 } 2940 if (n <= bytesLeft) { 2941 fOffset += n; 2942 return n; 2943 } 2944 fOffset += bytesLeft; 2945 if (fOffset == fEndOffset) { 2946 return bytesLeft; 2947 } 2948 n -= bytesLeft; 2949 /* 2950 * In a manner of speaking, when this class isn't permitting more 2951 * than one byte at a time to be read, it is "blocking". The 2952 * available() method should indicate how much can be read without 2953 * blocking, so while we're in this mode, it should only indicate 2954 * that bytes in its buffer are available; otherwise, the result of 2955 * available() on the underlying InputStream is appropriate. 2956 */ 2957 return fInputStream.skip(n) + bytesLeft; 2958 } 2959 2960 public int available() throws IOException { 2961 int bytesLeft = fLength - fOffset; 2962 if (bytesLeft == 0) { 2963 if (fOffset == fEndOffset) { 2964 return -1; 2965 } 2966 return fCurrentEntity.mayReadChunks ? 
fInputStream.available() 2967 : 0; 2968 } 2969 return bytesLeft; 2970 } 2971 2972 public void mark(int howMuch) { 2973 fMark = fOffset; 2974 } 2975 2976 public void reset() { 2977 fOffset = fMark; 2978 //test(); 2979 } 2980 2981 public boolean markSupported() { 2982 return true; 2983 } 2984 2985 public void close() throws IOException { 2986 if (fInputStream != null) { 2987 fInputStream.close(); 2988 fInputStream = null; 2989 } 2990 } 2991 } // end of RewindableInputStream class 2992 2993 public void test(){ 2994 //System.out.println("TESTING: Added familytree to entityManager"); 2995 //Usecase1 2996 fEntityStorage.addExternalEntity("entityUsecase1",null, 2997 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 2998 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 2999 3000 //Usecase2 3001 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3002 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3003 fEntityStorage.addInternalEntity("text", "Hello World."); 3004 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3005 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3006 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3007 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3008 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3009 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3010 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3011 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3012 fEntityStorage.addInternalEntity("ch","©"); 3013 fEntityStorage.addInternalEntity("ch1","T"); 3014 fEntityStorage.addInternalEntity("% ch2","param"); 3015 } 3016 3017 } // class XMLEntityManager