1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 24 25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.util.EncodingMap; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 36 import com.sun.org.apache.xerces.internal.xni.*; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 38 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 39 import com.sun.xml.internal.stream.Entity; 40 import com.sun.xml.internal.stream.Entity.ScannedEntity; 41 import com.sun.xml.internal.stream.XMLBufferListener; 42 import java.io.EOFException; 43 import java.io.IOException; 44 import 
java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Locale;
import java.util.Vector;

/**
 * Implements the entity scanner methods.
 * <p>
 * The scanner reads characters from the buffer of the current
 * {@link Entity.ScannedEntity} ({@code fCurrentEntity}) and exposes the
 * location of that entity through the {@link XMLLocator} interface.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator {


    /** Entity currently being scanned; {@code null} when no entity is set. */
    protected Entity.ScannedEntity fCurrentEntity = null ;
    /** Buffer size recorded by {@link #setBufferSize(int)}. */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager that supplies the limit analyzer and security manager on reset. */
    protected XMLEntityManager fEntityManager ;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;
    /** Listeners which should know when load is being called */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0..126); an entry is
     * {@code true} when that ASCII character is accepted as a name
     * character by the scan methods. Filled in by the static initializer.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * {@code fillInStackTrace()} is overridden to return {@code this}, so no
     * stack trace is captured for this shared instance.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    protected SymbolTable fSymbolTable = null;
    protected XMLErrorReporter fErrorReporter = null;
    // NOTE(review): the three whitespace fields below are only reset in this
    // part of the file; their readers are elsewhere - presumably they record
    // whitespace positions found while skipping spaces; confirm before relying on it.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    protected PropertyManager fPropertyManager = null ;

    /** Cached value of {@code fCurrentEntity.isExternal()}, refreshed by {@code setCurrentEntity}. */
    boolean isExternal = false;
    static {

        // 'A'..'Z'
        for(int i=0x0041;i<=0x005A ; i++){
            VALID_NAMES[i]=true;
        }
        // 'a'..'z'
        for(int i=0x0061;i<=0x007A; i++){
            VALID_NAMES[i]=true;
        }
        // '0'..'9'
        for(int i=0x0030;i<=0x0039; i++){
            VALID_NAMES[i]=true;
        }
        VALID_NAMES[45]=true;   // '-'
        VALID_NAMES[46]=true;   // '.'
        VALID_NAMES[58]=true;   // ':'
        VALID_NAMES[95]=true;   // '_'
    }
    // SAPJVM: Remember, that the XML version has explicitly been set,
    // so that XMLStreamReader.getVersion() can find that out.
    boolean xmlVersionSetExplicitly = false;
    //
    // Constructors
    //

    /** Default constructor; leaves every field at its declared initial value. */
    public XMLEntityScanner() {
    } // <init>()


    /**
     * Creates a scanner bound to the given entity manager and initialises it
     * by calling {@link #reset(PropertyManager)}.
     *
     * @param propertyManager source of the symbol table and error reporter
     * @param entityManager   entity manager stored in {@code fEntityManager}
     */
    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
        fEntityManager = entityManager ;
        reset(propertyManager);
    } // <init>()


    /**
     * Records the requested buffer size in {@code fBufferSize}.
     *
     * @param size the buffer size to remember
     */
    public final void setBufferSize(int size) {
        // REVISIT: Buffer size passed to entity scanner
        // was not being kept in synch with the actual size
        // of the buffers in each scanned entity. If any
        // of the buffers were actually resized, it was possible
        // that the parser would throw an ArrayIndexOutOfBoundsException
        // for documents which contained names which are longer than
        // the current buffer size. Conceivably the buffer size passed
        // to entity scanner could be used to determine a minimum size
        // for resizing, if doubling its size is smaller than this
        // minimum. -- mrglavas
        fBufferSize = size;
    }

    /**
     * Resets the component: re-reads the symbol table and error reporter
     * from the property manager, then clears the per-parse state via
     * {@code resetCommon()}.
     *
     * @param propertyManager source of the symbol table and error reporter
     */
    public void reset(PropertyManager propertyManager){
        fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
        fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
        resetCommon();
    }

    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Thrown by component on initialization error.
     *                      For example, if a feature or property is
     *                      required for the operation of the component, the
     *                      component manager may throw a
     *                      SAXNotRecognizedException or a
     *                      SAXNotSupportedException.
203 */ 204 public void reset(XMLComponentManager componentManager) 205 throws XMLConfigurationException { 206 // xerces features 207 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 208 209 //xerces properties 210 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 211 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 212 resetCommon(); 213 } // reset(XMLComponentManager) 214 215 216 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 217 XMLErrorReporter reporter) { 218 fCurrentEntity = null; 219 fSymbolTable = symbolTable; 220 fEntityManager = entityManager; 221 fErrorReporter = reporter; 222 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 223 fSecurityManager = fEntityManager.fSecurityManager; 224 } 225 226 private void resetCommon() { 227 fCurrentEntity = null; 228 whiteSpaceLen = 0; 229 whiteSpaceInfoNeeded = true; 230 listeners.clear(); 231 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 232 fSecurityManager = fEntityManager.fSecurityManager; 233 } 234 235 /** 236 * Returns the XML version of the current entity. This will normally be the 237 * value from the XML or text declaration or defaulted by the parser. Note that 238 * that this value may be different than the version of the processing rules 239 * applied to the current entity. For instance, an XML 1.1 document may refer to 240 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 241 * document. Also note that, for a given entity, this value can only be considered 242 * final once the XML or text declaration has been read or once it has been 243 * determined that there is no such declaration. 244 */ 245 public final String getXMLVersion() { 246 if (fCurrentEntity != null) { 247 return fCurrentEntity.xmlVersion; 248 } 249 return null; 250 } // getXMLVersion():String 251 252 /** 253 * Sets the XML version. 
This method is used by the 254 * scanners to report the value of the version pseudo-attribute 255 * in an XML or text declaration. 256 * 257 * @param xmlVersion the XML version of the current entity 258 */ 259 public final void setXMLVersion(String xmlVersion) { 260 xmlVersionSetExplicitly = true; // SAPJVM 261 fCurrentEntity.xmlVersion = xmlVersion; 262 } // setXMLVersion(String) 263 264 265 /** set the instance of current scanned entity. 266 * @param ScannedEntity 267 */ 268 269 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 270 fCurrentEntity = scannedEntity ; 271 if(fCurrentEntity != null){ 272 isExternal = fCurrentEntity.isExternal(); 273 if(DEBUG_BUFFER) 274 System.out.println("Current Entity is "+scannedEntity.name); 275 } 276 } 277 278 public Entity.ScannedEntity getCurrentEntity(){ 279 return fCurrentEntity ; 280 } 281 // 282 // XMLEntityReader methods 283 // 284 285 /** 286 * Returns the base system identifier of the currently scanned 287 * entity, or null if none is available. 288 */ 289 public final String getBaseSystemId() { 290 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 291 } // getBaseSystemId():String 292 293 /** 294 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 295 */ 296 public void setBaseSystemId(String systemId) { 297 //no-op 298 } 299 300 ///////////// Locator methods start. 301 public final int getLineNumber(){ 302 //if the entity is closed, we should return -1 303 //xxx at first place why such call should be there... 304 return fCurrentEntity != null ? 
fCurrentEntity.lineNumber : -1 ; 305 } 306 307 /** 308 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 309 */ 310 public void setLineNumber(int line) { 311 //no-op 312 } 313 314 315 public final int getColumnNumber(){ 316 //if the entity is closed, we should return -1 317 //xxx at first place why such call should be there... 318 return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ; 319 } 320 321 /** 322 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 323 */ 324 public void setColumnNumber(int col) { 325 // no-op 326 } 327 328 329 public final int getCharacterOffset(){ 330 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 331 } 332 333 /** Returns the expanded system identifier. */ 334 public final String getExpandedSystemId() { 335 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 336 } 337 338 /** 339 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 340 */ 341 public void setExpandedSystemId(String systemId) { 342 //no-op 343 } 344 345 /** Returns the literal system identifier. */ 346 public final String getLiteralSystemId() { 347 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 348 } 349 350 /** 351 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 352 */ 353 public void setLiteralSystemId(String systemId) { 354 //no-op 355 } 356 357 /** Returns the public identifier. */ 358 public final String getPublicId() { 359 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? 
fCurrentEntity.entityLocation.getPublicId() : null; 360 } 361 362 /** 363 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 364 */ 365 public void setPublicId(String publicId) { 366 //no-op 367 } 368 369 ///////////////// Locator methods finished. 370 371 /** the version of the current entity being scanned */ 372 public void setVersion(String version){ 373 fCurrentEntity.version = version; 374 } 375 376 public String getVersion(){ 377 if (fCurrentEntity != null) 378 return fCurrentEntity.version ; 379 return null; 380 } 381 382 /** 383 * Returns the encoding of the current entity. 384 * Note that, for a given entity, this value can only be 385 * considered final once the encoding declaration has been read (or once it 386 * has been determined that there is no such declaration) since, no encoding 387 * having been specified on the XMLInputSource, the parser 388 * will make an initial "guess" which could be in error. 389 */ 390 public final String getEncoding() { 391 if (fCurrentEntity != null) { 392 return fCurrentEntity.encoding; 393 } 394 return null; 395 } // getEncoding():String 396 397 /** 398 * Sets the encoding of the scanner. This method is used by the 399 * scanners if the XMLDecl or TextDecl line contains an encoding 400 * pseudo-attribute. 401 * <p> 402 * <strong>Note:</strong> The underlying character reader on the 403 * current entity will be changed to accomodate the new encoding. 404 * However, the new encoding is ignored if the current reader was 405 * not constructed from an input stream (e.g. an external entity 406 * that is resolved directly to the appropriate java.io.Reader 407 * object). 408 * 409 * @param encoding The IANA encoding name of the new encoding. 410 * 411 * @throws IOException Thrown if the new encoding is not supported. 
412 * 413 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 414 */ 415 public final void setEncoding(String encoding) throws IOException { 416 417 if (DEBUG_ENCODINGS) { 418 System.out.println("$$$ setEncoding: "+encoding); 419 } 420 421 if (fCurrentEntity.stream != null) { 422 // if the encoding is the same, don't change the reader and 423 // re-use the original reader used by the OneCharReader 424 // NOTE: Besides saving an object, this overcomes deficiencies 425 // in the UTF-16 reader supplied with the standard Java 426 // distribution (up to and including 1.3). The UTF-16 427 // decoder buffers 8K blocks even when only asked to read 428 // a single char! -Ac 429 if (fCurrentEntity.encoding == null || 430 !fCurrentEntity.encoding.equals(encoding)) { 431 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 432 // and we know the endian-ness, we shouldn't change readers. 433 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 434 // the endian-ness from the encoding we presently have. 
435 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 436 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 437 if(ENCODING.equals("UTF-16")) return; 438 if(ENCODING.equals("ISO-10646-UCS-4")) { 439 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 440 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 441 } else { 442 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 443 } 444 return; 445 } 446 if(ENCODING.equals("ISO-10646-UCS-2")) { 447 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 448 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 449 } else { 450 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 451 } 452 return; 453 } 454 } 455 // wrap a new reader around the input stream, changing 456 // the encoding 457 if (DEBUG_ENCODINGS) { 458 System.out.println("$$$ creating new reader from stream: "+ 459 fCurrentEntity.stream); 460 } 461 //fCurrentEntity.stream.reset(); 462 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 463 fCurrentEntity.encoding = encoding; 464 465 } else { 466 if (DEBUG_ENCODINGS) 467 System.out.println("$$$ reusing old reader on stream"); 468 } 469 } 470 471 } // setEncoding(String) 472 473 /** Returns true if the current entity being scanned is external. */ 474 public final boolean isExternal() { 475 return fCurrentEntity.isExternal(); 476 } // isExternal():boolean 477 478 public int getChar(int relative) throws IOException{ 479 if(arrangeCapacity(relative + 1, false)){ 480 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 481 }else{ 482 return -1; 483 } 484 }//getChar() 485 486 /** 487 * Returns the next character on the input. 488 * <p> 489 * <strong>Note:</strong> The character is <em>not</em> consumed. 490 * 491 * @throws IOException Thrown if i/o error occurs. 492 * @throws EOFException Thrown on end of file. 
*/
    public int peekChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(peekChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // peek at character
        int c = fCurrentEntity.ch[fCurrentEntity.position];

        // return peeked character
        if (DEBUG_BUFFER) {
            System.out.print(")peekChar: ");
            print();
            if (isExternal) {
                System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
            } else {
                System.out.println(" -> '"+(char)c+"'");
            }
        }
        // in an external entity a carriage return is reported as a line feed
        if (isExternal) {
            return c != '\r' ? c : '\n';
        } else {
            return c;
        }

    } // peekChar():int

    /**
     * Returns the next character on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     * <p>
     * A line feed, or a carriage return in an external entity, advances the
     * entity's line number and resets its column number; a carriage return
     * in an external entity is returned as a line feed, and a line feed that
     * directly follows it is consumed as part of the same newline.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // newline was the last buffered character: keep it in slot 0 and
            // load behind it so a following '\n' can be inspected below
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false, false);
            }
            if (c == '\r' && isExternal) {
                // swallow the '\n' of a "\r\n" pair; otherwise step back
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int

    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    public String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan nmtoken
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // table lookup for characters below 127, XMLChar for the rest
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // reached the end of the buffered data in the middle of a token
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    // move the partial token to the front of the buffer
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // NOTE(review): load() is defined outside this view; a true
                // result presumably means no further data was read - confirm
                if (load(length, false, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String

    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public String scanName() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan name
        int offset = fCurrentEntity.position;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // the name-start character was the last buffered character:
            // keep it in slot 0 and load behind it
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                if (load(1, false, false)) {
                    // the name is just this single character
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // table lookup for characters below 127, XMLChar for the rest
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // reached the end of the buffered data in the middle of a name
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        // move the partial name to the front of the buffer
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    offset = 0;
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String

    /**
     * Scans a qualified name from the input, setting the fields of the
     * QName structure appropriately.
     * <p>
     * <strong>Note:</strong> The qualified name characters are consumed.
     * <p>
     * <strong>Note:</strong> The strings used to set the values of the
     * QName structure must be symbols. The SymbolTable can be used for
     * this purpose.
     *
     * @param qname The qualified name structure to fill.
     *
     * @return Returns true if a qualified name appeared immediately on
     *         the input and was scanned, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
768 * 769 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 770 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 771 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 772 */ 773 public boolean scanQName(QName qname) throws IOException { 774 if (DEBUG_BUFFER) { 775 System.out.print("(scanQName, "+qname+": "); 776 print(); 777 System.out.println(); 778 } 779 780 // load more characters, if needed 781 if (fCurrentEntity.position == fCurrentEntity.count) { 782 load(0, true, true); 783 } 784 785 // scan qualified name 786 int offset = fCurrentEntity.position; 787 788 //making a check if if the specified character is a valid name start character 789 //as defined by production [5] in the XML 1.0 specification. 790 // Name ::= (Letter | '_' | ':') (NameChar)* 791 792 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 793 if (++fCurrentEntity.position == fCurrentEntity.count) { 794 invokeListeners(1); 795 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 796 offset = 0; 797 798 if (load(1, false, false)) { 799 fCurrentEntity.columnNumber++; 800 //adding into symbol table. 801 //XXX We are trying to add single character in SymbolTable?????? 
802 String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 803 qname.setValues(null, name, name, null); 804 if (DEBUG_BUFFER) { 805 System.out.print(")scanQName, "+qname+": "); 806 print(); 807 System.out.println(" -> true"); 808 } 809 return true; 810 } 811 } 812 int index = -1; 813 boolean vc = false; 814 while ( true){ 815 816 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 817 char c = fCurrentEntity.ch[fCurrentEntity.position]; 818 if(c < 127){ 819 vc = VALID_NAMES[c]; 820 }else{ 821 vc = XMLChar.isName(c); 822 } 823 if(!vc)break; 824 if (c == ':') { 825 if (index != -1) { 826 break; 827 } 828 index = fCurrentEntity.position; 829 //check prefix before further read 830 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset); 831 } 832 if (++fCurrentEntity.position == fCurrentEntity.count) { 833 int length = fCurrentEntity.position - offset; 834 //check localpart before loading more data 835 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length - index - 1); 836 invokeListeners(length); 837 if (length == fCurrentEntity.fBufferSize) { 838 // bad luck we have to resize our buffer 839 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 840 System.arraycopy(fCurrentEntity.ch, offset, 841 tmp, 0, length); 842 fCurrentEntity.ch = tmp; 843 fCurrentEntity.fBufferSize *= 2; 844 } else { 845 System.arraycopy(fCurrentEntity.ch, offset, 846 fCurrentEntity.ch, 0, length); 847 } 848 if (index != -1) { 849 index = index - offset; 850 } 851 offset = 0; 852 if (load(length, false, false)) { 853 break; 854 } 855 } 856 } 857 int length = fCurrentEntity.position - offset; 858 fCurrentEntity.columnNumber += length; 859 if (length > 0) { 860 String prefix = null; 861 String localpart = null; 862 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 863 offset, length); 864 865 if (index != -1) { 866 int prefixLength = index - offset; 867 //check the result: prefix 868 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, 
prefixLength); 869 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 870 offset, prefixLength); 871 int len = length - prefixLength - 1; 872 //check the result: localpart 873 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len); 874 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, 875 index + 1, len); 876 877 } else { 878 localpart = rawname; 879 //check the result: localpart 880 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); 881 } 882 qname.setValues(prefix, localpart, rawname, null); 883 if (DEBUG_BUFFER) { 884 System.out.print(")scanQName, "+qname+": "); 885 print(); 886 System.out.println(" -> true"); 887 } 888 return true; 889 } 890 } 891 892 // no qualified name found 893 if (DEBUG_BUFFER) { 894 System.out.print(")scanQName, "+qname+": "); 895 print(); 896 System.out.println(" -> false"); 897 } 898 return false; 899 900 } // scanQName(QName):boolean 901 902 /** 903 * Checks whether the value of the specified Limit exceeds its limit 904 * 905 * @param limit The Limit to be checked. 906 * @param entity The current entity. 907 * @param offset The index of the first byte 908 * @param length The length of the entity scanned. 909 */ 910 protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) { 911 fLimitAnalyzer.addValue(limit, null, length); 912 if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) { 913 fSecurityManager.debugPrint(fLimitAnalyzer); 914 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(), 915 new Object[]{new String(entity.ch, offset, length), 916 fLimitAnalyzer.getTotalValue(limit), 917 fSecurityManager.getLimit(limit), 918 fSecurityManager.getStateLiteral(limit)}, 919 XMLErrorReporter.SEVERITY_FATAL_ERROR); 920 } 921 } 922 923 /** 924 * CHANGED: 925 * Scans a range of parsed character data, This function appends the character data to 926 * the supplied buffer. 927 * <p> 928 * <strong>Note:</strong> The characters are consumed. 
* <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one character left: move it to slot 0 and load behind it
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // number of line breaks counted by the loop below
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // ran out of buffered data: restart at the front, leaving
                    // room for the newlines counted so far
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    // a '\n' directly after '\r' belongs to the same line
                    // break: skip it and move the start of the range past it
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                } else {
                    // not a newline: un-read it and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the scanned range with line feeds
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // Only newlines were scanned before the last buffered
                // character: hand them back and report "next character
                // unknown". setValues points content at the entity buffer;
                // the append variant below is kept commented out.
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until one fails XMLChar.isContent
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        // the column was already reset for each newline, so do not count them
        fCurrentEntity.columnNumber += length - newlines;
        if (fCurrentEntity.isGE) {
            checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
        }

        // setValues points content at the scanned range of the entity
        // buffer; the append variant below is kept commented out.
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer exhausted: the next character is not known yet
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int

    /**
     * Scans a range of attribute value data, setting the fields of the
     * XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of attribute value data. This method may return
     * before the quote character due to reaching the end of the input
     * buffer or any other reason.
     * <p>
     * <strong>Note:</strong> The fields contained in the XMLString
     * structure are not guaranteed to remain valid upon subsequent calls
     * to the entity scanner. Therefore, the caller is responsible for
     * immediately using the returned character data or making a copy of
     * the character data.
     *
     * @param quote   The quote character that signifies the end of the
     *                attribute value data.
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
1096 */ 1097 public int scanLiteral(int quote, XMLString content) 1098 throws IOException { 1099 if (DEBUG_BUFFER) { 1100 System.out.print("(scanLiteral, '"+(char)quote+"': "); 1101 print(); 1102 System.out.println(); 1103 } 1104 // load more characters, if needed 1105 if (fCurrentEntity.position == fCurrentEntity.count) { 1106 load(0, true, true); 1107 } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1108 invokeListeners(0); 1109 fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; 1110 load(1, false, false); 1111 fCurrentEntity.position = 0; 1112 } 1113 1114 // normalize newlines 1115 int offset = fCurrentEntity.position; 1116 int c = fCurrentEntity.ch[offset]; 1117 int newlines = 0; 1118 if(whiteSpaceInfoNeeded) 1119 whiteSpaceLen=0; 1120 if (c == '\n' || (c == '\r' && isExternal)) { 1121 if (DEBUG_BUFFER) { 1122 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 1123 print(); 1124 System.out.println(); 1125 } 1126 do { 1127 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1128 if (c == '\r' && isExternal) { 1129 newlines++; 1130 fCurrentEntity.lineNumber++; 1131 fCurrentEntity.columnNumber = 1; 1132 if (fCurrentEntity.position == fCurrentEntity.count) { 1133 offset = 0; 1134 fCurrentEntity.position = newlines; 1135 if (load(newlines, false, true)) { 1136 break; 1137 } 1138 } 1139 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1140 fCurrentEntity.position++; 1141 offset++; 1142 } 1143 /*** NEWLINE NORMALIZATION ***/ 1144 else { 1145 newlines++; 1146 } 1147 /***/ 1148 } else if (c == '\n') { 1149 newlines++; 1150 fCurrentEntity.lineNumber++; 1151 fCurrentEntity.columnNumber = 1; 1152 if (fCurrentEntity.position == fCurrentEntity.count) { 1153 offset = 0; 1154 fCurrentEntity.position = newlines; 1155 if (load(newlines, false, true)) { 1156 break; 1157 } 1158 } 1159 /*** NEWLINE NORMALIZATION *** 1160 * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' 1161 * && external) { 1162 * 
fCurrentEntity.position++; 1163 * offset++; 1164 * } 1165 * /***/ 1166 } else { 1167 fCurrentEntity.position--; 1168 break; 1169 } 1170 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 1171 int i=0; 1172 for ( i = offset; i < fCurrentEntity.position; i++) { 1173 fCurrentEntity.ch[i] = '\n'; 1174 storeWhiteSpace(i); 1175 } 1176 1177 int length = fCurrentEntity.position - offset; 1178 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1179 content.setValues(fCurrentEntity.ch, offset, length); 1180 if (DEBUG_BUFFER) { 1181 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1182 print(); 1183 System.out.println(); 1184 } 1185 return -1; 1186 } 1187 if (DEBUG_BUFFER) { 1188 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1189 print(); 1190 System.out.println(); 1191 } 1192 } 1193 1194 // scan literal value 1195 for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) { 1196 c = fCurrentEntity.ch[fCurrentEntity.position]; 1197 if ((c == quote && 1198 (!fCurrentEntity.literal || isExternal)) || 1199 c == '%' || !XMLChar.isContent(c)) { 1200 break; 1201 } 1202 if (whiteSpaceInfoNeeded && c == '\t') { 1203 storeWhiteSpace(fCurrentEntity.position); 1204 } 1205 } 1206 int length = fCurrentEntity.position - offset; 1207 fCurrentEntity.columnNumber += length - newlines; 1208 if (fCurrentEntity.isGE) { 1209 checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length); 1210 } 1211 content.setValues(fCurrentEntity.ch, offset, length); 1212 1213 // return next character 1214 if (fCurrentEntity.position != fCurrentEntity.count) { 1215 c = fCurrentEntity.ch[fCurrentEntity.position]; 1216 // NOTE: We don't want to accidentally signal the 1217 // end of the literal if we're expanding an 1218 // entity appearing in the literal. 
-Ac 1219 if (c == quote && fCurrentEntity.literal) { 1220 c = -1; 1221 } 1222 } else { 1223 c = -1; 1224 } 1225 if (DEBUG_BUFFER) { 1226 System.out.print(")scanLiteral, '"+(char)quote+"': "); 1227 print(); 1228 System.out.println(" -> '"+(char)c+"'"); 1229 } 1230 return c; 1231 1232 } // scanLiteral(int,XMLString):int 1233 1234 /** 1235 * Save whitespace information. Increase the whitespace buffer by 100 1236 * when needed. 1237 * 1238 * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). 1239 * 1240 * @param whiteSpacePos position of a whitespace in the scanner entity buffer 1241 */ 1242 private void storeWhiteSpace(int whiteSpacePos) { 1243 if (whiteSpaceLen >= whiteSpaceLookup.length) { 1244 int [] tmp = new int[whiteSpaceLookup.length + 100]; 1245 System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length); 1246 whiteSpaceLookup = tmp; 1247 } 1248 1249 whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos; 1250 } 1251 1252 //CHANGED: 1253 /** 1254 * Scans a range of character data up to the specified delimiter, 1255 * setting the fields of the XMLString structure, appropriately. 1256 * <p> 1257 * <strong>Note:</strong> The characters are consumed. 1258 * <p> 1259 * <strong>Note:</strong> This assumes that the length of the delimiter 1260 * and that the delimiter contains at least one character. 1261 * <p> 1262 * <strong>Note:</strong> This method does not guarantee to return 1263 * the longest run of character data. This method may return before 1264 * the delimiter due to reaching the end of the input buffer or any 1265 * other reason. 1266 * <p> 1267 * @param delimiter The string that signifies the end of the character 1268 * data to be scanned. 1269 * @param buffer The XMLStringBuffer to fill. 1270 * 1271 * @return Returns true if there is more data to scan, false otherwise. 1272 * 1273 * @throws IOException Thrown if i/o error occurs. 1274 * @throws EOFException Thrown on end of file. 
1275 */ 1276 public boolean scanData(String delimiter, XMLStringBuffer buffer) 1277 throws IOException { 1278 1279 boolean done = false; 1280 int delimLen = delimiter.length(); 1281 char charAt0 = delimiter.charAt(0); 1282 do { 1283 if (DEBUG_BUFFER) { 1284 System.out.print("(scanData: "); 1285 print(); 1286 System.out.println(); 1287 } 1288 1289 // load more characters, if needed 1290 1291 if (fCurrentEntity.position == fCurrentEntity.count) { 1292 load(0, true, false); 1293 } 1294 1295 boolean bNextEntity = false; 1296 1297 while ((fCurrentEntity.position > fCurrentEntity.count - delimLen) 1298 && (!bNextEntity)) 1299 { 1300 System.arraycopy(fCurrentEntity.ch, 1301 fCurrentEntity.position, 1302 fCurrentEntity.ch, 1303 0, 1304 fCurrentEntity.count - fCurrentEntity.position); 1305 1306 bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false); 1307 fCurrentEntity.position = 0; 1308 fCurrentEntity.startPosition = 0; 1309 } 1310 1311 if (fCurrentEntity.position > fCurrentEntity.count - delimLen) { 1312 // something must be wrong with the input: e.g., file ends in an unterminated comment 1313 int length = fCurrentEntity.count - fCurrentEntity.position; 1314 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); 1315 fCurrentEntity.columnNumber += fCurrentEntity.count; 1316 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); 1317 fCurrentEntity.position = fCurrentEntity.count; 1318 fCurrentEntity.startPosition = fCurrentEntity.count; 1319 load(0, true, false); 1320 return false; 1321 } 1322 1323 // normalize newlines 1324 int offset = fCurrentEntity.position; 1325 int c = fCurrentEntity.ch[offset]; 1326 int newlines = 0; 1327 if (c == '\n' || (c == '\r' && isExternal)) { 1328 if (DEBUG_BUFFER) { 1329 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 1330 print(); 1331 System.out.println(); 1332 } 1333 do { 1334 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1335 if (c 
== '\r' && isExternal) { 1336 newlines++; 1337 fCurrentEntity.lineNumber++; 1338 fCurrentEntity.columnNumber = 1; 1339 if (fCurrentEntity.position == fCurrentEntity.count) { 1340 offset = 0; 1341 fCurrentEntity.position = newlines; 1342 if (load(newlines, false, true)) { 1343 break; 1344 } 1345 } 1346 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1347 fCurrentEntity.position++; 1348 offset++; 1349 } 1350 /*** NEWLINE NORMALIZATION ***/ 1351 else { 1352 newlines++; 1353 } 1354 } else if (c == '\n') { 1355 newlines++; 1356 fCurrentEntity.lineNumber++; 1357 fCurrentEntity.columnNumber = 1; 1358 if (fCurrentEntity.position == fCurrentEntity.count) { 1359 offset = 0; 1360 fCurrentEntity.position = newlines; 1361 fCurrentEntity.count = newlines; 1362 if (load(newlines, false, true)) { 1363 break; 1364 } 1365 } 1366 } else { 1367 fCurrentEntity.position--; 1368 break; 1369 } 1370 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 1371 for (int i = offset; i < fCurrentEntity.position; i++) { 1372 fCurrentEntity.ch[i] = '\n'; 1373 } 1374 int length = fCurrentEntity.position - offset; 1375 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1376 buffer.append(fCurrentEntity.ch, offset, length); 1377 if (DEBUG_BUFFER) { 1378 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1379 print(); 1380 System.out.println(); 1381 } 1382 return true; 1383 } 1384 if (DEBUG_BUFFER) { 1385 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1386 print(); 1387 System.out.println(); 1388 } 1389 } 1390 1391 // iterate over buffer looking for delimiter 1392 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { 1393 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1394 if (c == charAt0) { 1395 // looks like we just hit the delimiter 1396 int delimOffset = fCurrentEntity.position - 1; 1397 for (int i = 1; i < delimLen; i++) { 1398 if (fCurrentEntity.position == fCurrentEntity.count) { 1399 fCurrentEntity.position 
-= i; 1400 break OUTER; 1401 } 1402 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1403 if (delimiter.charAt(i) != c) { 1404 fCurrentEntity.position -= i; 1405 break; 1406 } 1407 } 1408 if (fCurrentEntity.position == delimOffset + delimLen) { 1409 done = true; 1410 break; 1411 } 1412 } else if (c == '\n' || (isExternal && c == '\r')) { 1413 fCurrentEntity.position--; 1414 break; 1415 } else if (XMLChar.isInvalid(c)) { 1416 fCurrentEntity.position--; 1417 int length = fCurrentEntity.position - offset; 1418 fCurrentEntity.columnNumber += length - newlines; 1419 buffer.append(fCurrentEntity.ch, offset, length); 1420 return true; 1421 } 1422 } 1423 int length = fCurrentEntity.position - offset; 1424 fCurrentEntity.columnNumber += length - newlines; 1425 if (done) { 1426 length -= delimLen; 1427 } 1428 buffer.append(fCurrentEntity.ch, offset, length); 1429 1430 // return true if string was skipped 1431 if (DEBUG_BUFFER) { 1432 System.out.print(")scanData: "); 1433 print(); 1434 System.out.println(" -> " + done); 1435 } 1436 } while (!done); 1437 return !done; 1438 1439 } // scanData(String,XMLString) 1440 1441 /** 1442 * Skips a character appearing immediately on the input. 1443 * <p> 1444 * <strong>Note:</strong> The character is consumed only if it matches 1445 * the specified character. 1446 * 1447 * @param c The character to skip. 1448 * 1449 * @return Returns true if the character was skipped. 1450 * 1451 * @throws IOException Thrown if i/o error occurs. 1452 * @throws EOFException Thrown on end of file. 
1453 */ 1454 public boolean skipChar(int c) throws IOException { 1455 if (DEBUG_BUFFER) { 1456 System.out.print("(skipChar, '"+(char)c+"': "); 1457 print(); 1458 System.out.println(); 1459 } 1460 1461 // load more characters, if needed 1462 if (fCurrentEntity.position == fCurrentEntity.count) { 1463 load(0, true, true); 1464 } 1465 1466 // skip character 1467 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1468 if (cc == c) { 1469 fCurrentEntity.position++; 1470 if (c == '\n') { 1471 fCurrentEntity.lineNumber++; 1472 fCurrentEntity.columnNumber = 1; 1473 } else { 1474 fCurrentEntity.columnNumber++; 1475 } 1476 if (DEBUG_BUFFER) { 1477 System.out.print(")skipChar, '"+(char)c+"': "); 1478 print(); 1479 System.out.println(" -> true"); 1480 } 1481 return true; 1482 } else if (c == '\n' && cc == '\r' && isExternal) { 1483 // handle newlines 1484 if (fCurrentEntity.position == fCurrentEntity.count) { 1485 invokeListeners(1); 1486 fCurrentEntity.ch[0] = (char)cc; 1487 load(1, false, false); 1488 } 1489 fCurrentEntity.position++; 1490 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1491 fCurrentEntity.position++; 1492 } 1493 fCurrentEntity.lineNumber++; 1494 fCurrentEntity.columnNumber = 1; 1495 if (DEBUG_BUFFER) { 1496 System.out.print(")skipChar, '"+(char)c+"': "); 1497 print(); 1498 System.out.println(" -> true"); 1499 } 1500 return true; 1501 } 1502 1503 // character was not skipped 1504 if (DEBUG_BUFFER) { 1505 System.out.print(")skipChar, '"+(char)c+"': "); 1506 print(); 1507 System.out.println(" -> false"); 1508 } 1509 return false; 1510 1511 } // skipChar(int):boolean 1512 1513 public boolean isSpace(char ch){ 1514 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1515 } 1516 /** 1517 * Skips space characters appearing immediately on the input. 1518 * <p> 1519 * <strong>Note:</strong> The characters are consumed only if they are 1520 * space characters. 1521 * 1522 * @return Returns true if at least one space character was skipped. 
1523 * 1524 * @throws IOException Thrown if i/o error occurs. 1525 * @throws EOFException Thrown on end of file. 1526 * 1527 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 1528 */ 1529 public boolean skipSpaces() throws IOException { 1530 if (DEBUG_BUFFER) { 1531 System.out.print("(skipSpaces: "); 1532 print(); 1533 System.out.println(); 1534 } 1535 //boolean entityChanged = false; 1536 // load more characters, if needed 1537 if (fCurrentEntity.position == fCurrentEntity.count) { 1538 load(0, true, true); 1539 } 1540 1541 //we are doing this check only in skipSpace() because it is called by 1542 //fMiscDispatcher and we want the parser to exit gracefully when document 1543 //is well-formed. 1544 //it is possible that end of document is reached and 1545 //fCurrentEntity becomes null 1546 //nothing was read so entity changed 'false' should be returned. 1547 if(fCurrentEntity == null){ 1548 return false ; 1549 } 1550 1551 // skip spaces 1552 int c = fCurrentEntity.ch[fCurrentEntity.position]; 1553 if (XMLChar.isSpace(c)) { 1554 do { 1555 boolean entityChanged = false; 1556 // handle newlines 1557 if (c == '\n' || (isExternal && c == '\r')) { 1558 fCurrentEntity.lineNumber++; 1559 fCurrentEntity.columnNumber = 1; 1560 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1561 invokeListeners(0); 1562 fCurrentEntity.ch[0] = (char)c; 1563 entityChanged = load(1, true, false); 1564 if (!entityChanged){ 1565 // the load change the position to be 1, 1566 // need to restore it when entity not changed 1567 fCurrentEntity.position = 0; 1568 }else if(fCurrentEntity == null){ 1569 return true ; 1570 } 1571 } 1572 if (c == '\r' && isExternal) { 1573 // REVISIT: Does this need to be updated to fix the 1574 // #x0D ^#x0A newline normalization problem? 
-Ac 1575 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 1576 fCurrentEntity.position--; 1577 } 1578 } 1579 } else { 1580 fCurrentEntity.columnNumber++; 1581 } 1582 // load more characters, if needed 1583 if (!entityChanged){ 1584 fCurrentEntity.position++; 1585 } 1586 1587 if (fCurrentEntity.position == fCurrentEntity.count) { 1588 load(0, true, true); 1589 1590 //we are doing this check only in skipSpace() because it is called by 1591 //fMiscDispatcher and we want the parser to exit gracefully when document 1592 //is well-formed. 1593 1594 //it is possible that end of document is reached and 1595 //fCurrentEntity becomes null 1596 //nothing was read so entity changed 'false' should be returned. 1597 if(fCurrentEntity == null){ 1598 return true ; 1599 } 1600 1601 } 1602 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 1603 if (DEBUG_BUFFER) { 1604 System.out.print(")skipSpaces: "); 1605 print(); 1606 System.out.println(" -> true"); 1607 } 1608 return true; 1609 } 1610 1611 // no spaces were found 1612 if (DEBUG_BUFFER) { 1613 System.out.print(")skipSpaces: "); 1614 print(); 1615 System.out.println(" -> false"); 1616 } 1617 return false; 1618 1619 } // skipSpaces():boolean 1620 1621 1622 /** 1623 * @param legnth This function checks that following number of characters are available. 1624 * to the underlying buffer. 1625 * @return This function returns true if capacity asked is available. 1626 */ 1627 public boolean arrangeCapacity(int length) throws IOException{ 1628 return arrangeCapacity(length, false); 1629 } 1630 1631 /** 1632 * @param legnth This function checks that following number of characters are available. 1633 * to the underlying buffer. 1634 * @param if the underlying function should change the entity 1635 * @return This function returns true if capacity asked is available. 
1636 * 1637 */ 1638 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1639 //check if the capacity is availble in the current buffer 1640 //count is no. of characters in the buffer [x][m][l] 1641 //position is '0' based 1642 //System.out.println("fCurrent Entity " + fCurrentEntity); 1643 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1644 return true; 1645 } 1646 if(DEBUG_SKIP_STRING){ 1647 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1648 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1649 System.out.println("length = " + length); 1650 } 1651 boolean entityChanged = false; 1652 //load more characters -- this function shouldn't change the entity 1653 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1654 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1655 invokeListeners(0); 1656 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1657 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1658 fCurrentEntity.position = 0; 1659 } 1660 1661 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1662 int pos = fCurrentEntity.position; 1663 invokeListeners(pos); 1664 entityChanged = load(fCurrentEntity.count, changeEntity, false); 1665 fCurrentEntity.position = pos; 1666 if(entityChanged)break; 1667 } 1668 if(DEBUG_SKIP_STRING){ 1669 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1670 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1671 System.out.println("length = " + length); 1672 } 1673 } 1674 //load changes the position.. set it back to the point where we started. 1675 1676 //after loading check again. 
1677 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1678 return true; 1679 } else { 1680 return false; 1681 } 1682 } 1683 1684 /** 1685 * Skips the specified string appearing immediately on the input. 1686 * <p> 1687 * <strong>Note:</strong> The characters are consumed only if all 1688 * the characters are skipped. 1689 * 1690 * @param s The string to skip. 1691 * 1692 * @return Returns true if the string was skipped. 1693 * 1694 * @throws IOException Thrown if i/o error occurs. 1695 * @throws EOFException Thrown on end of file. 1696 */ 1697 public boolean skipString(String s) throws IOException { 1698 1699 final int length = s.length(); 1700 1701 //first make sure that required capacity is avaible 1702 if(arrangeCapacity(length, false)){ 1703 final int beforeSkip = fCurrentEntity.position ; 1704 int afterSkip = fCurrentEntity.position + length - 1 ; 1705 if(DEBUG_SKIP_STRING){ 1706 System.out.println("skipString,length = " + s + "," + length); 1707 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1708 } 1709 1710 //s.charAt() indexes are 0 to 'Length -1' based. 
1711 int i = length - 1 ; 1712 //check from reverse 1713 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1714 if(afterSkip-- == beforeSkip){ 1715 fCurrentEntity.position = fCurrentEntity.position + length ; 1716 fCurrentEntity.columnNumber += length; 1717 return true; 1718 } 1719 } 1720 } 1721 1722 return false; 1723 } // skipString(String):boolean 1724 1725 public boolean skipString(char [] s) throws IOException { 1726 1727 final int length = s.length; 1728 //first make sure that required capacity is avaible 1729 if(arrangeCapacity(length, false)){ 1730 int beforeSkip = fCurrentEntity.position ; 1731 int afterSkip = fCurrentEntity.position + length ; 1732 1733 if(DEBUG_SKIP_STRING){ 1734 System.out.println("skipString,length = " + new String(s) + "," + length); 1735 System.out.println("skipString,length = " + new String(s) + "," + length); 1736 } 1737 1738 for(int i=0;i<length;i++){ 1739 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1740 return false; 1741 } 1742 } 1743 fCurrentEntity.position = fCurrentEntity.position + length ; 1744 fCurrentEntity.columnNumber += length; 1745 return true; 1746 1747 } 1748 1749 return false; 1750 } 1751 1752 // 1753 // Locator methods 1754 // 1755 // 1756 // Private methods 1757 // 1758 1759 /** 1760 * Loads a chunk of text. 1761 * 1762 * @param offset The offset into the character buffer to 1763 * read the next batch of characters. 1764 * @param changeEntity True if the load should change entities 1765 * at the end of the entity, otherwise leave 1766 * the current entity in place and the entity 1767 * boundary will be signaled by the return 1768 * value. 1769 * @param notify Determine whether to notify listeners of 1770 * the event 1771 * 1772 * @returns Returns true if the entity changed as a result of this 1773 * load operation. 
1774 */ 1775 final boolean load(int offset, boolean changeEntity, boolean notify) 1776 throws IOException { 1777 if (DEBUG_BUFFER) { 1778 System.out.print("(load, "+offset+": "); 1779 print(); 1780 System.out.println(); 1781 } 1782 if (notify) { 1783 invokeListeners(offset); 1784 } 1785 //maintaing the count till last load 1786 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1787 // read characters 1788 int length = fCurrentEntity.ch.length - offset; 1789 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1790 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1791 } 1792 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1793 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1794 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1795 1796 // reset count and position 1797 boolean entityChanged = false; 1798 if (count != -1) { 1799 if (count != 0) { 1800 // record the last count 1801 fCurrentEntity.fLastCount = count; 1802 fCurrentEntity.count = count + offset; 1803 fCurrentEntity.position = offset; 1804 } 1805 } 1806 // end of this entity 1807 else { 1808 fCurrentEntity.count = offset; 1809 fCurrentEntity.position = offset; 1810 entityChanged = true; 1811 1812 if (changeEntity) { 1813 //notify the entity manager about the end of entity 1814 fEntityManager.endEntity(); 1815 //return if the current entity becomes null 1816 if(fCurrentEntity == null){ 1817 throw END_OF_DOCUMENT_ENTITY; 1818 } 1819 // handle the trailing edges 1820 if (fCurrentEntity.position == fCurrentEntity.count) { 1821 load(0, true, false); 1822 } 1823 } 1824 1825 } 1826 if (DEBUG_BUFFER) { 1827 System.out.print(")load, "+offset+": "); 1828 print(); 1829 System.out.println(); 1830 } 1831 1832 return entityChanged; 1833 1834 } // load(int, boolean):boolean 1835 1836 /** 1837 * Creates a reader capable of reading the given 
input stream in 1838 * the specified encoding. 1839 * 1840 * @param inputStream The input stream. 1841 * @param encoding The encoding name that the input stream is 1842 * encoded using. If the user has specified that 1843 * Java encoding names are allowed, then the 1844 * encoding name may be a Java encoding name; 1845 * otherwise, it is an ianaEncoding name. 1846 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1847 * specify a byte order, this tells whether the order is bigEndian. null menas 1848 * unknown or not relevant. 1849 * 1850 * @return Returns a reader. 1851 */ 1852 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1853 throws IOException { 1854 1855 // normalize encoding name 1856 if (encoding == null) { 1857 encoding = "UTF-8"; 1858 } 1859 1860 // try to use an optimized reader 1861 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1862 if (ENCODING.equals("UTF-8")) { 1863 if (DEBUG_ENCODINGS) { 1864 System.out.println("$$$ creating UTF8Reader"); 1865 } 1866 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1867 } 1868 if (ENCODING.equals("US-ASCII")) { 1869 if (DEBUG_ENCODINGS) { 1870 System.out.println("$$$ creating ASCIIReader"); 1871 } 1872 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1873 } 1874 if(ENCODING.equals("ISO-10646-UCS-4")) { 1875 if(isBigEndian != null) { 1876 boolean isBE = isBigEndian.booleanValue(); 1877 if(isBE) { 1878 return new UCSReader(inputStream, UCSReader.UCS4BE); 1879 } else { 1880 return new UCSReader(inputStream, UCSReader.UCS4LE); 1881 } 1882 } else { 1883 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1884 "EncodingByteOrderUnsupported", 1885 new Object[] { encoding }, 1886 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1887 } 
1888 } 1889 if(ENCODING.equals("ISO-10646-UCS-2")) { 1890 if(isBigEndian != null) { // sould never happen with this encoding... 1891 boolean isBE = isBigEndian.booleanValue(); 1892 if(isBE) { 1893 return new UCSReader(inputStream, UCSReader.UCS2BE); 1894 } else { 1895 return new UCSReader(inputStream, UCSReader.UCS2LE); 1896 } 1897 } else { 1898 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1899 "EncodingByteOrderUnsupported", 1900 new Object[] { encoding }, 1901 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1902 } 1903 } 1904 1905 // check for valid name 1906 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1907 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1908 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1909 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1910 "EncodingDeclInvalid", 1911 new Object[] { encoding }, 1912 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1913 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1914 // because every byte is a valid ISO Latin 1 character. 1915 // It may not translate correctly but if we failed on 1916 // the encoding anyway, then we're expecting the content 1917 // of the document to be bad. This will just prevent an 1918 // invalid UTF-8 sequence to be detected. This is only 1919 // important when continue-after-fatal-error is turned 1920 // on. -Ac 1921 encoding = "ISO-8859-1"; 1922 } 1923 1924 // try to use a Java reader 1925 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1926 if (javaEncoding == null) { 1927 if(fAllowJavaEncodings) { 1928 javaEncoding = encoding; 1929 } else { 1930 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1931 "EncodingDeclInvalid", 1932 new Object[] { encoding }, 1933 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1934 // see comment above. 
1935 javaEncoding = "ISO8859_1"; 1936 } 1937 } 1938 else if (javaEncoding.equals("ASCII")) { 1939 if (DEBUG_ENCODINGS) { 1940 System.out.println("$$$ creating ASCIIReader"); 1941 } 1942 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1943 } 1944 1945 if (DEBUG_ENCODINGS) { 1946 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1947 if (javaEncoding == encoding) { 1948 System.out.print(" (IANA encoding)"); 1949 } 1950 System.out.println(); 1951 } 1952 return new InputStreamReader(inputStream, javaEncoding); 1953 1954 } // createReader(InputStream,String, Boolean): Reader 1955 1956 /** 1957 * Returns the IANA encoding name that is auto-detected from 1958 * the bytes specified, with the endian-ness of that encoding where appropriate. 1959 * 1960 * @param b4 The first four bytes of the input. 1961 * @param count The number of bytes actually read. 1962 * @return a 2-element array: the first element, an IANA-encoding string, 1963 * the second element a Boolean which is true iff the document is big endian, false 1964 * if it's little-endian, and null if the distinction isn't relevant. 
1965 */ 1966 protected Object[] getEncodingName(byte[] b4, int count) { 1967 1968 if (count < 2) { 1969 return new Object[]{"UTF-8", null}; 1970 } 1971 1972 // UTF-16, with BOM 1973 int b0 = b4[0] & 0xFF; 1974 int b1 = b4[1] & 0xFF; 1975 if (b0 == 0xFE && b1 == 0xFF) { 1976 // UTF-16, big-endian 1977 return new Object [] {"UTF-16BE", new Boolean(true)}; 1978 } 1979 if (b0 == 0xFF && b1 == 0xFE) { 1980 // UTF-16, little-endian 1981 return new Object [] {"UTF-16LE", new Boolean(false)}; 1982 } 1983 1984 // default to UTF-8 if we don't have enough bytes to make a 1985 // good determination of the encoding 1986 if (count < 3) { 1987 return new Object [] {"UTF-8", null}; 1988 } 1989 1990 // UTF-8 with a BOM 1991 int b2 = b4[2] & 0xFF; 1992 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1993 return new Object [] {"UTF-8", null}; 1994 } 1995 1996 // default to UTF-8 if we don't have enough bytes to make a 1997 // good determination of the encoding 1998 if (count < 4) { 1999 return new Object [] {"UTF-8", null}; 2000 } 2001 2002 // other encodings 2003 int b3 = b4[3] & 0xFF; 2004 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2005 // UCS-4, big endian (1234) 2006 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2007 } 2008 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2009 // UCS-4, little endian (4321) 2010 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2011 } 2012 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2013 // UCS-4, unusual octet order (2143) 2014 // REVISIT: What should this be? 2015 return new Object [] {"ISO-10646-UCS-4", null}; 2016 } 2017 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2018 // UCS-4, unusual octect order (3412) 2019 // REVISIT: What should this be? 2020 return new Object [] {"ISO-10646-UCS-4", null}; 2021 } 2022 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2023 // UTF-16, big-endian, no BOM 2024 // (or could turn out to be UCS-2... 
2025 // REVISIT: What should this be? 2026 return new Object [] {"UTF-16BE", new Boolean(true)}; 2027 } 2028 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2029 // UTF-16, little-endian, no BOM 2030 // (or could turn out to be UCS-2... 2031 return new Object [] {"UTF-16LE", new Boolean(false)}; 2032 } 2033 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2034 // EBCDIC 2035 // a la xerces1, return CP037 instead of EBCDIC here 2036 return new Object [] {"CP037", null}; 2037 } 2038 2039 // default encoding 2040 return new Object [] {"UTF-8", null}; 2041 2042 } // getEncodingName(byte[],int):Object[] 2043 2044 /** 2045 * xxx not removing endEntity() so that i remember that we need to implement it. 2046 * Ends an entity. 2047 * 2048 * @throws XNIException Thrown by entity handler to signal an error. 2049 */ 2050 // 2051 /** Prints the contents of the buffer. */ 2052 final void print() { 2053 if (DEBUG_BUFFER) { 2054 if (fCurrentEntity != null) { 2055 System.out.print('['); 2056 System.out.print(fCurrentEntity.count); 2057 System.out.print(' '); 2058 System.out.print(fCurrentEntity.position); 2059 if (fCurrentEntity.count > 0) { 2060 System.out.print(" \""); 2061 for (int i = 0; i < fCurrentEntity.count; i++) { 2062 if (i == fCurrentEntity.position) { 2063 System.out.print('^'); 2064 } 2065 char c = fCurrentEntity.ch[i]; 2066 switch (c) { 2067 case '\n': { 2068 System.out.print("\\n"); 2069 break; 2070 } 2071 case '\r': { 2072 System.out.print("\\r"); 2073 break; 2074 } 2075 case '\t': { 2076 System.out.print("\\t"); 2077 break; 2078 } 2079 case '\\': { 2080 System.out.print("\\\\"); 2081 break; 2082 } 2083 default: { 2084 System.out.print(c); 2085 } 2086 } 2087 } 2088 if (fCurrentEntity.position == fCurrentEntity.count) { 2089 System.out.print('^'); 2090 } 2091 System.out.print('"'); 2092 } 2093 System.out.print(']'); 2094 System.out.print(" @ "); 2095 System.out.print(fCurrentEntity.lineNumber); 2096 System.out.print(','); 2097 
System.out.print(fCurrentEntity.columnNumber); 2098 } else { 2099 System.out.print("*NO CURRENT ENTITY*"); 2100 } 2101 } 2102 } 2103 2104 /** 2105 * Registers the listener object and provides callback. 2106 * @param listener listener to which call back should be provided when scanner buffer 2107 * is being changed. 2108 */ 2109 public void registerListener(XMLBufferListener listener) { 2110 if(!listeners.contains(listener)) 2111 listeners.add(listener); 2112 } 2113 2114 /** 2115 * 2116 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2117 */ 2118 public void invokeListeners(int loadPos){ 2119 for(int i=0;i<listeners.size();i++){ 2120 XMLBufferListener listener =(XMLBufferListener) listeners.get(i); 2121 listener.refresh(loadPos); 2122 } 2123 } 2124 2125 /** 2126 * Skips space characters appearing immediately on the input that would 2127 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2128 * normalization is performed. This is useful when scanning structures 2129 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2130 * characters. 2131 * <p> 2132 * <strong>Note:</strong> The characters are consumed only if they would 2133 * match non-terminal S before end of line normalization is performed. 2134 * 2135 * @return Returns true if at least one space character was skipped. 2136 * 2137 * @throws IOException Thrown if i/o error occurs. 2138 * @throws EOFException Thrown on end of file. 
2139 * 2140 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 2141 */ 2142 public final boolean skipDeclSpaces() throws IOException { 2143 if (DEBUG_BUFFER) { 2144 System.out.print("(skipDeclSpaces: "); 2145 //XMLEntityManager.print(fCurrentEntity); 2146 System.out.println(); 2147 } 2148 2149 // load more characters, if needed 2150 if (fCurrentEntity.position == fCurrentEntity.count) { 2151 load(0, true, false); 2152 } 2153 2154 // skip spaces 2155 int c = fCurrentEntity.ch[fCurrentEntity.position]; 2156 if (XMLChar.isSpace(c)) { 2157 boolean external = fCurrentEntity.isExternal(); 2158 do { 2159 boolean entityChanged = false; 2160 // handle newlines 2161 if (c == '\n' || (external && c == '\r')) { 2162 fCurrentEntity.lineNumber++; 2163 fCurrentEntity.columnNumber = 1; 2164 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 2165 fCurrentEntity.ch[0] = (char)c; 2166 entityChanged = load(1, true, false); 2167 if (!entityChanged) 2168 // the load change the position to be 1, 2169 // need to restore it when entity not changed 2170 fCurrentEntity.position = 0; 2171 } 2172 if (c == '\r' && external) { 2173 // REVISIT: Does this need to be updated to fix the 2174 // #x0D ^#x0A newline normalization problem? 
-Ac 2175 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 2176 fCurrentEntity.position--; 2177 } 2178 } 2179 /*** NEWLINE NORMALIZATION *** 2180 * else { 2181 * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' 2182 * && external) { 2183 * fCurrentEntity.position++; 2184 * } 2185 * } 2186 * /***/ 2187 } else { 2188 fCurrentEntity.columnNumber++; 2189 } 2190 // load more characters, if needed 2191 if (!entityChanged) 2192 fCurrentEntity.position++; 2193 if (fCurrentEntity.position == fCurrentEntity.count) { 2194 load(0, true, false); 2195 } 2196 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 2197 if (DEBUG_BUFFER) { 2198 System.out.print(")skipDeclSpaces: "); 2199 // XMLEntityManager.print(fCurrentEntity); 2200 System.out.println(" -> true"); 2201 } 2202 return true; 2203 } 2204 2205 // no spaces were found 2206 if (DEBUG_BUFFER) { 2207 System.out.print(")skipDeclSpaces: "); 2208 //XMLEntityManager.print(fCurrentEntity); 2209 System.out.println(" -> false"); 2210 } 2211 return false; 2212 2213 } // skipDeclSpaces():boolean 2214 2215 2216 } // class XMLEntityScanner