1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 import java.io.EOFException; 24 import java.io.IOException; 25 import java.util.Locale; 26 import java.util.Vector; 27 28 import com.sun.xml.internal.stream.Entity; 29 import com.sun.xml.internal.stream.XMLBufferListener; 30 import java.io.InputStream; 31 import java.io.InputStreamReader; 32 import java.io.Reader; 33 34 35 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 36 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 37 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 38 39 40 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 41 import com.sun.org.apache.xerces.internal.util.EncodingMap; 42 43 import com.sun.org.apache.xerces.internal.util.SymbolTable; 44 import com.sun.org.apache.xerces.internal.util.XMLChar; 45 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 46 import com.sun.org.apache.xerces.internal.xni.QName; 47 import com.sun.org.apache.xerces.internal.xni.XMLString; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 50 import com.sun.org.apache.xerces.internal.xni.*; 51 52 /** 53 * Implements 
 * the entity scanner methods.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator {


    /**
     * Entity currently being scanned. Set by setCurrentEntity() and cleared
     * (set to null) by every reset() overload.
     */
    protected Entity.ScannedEntity fCurrentEntity = null ;

    /**
     * Buffer size recorded by setBufferSize(); starts out as
     * XMLEntityManager.DEFAULT_BUFFER_SIZE.
     */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager supplied through the constructor or reset(SymbolTable, XMLEntityManager, XMLErrorReporter). */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;
    /** Listeners which should know when load is being called; cleared by reset(). */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0..126). Entries are switched
     * on in the static initializer and consulted by the name scanning
     * methods for characters below 127.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    // NOTE(review): no use of DEBUG_SKIP_STRING is visible in this part of the file.
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * fillInStackTrace() is overridden to return the instance unchanged, so
     * this shared instance never records a stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names; assigned by the reset() overloads. */
    protected SymbolTable fSymbolTable = null;
    /** Error reporter; assigned by the reset() overloads. */
    protected XMLErrorReporter fErrorReporter = null;
    // NOTE(review): the readers of the three whitespace fields below are not
    // visible in this part of the file. reset(PropertyManager) and
    // reset(XMLComponentManager) set whiteSpaceLen to 0 and
    // whiteSpaceInfoNeeded to true.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     * The value is read from the component manager in reset(XMLComponentManager).
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table.
*/ 109 protected static final String SYMBOL_TABLE = 110 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 111 112 /** Property identifier: error reporter. */ 113 protected static final String ERROR_REPORTER = 114 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 115 116 /** Feature identifier: allow Java encodings. */ 117 protected static final String ALLOW_JAVA_ENCODINGS = 118 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 119 120 protected PropertyManager fPropertyManager = null ; 121 122 boolean isExternal = false; 123 static { 124 125 for(int i=0x0041;i<=0x005A ; i++){ 126 VALID_NAMES[i]=true; 127 } 128 for(int i=0x0061;i<=0x007A; i++){ 129 VALID_NAMES[i]=true; 130 } 131 for(int i=0x0030;i<=0x0039; i++){ 132 VALID_NAMES[i]=true; 133 } 134 VALID_NAMES[45]=true; 135 VALID_NAMES[46]=true; 136 VALID_NAMES[58]=true; 137 VALID_NAMES[95]=true; 138 } 139 // SAPJVM: Remember, that the XML version has explicitly been set, 140 // so that XMLStreamReader.getVersion() can find that out. 141 boolean xmlVersionSetExplicitly = false; 142 // 143 // Constructors 144 // 145 146 /** Default constructor. */ 147 public XMLEntityScanner() { 148 } // <init>() 149 150 151 /** private constructor, this class can only be instantiated within this class. Instance of this class should 152 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 153 * @see getEntityScanner() 154 * @see getEntityScanner(ScannedEntity) 155 */ 156 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 157 fEntityManager = entityManager ; 158 reset(propertyManager); 159 } // <init>() 160 161 162 // set buffer size: 163 public final void setBufferSize(int size) { 164 // REVISIT: Buffer size passed to entity scanner 165 // was not being kept in synch with the actual size 166 // of the buffers in each scanned entity. 
If any 167 // of the buffers were actually resized, it was possible 168 // that the parser would throw an ArrayIndexOutOfBoundsException 169 // for documents which contained names which are longer than 170 // the current buffer size. Conceivably the buffer size passed 171 // to entity scanner could be used to determine a minimum size 172 // for resizing, if doubling its size is smaller than this 173 // minimum. -- mrglavas 174 fBufferSize = size; 175 } 176 177 /** 178 * Resets the components. 179 */ 180 public void reset(PropertyManager propertyManager){ 181 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 182 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 183 fCurrentEntity = null; 184 whiteSpaceLen = 0; 185 whiteSpaceInfoNeeded = true; 186 listeners.clear(); 187 } 188 189 /** 190 * Resets the component. The component can query the component manager 191 * about any features and properties that affect the operation of the 192 * component. 193 * 194 * @param componentManager The component manager. 195 * 196 * @throws SAXException Thrown by component on initialization error. 197 * For example, if a feature or property is 198 * required for the operation of the component, the 199 * component manager may throw a 200 * SAXNotRecognizedException or a 201 * SAXNotSupportedException. 
202 */ 203 public void reset(XMLComponentManager componentManager) 204 throws XMLConfigurationException { 205 206 //System.out.println(" this is being called"); 207 // xerces features 208 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 209 210 //xerces properties 211 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 212 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 213 fCurrentEntity = null; 214 whiteSpaceLen = 0; 215 whiteSpaceInfoNeeded = true; 216 listeners.clear(); 217 } // reset(XMLComponentManager) 218 219 220 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 221 XMLErrorReporter reporter) { 222 fCurrentEntity = null; 223 fSymbolTable = symbolTable; 224 fEntityManager = entityManager; 225 fErrorReporter = reporter; 226 } 227 228 /** 229 * Returns the XML version of the current entity. This will normally be the 230 * value from the XML or text declaration or defaulted by the parser. Note that 231 * that this value may be different than the version of the processing rules 232 * applied to the current entity. For instance, an XML 1.1 document may refer to 233 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 234 * document. Also note that, for a given entity, this value can only be considered 235 * final once the XML or text declaration has been read or once it has been 236 * determined that there is no such declaration. 237 */ 238 public final String getXMLVersion() { 239 if (fCurrentEntity != null) { 240 return fCurrentEntity.xmlVersion; 241 } 242 return null; 243 } // getXMLVersion():String 244 245 /** 246 * Sets the XML version. This method is used by the 247 * scanners to report the value of the version pseudo-attribute 248 * in an XML or text declaration. 
249 * 250 * @param xmlVersion the XML version of the current entity 251 */ 252 public final void setXMLVersion(String xmlVersion) { 253 xmlVersionSetExplicitly = true; // SAPJVM 254 fCurrentEntity.xmlVersion = xmlVersion; 255 } // setXMLVersion(String) 256 257 258 /** set the instance of current scanned entity. 259 * @param ScannedEntity 260 */ 261 262 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 263 fCurrentEntity = scannedEntity ; 264 if(fCurrentEntity != null){ 265 isExternal = fCurrentEntity.isExternal(); 266 if(DEBUG_BUFFER) 267 System.out.println("Current Entity is "+scannedEntity.name); 268 } 269 } 270 271 public Entity.ScannedEntity getCurrentEntity(){ 272 return fCurrentEntity ; 273 } 274 // 275 // XMLEntityReader methods 276 // 277 278 /** 279 * Returns the base system identifier of the currently scanned 280 * entity, or null if none is available. 281 */ 282 public final String getBaseSystemId() { 283 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 284 } // getBaseSystemId():String 285 286 /** 287 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 288 */ 289 public void setBaseSystemId(String systemId) { 290 //no-op 291 } 292 293 ///////////// Locator methods start. 294 public final int getLineNumber(){ 295 //if the entity is closed, we should return -1 296 //xxx at first place why such call should be there... 297 return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; 298 } 299 300 /** 301 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 302 */ 303 public void setLineNumber(int line) { 304 //no-op 305 } 306 307 308 public final int getColumnNumber(){ 309 //if the entity is closed, we should return -1 310 //xxx at first place why such call should be there... 311 return fCurrentEntity != null ? 
fCurrentEntity.columnNumber : -1 ; 312 } 313 314 /** 315 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 316 */ 317 public void setColumnNumber(int col) { 318 // no-op 319 } 320 321 322 public final int getCharacterOffset(){ 323 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 324 } 325 326 /** Returns the expanded system identifier. */ 327 public final String getExpandedSystemId() { 328 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 329 } 330 331 /** 332 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 333 */ 334 public void setExpandedSystemId(String systemId) { 335 //no-op 336 } 337 338 /** Returns the literal system identifier. */ 339 public final String getLiteralSystemId() { 340 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 341 } 342 343 /** 344 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 345 */ 346 public void setLiteralSystemId(String systemId) { 347 //no-op 348 } 349 350 /** Returns the public identifier. */ 351 public final String getPublicId() { 352 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 353 } 354 355 /** 356 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 357 */ 358 public void setPublicId(String publicId) { 359 //no-op 360 } 361 362 ///////////////// Locator methods finished. 
363 364 /** the version of the current entity being scanned */ 365 public void setVersion(String version){ 366 fCurrentEntity.version = version; 367 } 368 369 public String getVersion(){ 370 if (fCurrentEntity != null) 371 return fCurrentEntity.version ; 372 return null; 373 } 374 375 /** 376 * Returns the encoding of the current entity. 377 * Note that, for a given entity, this value can only be 378 * considered final once the encoding declaration has been read (or once it 379 * has been determined that there is no such declaration) since, no encoding 380 * having been specified on the XMLInputSource, the parser 381 * will make an initial "guess" which could be in error. 382 */ 383 public final String getEncoding() { 384 if (fCurrentEntity != null) { 385 return fCurrentEntity.encoding; 386 } 387 return null; 388 } // getEncoding():String 389 390 /** 391 * Sets the encoding of the scanner. This method is used by the 392 * scanners if the XMLDecl or TextDecl line contains an encoding 393 * pseudo-attribute. 394 * <p> 395 * <strong>Note:</strong> The underlying character reader on the 396 * current entity will be changed to accomodate the new encoding. 397 * However, the new encoding is ignored if the current reader was 398 * not constructed from an input stream (e.g. an external entity 399 * that is resolved directly to the appropriate java.io.Reader 400 * object). 401 * 402 * @param encoding The IANA encoding name of the new encoding. 403 * 404 * @throws IOException Thrown if the new encoding is not supported. 
405 * 406 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 407 */ 408 public final void setEncoding(String encoding) throws IOException { 409 410 if (DEBUG_ENCODINGS) { 411 System.out.println("$$$ setEncoding: "+encoding); 412 } 413 414 if (fCurrentEntity.stream != null) { 415 // if the encoding is the same, don't change the reader and 416 // re-use the original reader used by the OneCharReader 417 // NOTE: Besides saving an object, this overcomes deficiencies 418 // in the UTF-16 reader supplied with the standard Java 419 // distribution (up to and including 1.3). The UTF-16 420 // decoder buffers 8K blocks even when only asked to read 421 // a single char! -Ac 422 if (fCurrentEntity.encoding == null || 423 !fCurrentEntity.encoding.equals(encoding)) { 424 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 425 // and we know the endian-ness, we shouldn't change readers. 426 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 427 // the endian-ness from the encoding we presently have. 
428 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 429 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 430 if(ENCODING.equals("UTF-16")) return; 431 if(ENCODING.equals("ISO-10646-UCS-4")) { 432 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 433 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 434 } else { 435 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 436 } 437 return; 438 } 439 if(ENCODING.equals("ISO-10646-UCS-2")) { 440 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 441 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 442 } else { 443 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 444 } 445 return; 446 } 447 } 448 // wrap a new reader around the input stream, changing 449 // the encoding 450 if (DEBUG_ENCODINGS) { 451 System.out.println("$$$ creating new reader from stream: "+ 452 fCurrentEntity.stream); 453 } 454 //fCurrentEntity.stream.reset(); 455 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 456 fCurrentEntity.encoding = encoding; 457 458 } else { 459 if (DEBUG_ENCODINGS) 460 System.out.println("$$$ reusing old reader on stream"); 461 } 462 } 463 464 } // setEncoding(String) 465 466 /** Returns true if the current entity being scanned is external. */ 467 public final boolean isExternal() { 468 return fCurrentEntity.isExternal(); 469 } // isExternal():boolean 470 471 public int getChar(int relative) throws IOException{ 472 if(arrangeCapacity(relative + 1, false)){ 473 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 474 }else{ 475 return -1; 476 } 477 }//getChar() 478 479 /** 480 * Returns the next character on the input. 481 * <p> 482 * <strong>Note:</strong> The character is <em>not</em> consumed. 483 * 484 * @throws IOException Thrown if i/o error occurs. 485 * @throws EOFException Thrown on end of file. 
486 */ 487 public int peekChar() throws IOException { 488 if (DEBUG_BUFFER) { 489 System.out.print("(peekChar: "); 490 print(); 491 System.out.println(); 492 } 493 494 // load more characters, if needed 495 if (fCurrentEntity.position == fCurrentEntity.count) { 496 invokeListeners(0); 497 load(0, true); 498 } 499 500 // peek at character 501 int c = fCurrentEntity.ch[fCurrentEntity.position]; 502 503 // return peeked character 504 if (DEBUG_BUFFER) { 505 System.out.print(")peekChar: "); 506 print(); 507 if (isExternal) { 508 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); 509 } else { 510 System.out.println(" -> '"+(char)c+"'"); 511 } 512 } 513 if (isExternal) { 514 return c != '\r' ? c : '\n'; 515 } else { 516 return c; 517 } 518 519 } // peekChar():int 520 521 /** 522 * Returns the next character on the input. 523 * <p> 524 * <strong>Note:</strong> The character is consumed. 525 * 526 * @throws IOException Thrown if i/o error occurs. 527 * @throws EOFException Thrown on end of file. 
528 */ 529 public int scanChar() throws IOException { 530 if (DEBUG_BUFFER) { 531 System.out.print("(scanChar: "); 532 print(); 533 System.out.println(); 534 } 535 536 // load more characters, if needed 537 if (fCurrentEntity.position == fCurrentEntity.count) { 538 invokeListeners(0); 539 load(0, true); 540 } 541 542 // scan character 543 int c = fCurrentEntity.ch[fCurrentEntity.position++]; 544 if (c == '\n' || 545 (c == '\r' && isExternal)) { 546 fCurrentEntity.lineNumber++; 547 fCurrentEntity.columnNumber = 1; 548 if (fCurrentEntity.position == fCurrentEntity.count) { 549 invokeListeners(1); 550 fCurrentEntity.ch[0] = (char)c; 551 load(1, false); 552 } 553 if (c == '\r' && isExternal) { 554 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { 555 fCurrentEntity.position--; 556 } 557 c = '\n'; 558 } 559 } 560 561 // return character that was scanned 562 if (DEBUG_BUFFER) { 563 System.out.print(")scanChar: "); 564 print(); 565 System.out.println(" -> '"+(char)c+"'"); 566 } 567 fCurrentEntity.columnNumber++; 568 return c; 569 570 } // scanChar():int 571 572 /** 573 * Returns a string matching the NMTOKEN production appearing immediately 574 * on the input as a symbol, or null if NMTOKEN Name string is present. 575 * <p> 576 * <strong>Note:</strong> The NMTOKEN characters are consumed. 577 * <p> 578 * <strong>Note:</strong> The string returned must be a symbol. The 579 * SymbolTable can be used for this purpose. 580 * 581 * @throws IOException Thrown if i/o error occurs. 582 * @throws EOFException Thrown on end of file. 
583 * 584 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 585 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 586 */ 587 public String scanNmtoken() throws IOException { 588 if (DEBUG_BUFFER) { 589 System.out.print("(scanNmtoken: "); 590 print(); 591 System.out.println(); 592 } 593 594 // load more characters, if needed 595 if (fCurrentEntity.position == fCurrentEntity.count) { 596 invokeListeners(0); 597 load(0, true); 598 } 599 600 // scan nmtoken 601 int offset = fCurrentEntity.position; 602 boolean vc = false; 603 char c; 604 while (true){ 605 //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { 606 c = fCurrentEntity.ch[fCurrentEntity.position]; 607 if(c < 127){ 608 vc = VALID_NAMES[c]; 609 }else{ 610 vc = XMLChar.isName(c); 611 } 612 if(!vc)break; 613 614 if (++fCurrentEntity.position == fCurrentEntity.count) { 615 int length = fCurrentEntity.position - offset; 616 invokeListeners(length); 617 if (length == fCurrentEntity.fBufferSize) { 618 // bad luck we have to resize our buffer 619 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 620 System.arraycopy(fCurrentEntity.ch, offset, 621 tmp, 0, length); 622 fCurrentEntity.ch = tmp; 623 fCurrentEntity.fBufferSize *= 2; 624 } else { 625 System.arraycopy(fCurrentEntity.ch, offset, 626 fCurrentEntity.ch, 0, length); 627 } 628 offset = 0; 629 if (load(length, false)) { 630 break; 631 } 632 } 633 } 634 int length = fCurrentEntity.position - offset; 635 fCurrentEntity.columnNumber += length; 636 637 // return nmtoken 638 String symbol = null; 639 if (length > 0) { 640 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 641 } 642 if (DEBUG_BUFFER) { 643 System.out.print(")scanNmtoken: "); 644 print(); 645 System.out.println(" -> "+String.valueOf(symbol)); 646 } 647 return symbol; 648 649 } // scanNmtoken():String 650 651 /** 652 * Returns a string matching the Name production appearing immediately 653 * on the input as a symbol, or null if no Name string 
is present. 654 * <p> 655 * <strong>Note:</strong> The Name characters are consumed. 656 * <p> 657 * <strong>Note:</strong> The string returned must be a symbol. The 658 * SymbolTable can be used for this purpose. 659 * 660 * @throws IOException Thrown if i/o error occurs. 661 * @throws EOFException Thrown on end of file. 662 * 663 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 664 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 665 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 666 */ 667 public String scanName() throws IOException { 668 if (DEBUG_BUFFER) { 669 System.out.print("(scanName: "); 670 print(); 671 System.out.println(); 672 } 673 674 // load more characters, if needed 675 if (fCurrentEntity.position == fCurrentEntity.count) { 676 invokeListeners(0); 677 load(0, true); 678 } 679 680 // scan name 681 int offset = fCurrentEntity.position; 682 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 683 if (++fCurrentEntity.position == fCurrentEntity.count) { 684 invokeListeners(1); 685 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 686 offset = 0; 687 if (load(1, false)) { 688 fCurrentEntity.columnNumber++; 689 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 690 691 if (DEBUG_BUFFER) { 692 System.out.print(")scanName: "); 693 print(); 694 System.out.println(" -> "+String.valueOf(symbol)); 695 } 696 return symbol; 697 } 698 } 699 boolean vc =false; 700 while (true ){ 701 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 702 char c = fCurrentEntity.ch[fCurrentEntity.position]; 703 if(c < 127){ 704 vc = VALID_NAMES[c]; 705 }else{ 706 vc = XMLChar.isName(c); 707 } 708 if(!vc)break; 709 if (++fCurrentEntity.position == fCurrentEntity.count) { 710 int length = fCurrentEntity.position - offset; 711 invokeListeners(length); 712 if (length == fCurrentEntity.fBufferSize) { 713 // bad luck we have to resize our buffer 714 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 715 
System.arraycopy(fCurrentEntity.ch, offset, 716 tmp, 0, length); 717 fCurrentEntity.ch = tmp; 718 fCurrentEntity.fBufferSize *= 2; 719 } else { 720 System.arraycopy(fCurrentEntity.ch, offset, 721 fCurrentEntity.ch, 0, length); 722 } 723 offset = 0; 724 if (load(length, false)) { 725 break; 726 } 727 } 728 } 729 } 730 int length = fCurrentEntity.position - offset; 731 fCurrentEntity.columnNumber += length; 732 733 // return name 734 String symbol; 735 if (length > 0) { 736 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 737 } else 738 symbol = null; 739 if (DEBUG_BUFFER) { 740 System.out.print(")scanName: "); 741 print(); 742 System.out.println(" -> "+String.valueOf(symbol)); 743 } 744 return symbol; 745 746 } // scanName():String 747 748 /** 749 * Scans a qualified name from the input, setting the fields of the 750 * QName structure appropriately. 751 * <p> 752 * <strong>Note:</strong> The qualified name characters are consumed. 753 * <p> 754 * <strong>Note:</strong> The strings used to set the values of the 755 * QName structure must be symbols. The SymbolTable can be used for 756 * this purpose. 757 * 758 * @param qname The qualified name structure to fill. 759 * 760 * @return Returns true if a qualified name appeared immediately on 761 * the input and was scanned, false otherwise. 762 * 763 * @throws IOException Thrown if i/o error occurs. 764 * @throws EOFException Thrown on end of file. 
765 * 766 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 767 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 768 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 769 */ 770 public boolean scanQName(QName qname) throws IOException { 771 if (DEBUG_BUFFER) { 772 System.out.print("(scanQName, "+qname+": "); 773 print(); 774 System.out.println(); 775 } 776 777 // load more characters, if needed 778 if (fCurrentEntity.position == fCurrentEntity.count) { 779 invokeListeners(0); 780 load(0, true); 781 } 782 783 // scan qualified name 784 int offset = fCurrentEntity.position; 785 786 //making a check if if the specified character is a valid name start character 787 //as defined by production [5] in the XML 1.0 specification. 788 // Name ::= (Letter | '_' | ':') (NameChar)* 789 790 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 791 if (++fCurrentEntity.position == fCurrentEntity.count) { 792 invokeListeners(1); 793 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 794 offset = 0; 795 796 if (load(1, false)) { 797 fCurrentEntity.columnNumber++; 798 //adding into symbol table. 799 //XXX We are trying to add single character in SymbolTable?????? 
800 String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 801 qname.setValues(null, name, name, null); 802 if (DEBUG_BUFFER) { 803 System.out.print(")scanQName, "+qname+": "); 804 print(); 805 System.out.println(" -> true"); 806 } 807 return true; 808 } 809 } 810 int index = -1; 811 boolean vc = false; 812 while ( true){ 813 814 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 815 char c = fCurrentEntity.ch[fCurrentEntity.position]; 816 if(c < 127){ 817 vc = VALID_NAMES[c]; 818 }else{ 819 vc = XMLChar.isName(c); 820 } 821 if(!vc)break; 822 if (c == ':') { 823 if (index != -1) { 824 break; 825 } 826 index = fCurrentEntity.position; 827 } 828 if (++fCurrentEntity.position == fCurrentEntity.count) { 829 int length = fCurrentEntity.position - offset; 830 invokeListeners(length); 831 if (length == fCurrentEntity.fBufferSize) { 832 // bad luck we have to resize our buffer 833 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 834 System.arraycopy(fCurrentEntity.ch, offset, 835 tmp, 0, length); 836 fCurrentEntity.ch = tmp; 837 fCurrentEntity.fBufferSize *= 2; 838 } else { 839 System.arraycopy(fCurrentEntity.ch, offset, 840 fCurrentEntity.ch, 0, length); 841 } 842 if (index != -1) { 843 index = index - offset; 844 } 845 offset = 0; 846 if (load(length, false)) { 847 break; 848 } 849 } 850 } 851 int length = fCurrentEntity.position - offset; 852 fCurrentEntity.columnNumber += length; 853 if (length > 0) { 854 String prefix = null; 855 String localpart = null; 856 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 857 offset, length); 858 859 if (index != -1) { 860 int prefixLength = index - offset; 861 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 862 offset, prefixLength); 863 int len = length - prefixLength - 1; 864 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, 865 index + 1, len); 866 867 } else { 868 localpart = rawname; 869 } 870 qname.setValues(prefix, localpart, rawname, null); 871 if (DEBUG_BUFFER) { 872 
System.out.print(")scanQName, "+qname+": "); 873 print(); 874 System.out.println(" -> true"); 875 } 876 return true; 877 } 878 } 879 880 // no qualified name found 881 if (DEBUG_BUFFER) { 882 System.out.print(")scanQName, "+qname+": "); 883 print(); 884 System.out.println(" -> false"); 885 } 886 return false; 887 888 } // scanQName(QName):boolean 889 890 /** 891 * CHANGED: 892 * Scans a range of parsed character data, This function appends the character data to 893 * the supplied buffer. 894 * <p> 895 * <strong>Note:</strong> The characters are consumed. 896 * <p> 897 * <strong>Note:</strong> This method does not guarantee to return 898 * the longest run of parsed character data. This method may return 899 * before markup due to reaching the end of the input buffer or any 900 * other reason. 901 * <p> 902 * 903 * @param content The content structure to fill. 904 * 905 * @return Returns the next character on the input, if known. This 906 * value may be -1 but this does <em>note</em> designate 907 * end of file. 908 * 909 * @throws IOException Thrown if i/o error occurs. 910 * @throws EOFException Thrown on end of file. 
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Exactly one character is left: move it to index 0, load more
            // input behind it and restart scanning from the buffer start.
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // counts the line breaks consumed by the loop below; also used as the
        // number of characters kept at the buffer start when reloading, and
        // subtracted from the column advance at the end
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: keep room for the line breaks seen
                        // so far at the buffer start and load behind them
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // CR LF pair: skip the LF and move the start of the
                        // reported range forward by one
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same reload handling as in the CR branch above
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-consume it and stop normalizing
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the consumed range with line feeds
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character remains in the buffer: hand back the
                // normalized line breaks and report -1 for the next character
                //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
                //on buffering the data..
                // NOTE(review): despite the comment above, the active call is
                // setValues(); the append() call is commented out.
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until one fails XMLChar.isContent() or the
        // buffer ends; the failing character is left unconsumed
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        // line breaks already reset the column, so they do not advance it
        fCurrentEntity.columnNumber += length - newlines;

        //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
        //on buffering the data..
        // NOTE(review): as above, the active call is setValues(), not append().
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer fully consumed: the next character is not known yet
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int

    /**
     * Scans a range of attribute value data, setting the fields of the
     * XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of attribute value data. This method may return
     * before the quote character due to reaching the end of the input
     * buffer or any other reason.
     * <p>
     * <strong>Note:</strong> The fields contained in the XMLString
     * structure are not guaranteed to remain valid upon subsequent calls
     * to the entity scanner. Therefore, the caller is responsible for
     * immediately using the returned character data or making a copy of
     * the character data.
     *
     * @param quote   The quote character that signifies the end of the
     *                attribute value data.
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanLiteral(int quote, XMLString content)
    throws IOException {
        // Overview: same buffer/newline handling as scanContent(), but the
        // scan stops at the quote character (subject to the entity's
        // `literal` flag), at '%', or at a character rejected by
        // XMLChar.isContent(). Positions of normalized newlines and of tabs
        // are recorded through storeWhiteSpace().
        if (DEBUG_BUFFER) {
            System.out.print("(scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // one character left: move it to the front and refill behind it
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];

            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        // start a fresh whitespace record for this call
        if(whiteSpaceInfoNeeded)
            whiteSpaceLen=0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // ran off the buffer: reserve room for the newlines
                        // seen so far and load behind them
                        invokeListeners(newlines);
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair collapses into a single newline
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                    /***/
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * offset++;
                     * }
                     * /***/
                } else {
                    // not a newline: un-read it and stop normalizing
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            int i=0;
            // rewrite the consumed newlines as '\n' and record each position.
            // NOTE(review): storeWhiteSpace() is called here without checking
            // whiteSpaceInfoNeeded, while the reset above and the tab case
            // below do check it - confirm whether that asymmetry is intended.
            for ( i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
                storeWhiteSpace(i);
            }

            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character remains: return just the newlines and
                // report the next character as unknown
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // scan literal value
        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // the quote only terminates the scan when the entity is not
            // flagged `literal`, or when the scanner is on an external entity
            if ((c == quote &&
                    (!fCurrentEntity.literal || isExternal)) ||
                    c == '%' || !XMLChar.isContent(c)) {
                break;
            }
            if (whiteSpaceInfoNeeded && c == '\t') {
                storeWhiteSpace(fCurrentEntity.position);
            }
        }

        int length = fCurrentEntity.position - offset;
        // newlines already reset the column, so they are not counted again
        fCurrentEntity.columnNumber += length - newlines;
        content.setValues(fCurrentEntity.ch, offset, length);

        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // NOTE: We don't want to accidentally signal the
            //       end of the literal if we're expanding an
            //       entity appearing in the literal. -Ac
            if (c == quote && fCurrentEntity.literal) {
                c = -1;
            }
        } else {
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanLiteral(int,XMLString):int

    /**
     * Save whitespace information. Increase the whitespace buffer by 100
     * when needed.
     *
     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
     *
     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
     */
    private void storeWhiteSpace(int whiteSpacePos) {
        // lookup table is full: grow it by 100 slots, keeping existing entries
        if (whiteSpaceLen >= whiteSpaceLookup.length) {
            int [] tmp = new int[whiteSpaceLookup.length + 100];
            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
            whiteSpaceLookup = tmp;
        }

        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
    }

    //CHANGED:
    /**
     * Scans a range of character data up to the specified delimiter,
     * setting the fields of the XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains
     * at least one character.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true if there is more data to scan, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        // Overview: repeatedly appends runs of buffered characters to
        // `buffer` until the delimiter has been consumed (returns false) or
        // an early exit is taken: a newline run ending one character before
        // the end of the buffer, or a character flagged by
        // XMLChar.isInvalid() (both return true).
        boolean done = false;
        int delimLen = delimiter.length();
        // first delimiter character, read unconditionally
        char charAt0 = delimiter.charAt(0);
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true);
            }

            boolean bNextEntity = false;

            // fewer than delimLen characters remain: shift the remainder to
            // the front of the buffer and load more behind it, until enough
            // is available or load() reports the end of the entity
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // flush what is left, mark the buffer as consumed and let
                // load() move past the end of this entity
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // ran off the buffer: reserve room for the
                            // newlines seen so far and load behind them
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair collapses into a single newline
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            // unlike the '\r' branch, count is also reset
                            // before loading
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                    } else {
                        // not a newline: un-read it and stop normalizing
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // rewrite the consumed newline characters as '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one character remains: hand back the newlines and
                    // tell the caller there is more to scan
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer ended mid-delimiter: rewind to the
                            // candidate start and leave the scan loop
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: rewind to just after the first
                            // delimiter character
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // the whole delimiter was consumed
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // leave the newline for the normalization step of the
                    // next iteration
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // stop in front of the invalid character and return what
                    // was scanned so far
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter itself is consumed but not appended
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop only exits with done == true, so this returns false
        return !done;

    } // scanData(String,XMLString)

    /**
     * Skips a character appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed only if it matches
     * the specified character.
     *
     * @param c The character to skip.
     *
     * @return Returns true if the character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
1424 */ 1425 public boolean skipChar(int c) throws IOException { 1426 if (DEBUG_BUFFER) { 1427 System.out.print("(skipChar, '"+(char)c+"': "); 1428 print(); 1429 System.out.println(); 1430 } 1431 1432 // load more characters, if needed 1433 if (fCurrentEntity.position == fCurrentEntity.count) { 1434 invokeListeners(0); 1435 load(0, true); 1436 } 1437 1438 // skip character 1439 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1440 if (cc == c) { 1441 fCurrentEntity.position++; 1442 if (c == '\n') { 1443 fCurrentEntity.lineNumber++; 1444 fCurrentEntity.columnNumber = 1; 1445 } else { 1446 fCurrentEntity.columnNumber++; 1447 } 1448 if (DEBUG_BUFFER) { 1449 System.out.print(")skipChar, '"+(char)c+"': "); 1450 print(); 1451 System.out.println(" -> true"); 1452 } 1453 return true; 1454 } else if (c == '\n' && cc == '\r' && isExternal) { 1455 // handle newlines 1456 if (fCurrentEntity.position == fCurrentEntity.count) { 1457 invokeListeners(1); 1458 fCurrentEntity.ch[0] = (char)cc; 1459 load(1, false); 1460 } 1461 fCurrentEntity.position++; 1462 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1463 fCurrentEntity.position++; 1464 } 1465 fCurrentEntity.lineNumber++; 1466 fCurrentEntity.columnNumber = 1; 1467 if (DEBUG_BUFFER) { 1468 System.out.print(")skipChar, '"+(char)c+"': "); 1469 print(); 1470 System.out.println(" -> true"); 1471 } 1472 return true; 1473 } 1474 1475 // character was not skipped 1476 if (DEBUG_BUFFER) { 1477 System.out.print(")skipChar, '"+(char)c+"': "); 1478 print(); 1479 System.out.println(" -> false"); 1480 } 1481 return false; 1482 1483 } // skipChar(int):boolean 1484 1485 public boolean isSpace(char ch){ 1486 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1487 } 1488 /** 1489 * Skips space characters appearing immediately on the input. 1490 * <p> 1491 * <strong>Note:</strong> The characters are consumed only if they are 1492 * space characters. 
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        //boolean entityChanged = false;
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        //we are doing this check only in skipSpace() because it is called by
        //fMiscDispatcher and we want the parser to exit gracefully when document
        //is well-formed.
        //it is possible that end of document is reached and
        //fCurrentEntity becomes null
        //nothing was read so entity changed  'false' should be returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                // set when the load() below reports the end of the entity
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        // newline is the last buffered character: move it to
                        // the front and load more behind it
                        invokeListeners(0);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true);
                        if (!entityChanged){
                            // the load change the position to be 1,
                            // need to restore it when entity not changed
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek past the '\r'; stay on the '\n' of a "\r\n"
                        // pair, otherwise step back onto the '\r'
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // load more characters, if needed
                // consume the space unless load() reported an entity change
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                if (fCurrentEntity.position == fCurrentEntity.count) {
                    invokeListeners(0);
                    load(0, true);

                    //we are doing this check only in skipSpace() because it is called by
                    //fMiscDispatcher and we want the parser to exit gracefully when document
                    //is well-formed.

                    //it is possible that end of document is reached and
                    //fCurrentEntity becomes null
                    //at least one space was already skipped here, so 'true' is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean


    /**
     * Checks that the given number of characters is available in the
     * underlying buffer. Delegates to
     * {@code arrangeCapacity(length, false)}.
     *
     * @param length number of characters that must be available.
     * @return This function returns true if capacity asked is available.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }

    /**
     * Checks that the given number of characters is available in the
     * underlying buffer.
     *
     * @param length number of characters that must be available.
     * @param changeEntity if the underlying function should change the entity
     * @return This function returns true if capacity asked is available.
1610 * 1611 */ 1612 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1613 //check if the capacity is availble in the current buffer 1614 //count is no. of characters in the buffer [x][m][l] 1615 //position is '0' based 1616 //System.out.println("fCurrent Entity " + fCurrentEntity); 1617 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1618 return true; 1619 } 1620 if(DEBUG_SKIP_STRING){ 1621 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1622 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1623 System.out.println("length = " + length); 1624 } 1625 boolean entityChanged = false; 1626 //load more characters -- this function shouldn't change the entity 1627 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1628 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1629 invokeListeners(0); 1630 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1631 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1632 fCurrentEntity.position = 0; 1633 } 1634 1635 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1636 int pos = fCurrentEntity.position; 1637 invokeListeners(pos); 1638 entityChanged = load(fCurrentEntity.count, changeEntity); 1639 fCurrentEntity.position = pos; 1640 if(entityChanged)break; 1641 } 1642 if(DEBUG_SKIP_STRING){ 1643 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1644 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1645 System.out.println("length = " + length); 1646 } 1647 } 1648 //load changes the position.. set it back to the point where we started. 1649 1650 //after loading check again. 
1651 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1652 return true; 1653 } else { 1654 return false; 1655 } 1656 } 1657 1658 /** 1659 * Skips the specified string appearing immediately on the input. 1660 * <p> 1661 * <strong>Note:</strong> The characters are consumed only if all 1662 * the characters are skipped. 1663 * 1664 * @param s The string to skip. 1665 * 1666 * @return Returns true if the string was skipped. 1667 * 1668 * @throws IOException Thrown if i/o error occurs. 1669 * @throws EOFException Thrown on end of file. 1670 */ 1671 public boolean skipString(String s) throws IOException { 1672 1673 final int length = s.length(); 1674 1675 //first make sure that required capacity is avaible 1676 if(arrangeCapacity(length, false)){ 1677 final int beforeSkip = fCurrentEntity.position ; 1678 int afterSkip = fCurrentEntity.position + length - 1 ; 1679 if(DEBUG_SKIP_STRING){ 1680 System.out.println("skipString,length = " + s + "," + length); 1681 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1682 } 1683 1684 //s.charAt() indexes are 0 to 'Length -1' based. 
1685 int i = length - 1 ; 1686 //check from reverse 1687 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1688 if(afterSkip-- == beforeSkip){ 1689 fCurrentEntity.position = fCurrentEntity.position + length ; 1690 fCurrentEntity.columnNumber += length; 1691 return true; 1692 } 1693 } 1694 } 1695 1696 return false; 1697 } // skipString(String):boolean 1698 1699 public boolean skipString(char [] s) throws IOException { 1700 1701 final int length = s.length; 1702 //first make sure that required capacity is avaible 1703 if(arrangeCapacity(length, false)){ 1704 int beforeSkip = fCurrentEntity.position ; 1705 int afterSkip = fCurrentEntity.position + length ; 1706 1707 if(DEBUG_SKIP_STRING){ 1708 System.out.println("skipString,length = " + new String(s) + "," + length); 1709 System.out.println("skipString,length = " + new String(s) + "," + length); 1710 } 1711 1712 for(int i=0;i<length;i++){ 1713 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1714 return false; 1715 } 1716 } 1717 fCurrentEntity.position = fCurrentEntity.position + length ; 1718 fCurrentEntity.columnNumber += length; 1719 return true; 1720 1721 } 1722 1723 return false; 1724 } 1725 1726 // 1727 // Locator methods 1728 // 1729 // 1730 // Private methods 1731 // 1732 1733 /** 1734 * Loads a chunk of text. 1735 * 1736 * @param offset The offset into the character buffer to 1737 * read the next batch of characters. 1738 * @param changeEntity True if the load should change entities 1739 * at the end of the entity, otherwise leave 1740 * the current entity in place and the entity 1741 * boundary will be signaled by the return 1742 * value. 1743 * 1744 * @returns Returns true if the entity changed as a result of this 1745 * load operation. 
1746 */ 1747 final boolean load(int offset, boolean changeEntity) 1748 throws IOException { 1749 if (DEBUG_BUFFER) { 1750 System.out.print("(load, "+offset+": "); 1751 print(); 1752 System.out.println(); 1753 } 1754 //maintaing the count till last load 1755 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1756 // read characters 1757 int length = fCurrentEntity.ch.length - offset; 1758 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1759 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1760 } 1761 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1762 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1763 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1764 1765 // reset count and position 1766 boolean entityChanged = false; 1767 if (count != -1) { 1768 if (count != 0) { 1769 // record the last count 1770 fCurrentEntity.fLastCount = count; 1771 fCurrentEntity.count = count + offset; 1772 fCurrentEntity.position = offset; 1773 } 1774 } 1775 // end of this entity 1776 else { 1777 fCurrentEntity.count = offset; 1778 fCurrentEntity.position = offset; 1779 entityChanged = true; 1780 1781 if (changeEntity) { 1782 //notify the entity manager about the end of entity 1783 fEntityManager.endEntity(); 1784 //return if the current entity becomes null 1785 if(fCurrentEntity == null){ 1786 throw END_OF_DOCUMENT_ENTITY; 1787 } 1788 // handle the trailing edges 1789 if (fCurrentEntity.position == fCurrentEntity.count) { 1790 load(0, true); 1791 } 1792 } 1793 1794 } 1795 if (DEBUG_BUFFER) { 1796 System.out.print(")load, "+offset+": "); 1797 print(); 1798 System.out.println(); 1799 } 1800 1801 return entityChanged; 1802 1803 } // load(int, boolean):boolean 1804 1805 /** 1806 * Creates a reader capable of reading the given input stream in 1807 * the specified encoding. 
1808 * 1809 * @param inputStream The input stream. 1810 * @param encoding The encoding name that the input stream is 1811 * encoded using. If the user has specified that 1812 * Java encoding names are allowed, then the 1813 * encoding name may be a Java encoding name; 1814 * otherwise, it is an ianaEncoding name. 1815 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1816 * specify a byte order, this tells whether the order is bigEndian. null menas 1817 * unknown or not relevant. 1818 * 1819 * @return Returns a reader. 1820 */ 1821 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1822 throws IOException { 1823 1824 // normalize encoding name 1825 if (encoding == null) { 1826 encoding = "UTF-8"; 1827 } 1828 1829 // try to use an optimized reader 1830 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1831 if (ENCODING.equals("UTF-8")) { 1832 if (DEBUG_ENCODINGS) { 1833 System.out.println("$$$ creating UTF8Reader"); 1834 } 1835 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1836 } 1837 if (ENCODING.equals("US-ASCII")) { 1838 if (DEBUG_ENCODINGS) { 1839 System.out.println("$$$ creating ASCIIReader"); 1840 } 1841 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1842 } 1843 if(ENCODING.equals("ISO-10646-UCS-4")) { 1844 if(isBigEndian != null) { 1845 boolean isBE = isBigEndian.booleanValue(); 1846 if(isBE) { 1847 return new UCSReader(inputStream, UCSReader.UCS4BE); 1848 } else { 1849 return new UCSReader(inputStream, UCSReader.UCS4LE); 1850 } 1851 } else { 1852 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1853 "EncodingByteOrderUnsupported", 1854 new Object[] { encoding }, 1855 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1856 } 1857 } 1858 
if(ENCODING.equals("ISO-10646-UCS-2")) { 1859 if(isBigEndian != null) { // sould never happen with this encoding... 1860 boolean isBE = isBigEndian.booleanValue(); 1861 if(isBE) { 1862 return new UCSReader(inputStream, UCSReader.UCS2BE); 1863 } else { 1864 return new UCSReader(inputStream, UCSReader.UCS2LE); 1865 } 1866 } else { 1867 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1868 "EncodingByteOrderUnsupported", 1869 new Object[] { encoding }, 1870 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1871 } 1872 } 1873 1874 // check for valid name 1875 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1876 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1877 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1878 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1879 "EncodingDeclInvalid", 1880 new Object[] { encoding }, 1881 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1882 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1883 // because every byte is a valid ISO Latin 1 character. 1884 // It may not translate correctly but if we failed on 1885 // the encoding anyway, then we're expecting the content 1886 // of the document to be bad. This will just prevent an 1887 // invalid UTF-8 sequence to be detected. This is only 1888 // important when continue-after-fatal-error is turned 1889 // on. -Ac 1890 encoding = "ISO-8859-1"; 1891 } 1892 1893 // try to use a Java reader 1894 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1895 if (javaEncoding == null) { 1896 if(fAllowJavaEncodings) { 1897 javaEncoding = encoding; 1898 } else { 1899 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1900 "EncodingDeclInvalid", 1901 new Object[] { encoding }, 1902 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1903 // see comment above. 
1904 javaEncoding = "ISO8859_1"; 1905 } 1906 } 1907 else if (javaEncoding.equals("ASCII")) { 1908 if (DEBUG_ENCODINGS) { 1909 System.out.println("$$$ creating ASCIIReader"); 1910 } 1911 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1912 } 1913 1914 if (DEBUG_ENCODINGS) { 1915 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1916 if (javaEncoding == encoding) { 1917 System.out.print(" (IANA encoding)"); 1918 } 1919 System.out.println(); 1920 } 1921 return new InputStreamReader(inputStream, javaEncoding); 1922 1923 } // createReader(InputStream,String, Boolean): Reader 1924 1925 /** 1926 * Returns the IANA encoding name that is auto-detected from 1927 * the bytes specified, with the endian-ness of that encoding where appropriate. 1928 * 1929 * @param b4 The first four bytes of the input. 1930 * @param count The number of bytes actually read. 1931 * @return a 2-element array: the first element, an IANA-encoding string, 1932 * the second element a Boolean which is true iff the document is big endian, false 1933 * if it's little-endian, and null if the distinction isn't relevant. 
1934 */ 1935 protected Object[] getEncodingName(byte[] b4, int count) { 1936 1937 if (count < 2) { 1938 return new Object[]{"UTF-8", null}; 1939 } 1940 1941 // UTF-16, with BOM 1942 int b0 = b4[0] & 0xFF; 1943 int b1 = b4[1] & 0xFF; 1944 if (b0 == 0xFE && b1 == 0xFF) { 1945 // UTF-16, big-endian 1946 return new Object [] {"UTF-16BE", new Boolean(true)}; 1947 } 1948 if (b0 == 0xFF && b1 == 0xFE) { 1949 // UTF-16, little-endian 1950 return new Object [] {"UTF-16LE", new Boolean(false)}; 1951 } 1952 1953 // default to UTF-8 if we don't have enough bytes to make a 1954 // good determination of the encoding 1955 if (count < 3) { 1956 return new Object [] {"UTF-8", null}; 1957 } 1958 1959 // UTF-8 with a BOM 1960 int b2 = b4[2] & 0xFF; 1961 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1962 return new Object [] {"UTF-8", null}; 1963 } 1964 1965 // default to UTF-8 if we don't have enough bytes to make a 1966 // good determination of the encoding 1967 if (count < 4) { 1968 return new Object [] {"UTF-8", null}; 1969 } 1970 1971 // other encodings 1972 int b3 = b4[3] & 0xFF; 1973 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 1974 // UCS-4, big endian (1234) 1975 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 1976 } 1977 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 1978 // UCS-4, little endian (4321) 1979 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 1980 } 1981 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 1982 // UCS-4, unusual octet order (2143) 1983 // REVISIT: What should this be? 1984 return new Object [] {"ISO-10646-UCS-4", null}; 1985 } 1986 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 1987 // UCS-4, unusual octect order (3412) 1988 // REVISIT: What should this be? 1989 return new Object [] {"ISO-10646-UCS-4", null}; 1990 } 1991 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 1992 // UTF-16, big-endian, no BOM 1993 // (or could turn out to be UCS-2... 
1994 // REVISIT: What should this be? 1995 return new Object [] {"UTF-16BE", new Boolean(true)}; 1996 } 1997 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 1998 // UTF-16, little-endian, no BOM 1999 // (or could turn out to be UCS-2... 2000 return new Object [] {"UTF-16LE", new Boolean(false)}; 2001 } 2002 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2003 // EBCDIC 2004 // a la xerces1, return CP037 instead of EBCDIC here 2005 return new Object [] {"CP037", null}; 2006 } 2007 2008 // default encoding 2009 return new Object [] {"UTF-8", null}; 2010 2011 } // getEncodingName(byte[],int):Object[] 2012 2013 /** 2014 * xxx not removing endEntity() so that i remember that we need to implement it. 2015 * Ends an entity. 2016 * 2017 * @throws XNIException Thrown by entity handler to signal an error. 2018 */ 2019 // 2020 /** Prints the contents of the buffer. */ 2021 final void print() { 2022 if (DEBUG_BUFFER) { 2023 if (fCurrentEntity != null) { 2024 System.out.print('['); 2025 System.out.print(fCurrentEntity.count); 2026 System.out.print(' '); 2027 System.out.print(fCurrentEntity.position); 2028 if (fCurrentEntity.count > 0) { 2029 System.out.print(" \""); 2030 for (int i = 0; i < fCurrentEntity.count; i++) { 2031 if (i == fCurrentEntity.position) { 2032 System.out.print('^'); 2033 } 2034 char c = fCurrentEntity.ch[i]; 2035 switch (c) { 2036 case '\n': { 2037 System.out.print("\\n"); 2038 break; 2039 } 2040 case '\r': { 2041 System.out.print("\\r"); 2042 break; 2043 } 2044 case '\t': { 2045 System.out.print("\\t"); 2046 break; 2047 } 2048 case '\\': { 2049 System.out.print("\\\\"); 2050 break; 2051 } 2052 default: { 2053 System.out.print(c); 2054 } 2055 } 2056 } 2057 if (fCurrentEntity.position == fCurrentEntity.count) { 2058 System.out.print('^'); 2059 } 2060 System.out.print('"'); 2061 } 2062 System.out.print(']'); 2063 System.out.print(" @ "); 2064 System.out.print(fCurrentEntity.lineNumber); 2065 System.out.print(','); 2066 
System.out.print(fCurrentEntity.columnNumber); 2067 } else { 2068 System.out.print("*NO CURRENT ENTITY*"); 2069 } 2070 } 2071 } 2072 2073 /** 2074 * Registers the listener object and provides callback. 2075 * @param listener listener to which call back should be provided when scanner buffer 2076 * is being changed. 2077 */ 2078 public void registerListener(XMLBufferListener listener) { 2079 if(!listeners.contains(listener)) 2080 listeners.add(listener); 2081 } 2082 2083 /** 2084 * 2085 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2086 */ 2087 private void invokeListeners(int loadPos){ 2088 for(int i=0;i<listeners.size();i++){ 2089 XMLBufferListener listener =(XMLBufferListener) listeners.get(i); 2090 listener.refresh(loadPos); 2091 } 2092 } 2093 2094 /** 2095 * Skips space characters appearing immediately on the input that would 2096 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2097 * normalization is performed. This is useful when scanning structures 2098 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2099 * characters. 2100 * <p> 2101 * <strong>Note:</strong> The characters are consumed only if they would 2102 * match non-terminal S before end of line normalization is performed. 2103 * 2104 * @return Returns true if at least one space character was skipped. 2105 * 2106 * @throws IOException Thrown if i/o error occurs. 2107 * @throws EOFException Thrown on end of file. 
2108 * 2109 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 2110 */ 2111 public final boolean skipDeclSpaces() throws IOException { 2112 if (DEBUG_BUFFER) { 2113 System.out.print("(skipDeclSpaces: "); 2114 //XMLEntityManager.print(fCurrentEntity); 2115 System.out.println(); 2116 } 2117 2118 // load more characters, if needed 2119 if (fCurrentEntity.position == fCurrentEntity.count) { 2120 load(0, true); 2121 } 2122 2123 // skip spaces 2124 int c = fCurrentEntity.ch[fCurrentEntity.position]; 2125 if (XMLChar.isSpace(c)) { 2126 boolean external = fCurrentEntity.isExternal(); 2127 do { 2128 boolean entityChanged = false; 2129 // handle newlines 2130 if (c == '\n' || (external && c == '\r')) { 2131 fCurrentEntity.lineNumber++; 2132 fCurrentEntity.columnNumber = 1; 2133 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 2134 fCurrentEntity.ch[0] = (char)c; 2135 entityChanged = load(1, true); 2136 if (!entityChanged) 2137 // the load change the position to be 1, 2138 // need to restore it when entity not changed 2139 fCurrentEntity.position = 0; 2140 } 2141 if (c == '\r' && external) { 2142 // REVISIT: Does this need to be updated to fix the 2143 // #x0D ^#x0A newline normalization problem? 
-Ac 2144 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 2145 fCurrentEntity.position--; 2146 } 2147 } 2148 /*** NEWLINE NORMALIZATION *** 2149 * else { 2150 * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' 2151 * && external) { 2152 * fCurrentEntity.position++; 2153 * } 2154 * } 2155 * /***/ 2156 } else { 2157 fCurrentEntity.columnNumber++; 2158 } 2159 // load more characters, if needed 2160 if (!entityChanged) 2161 fCurrentEntity.position++; 2162 if (fCurrentEntity.position == fCurrentEntity.count) { 2163 load(0, true); 2164 } 2165 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 2166 if (DEBUG_BUFFER) { 2167 System.out.print(")skipDeclSpaces: "); 2168 // XMLEntityManager.print(fCurrentEntity); 2169 System.out.println(" -> true"); 2170 } 2171 return true; 2172 } 2173 2174 // no spaces were found 2175 if (DEBUG_BUFFER) { 2176 System.out.print(")skipDeclSpaces: "); 2177 //XMLEntityManager.print(fCurrentEntity); 2178 System.out.println(" -> false"); 2179 } 2180 return false; 2181 2182 } // skipDeclSpaces():boolean 2183 2184 2185 } // class XMLEntityScanner