1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 import java.io.EOFException; 24 import java.io.IOException; 25 import java.util.Locale; 26 import java.util.Vector; 27 28 import com.sun.xml.internal.stream.Entity; 29 import com.sun.xml.internal.stream.XMLBufferListener; 30 import java.io.InputStream; 31 import java.io.InputStreamReader; 32 import java.io.Reader; 33 34 35 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 36 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 37 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 38 39 40 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 41 import com.sun.org.apache.xerces.internal.util.EncodingMap; 42 43 import com.sun.org.apache.xerces.internal.util.SymbolTable; 44 import com.sun.org.apache.xerces.internal.util.XMLChar; 45 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 46 import com.sun.org.apache.xerces.internal.xni.QName; 47 import com.sun.org.apache.xerces.internal.xni.XMLString; 48 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 49 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 50 import com.sun.org.apache.xerces.internal.xni.*; 51 52 /** 53 * Implements 
the entity scanner methods. 54 * 55 * @author Neeraj Bajaj, Sun Microsystems 56 * @author Andy Clark, IBM 57 * @author Arnaud Le Hors, IBM 58 * @author K.Venugopal Sun Microsystems 59 * 60 */ 61 public class XMLEntityScanner implements XMLLocator { 62 63 64 protected Entity.ScannedEntity fCurrentEntity = null ; 65 protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; 66 67 protected XMLEntityManager fEntityManager ; 68 69 /** Debug switching readers for encodings. */ 70 private static final boolean DEBUG_ENCODINGS = false; 71 /** Listeners which should know when load is being called */ 72 private Vector listeners = new Vector(); 73 74 private static final boolean [] VALID_NAMES = new boolean[127]; 75 76 /** 77 * Debug printing of buffer. This debugging flag works best when you 78 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 79 * 64 characters. 80 */ 81 private static final boolean DEBUG_BUFFER = false; 82 private static final boolean DEBUG_SKIP_STRING = false; 83 /** 84 * To signal the end of the document entity, this exception will be thrown. 85 */ 86 private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() { 87 private static final long serialVersionUID = 980337771224675268L; 88 public Throwable fillInStackTrace() { 89 return this; 90 } 91 }; 92 93 protected SymbolTable fSymbolTable = null; 94 protected XMLErrorReporter fErrorReporter = null; 95 int [] whiteSpaceLookup = new int[100]; 96 int whiteSpaceLen = 0; 97 boolean whiteSpaceInfoNeeded = true; 98 99 /** 100 * Allow Java encoding names. This feature identifier is: 101 * http://apache.org/xml/features/allow-java-encodings 102 */ 103 protected boolean fAllowJavaEncodings; 104 105 //Will be used only during internal subsets. 106 //for appending data. 107 108 /** Property identifier: symbol table. 
*/ 109 protected static final String SYMBOL_TABLE = 110 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 111 112 /** Property identifier: error reporter. */ 113 protected static final String ERROR_REPORTER = 114 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 115 116 /** Feature identifier: allow Java encodings. */ 117 protected static final String ALLOW_JAVA_ENCODINGS = 118 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 119 120 protected PropertyManager fPropertyManager = null ; 121 122 boolean isExternal = false; 123 static { 124 125 for(int i=0x0041;i<=0x005A ; i++){ 126 VALID_NAMES[i]=true; 127 } 128 for(int i=0x0061;i<=0x007A; i++){ 129 VALID_NAMES[i]=true; 130 } 131 for(int i=0x0030;i<=0x0039; i++){ 132 VALID_NAMES[i]=true; 133 } 134 VALID_NAMES[45]=true; 135 VALID_NAMES[46]=true; 136 VALID_NAMES[58]=true; 137 VALID_NAMES[95]=true; 138 } 139 // SAPJVM: Remember, that the XML version has explicitly been set, 140 // so that XMLStreamReader.getVersion() can find that out. 141 boolean xmlVersionSetExplicitly = false; 142 // 143 // Constructors 144 // 145 146 /** Default constructor. */ 147 public XMLEntityScanner() { 148 } // <init>() 149 150 151 /** private constructor, this class can only be instantiated within this class. Instance of this class should 152 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 153 * @see getEntityScanner() 154 * @see getEntityScanner(ScannedEntity) 155 */ 156 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 157 fEntityManager = entityManager ; 158 reset(propertyManager); 159 } // <init>() 160 161 162 // set buffer size: 163 public final void setBufferSize(int size) { 164 // REVISIT: Buffer size passed to entity scanner 165 // was not being kept in synch with the actual size 166 // of the buffers in each scanned entity. 
If any 167 // of the buffers were actually resized, it was possible 168 // that the parser would throw an ArrayIndexOutOfBoundsException 169 // for documents which contained names which are longer than 170 // the current buffer size. Conceivably the buffer size passed 171 // to entity scanner could be used to determine a minimum size 172 // for resizing, if doubling its size is smaller than this 173 // minimum. -- mrglavas 174 fBufferSize = size; 175 } 176 177 /** 178 * Resets the components. 179 */ 180 public void reset(PropertyManager propertyManager){ 181 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 182 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 183 fCurrentEntity = null; 184 whiteSpaceLen = 0; 185 whiteSpaceInfoNeeded = true; 186 listeners.clear(); 187 } 188 189 /** 190 * Resets the component. The component can query the component manager 191 * about any features and properties that affect the operation of the 192 * component. 193 * 194 * @param componentManager The component manager. 195 * 196 * @throws SAXException Thrown by component on initialization error. 197 * For example, if a feature or property is 198 * required for the operation of the component, the 199 * component manager may throw a 200 * SAXNotRecognizedException or a 201 * SAXNotSupportedException. 
202 */ 203 public void reset(XMLComponentManager componentManager) 204 throws XMLConfigurationException { 205 206 //System.out.println(" this is being called"); 207 // xerces features 208 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 209 210 //xerces properties 211 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 212 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 213 fCurrentEntity = null; 214 whiteSpaceLen = 0; 215 whiteSpaceInfoNeeded = true; 216 listeners.clear(); 217 } // reset(XMLComponentManager) 218 219 220 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 221 XMLErrorReporter reporter) { 222 fCurrentEntity = null; 223 fSymbolTable = symbolTable; 224 fEntityManager = entityManager; 225 fErrorReporter = reporter; 226 } 227 228 /** 229 * Returns the XML version of the current entity. This will normally be the 230 * value from the XML or text declaration or defaulted by the parser. Note that 231 * that this value may be different than the version of the processing rules 232 * applied to the current entity. For instance, an XML 1.1 document may refer to 233 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 234 * document. Also note that, for a given entity, this value can only be considered 235 * final once the XML or text declaration has been read or once it has been 236 * determined that there is no such declaration. 237 */ 238 public final String getXMLVersion() { 239 if (fCurrentEntity != null) { 240 return fCurrentEntity.xmlVersion; 241 } 242 return null; 243 } // getXMLVersion():String 244 245 /** 246 * Sets the XML version. This method is used by the 247 * scanners to report the value of the version pseudo-attribute 248 * in an XML or text declaration. 
249 * 250 * @param xmlVersion the XML version of the current entity 251 */ 252 public final void setXMLVersion(String xmlVersion) { 253 xmlVersionSetExplicitly = true; // SAPJVM 254 fCurrentEntity.xmlVersion = xmlVersion; 255 } // setXMLVersion(String) 256 257 258 /** set the instance of current scanned entity. 259 * @param ScannedEntity 260 */ 261 262 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 263 fCurrentEntity = scannedEntity ; 264 if(fCurrentEntity != null){ 265 isExternal = fCurrentEntity.isExternal(); 266 if(DEBUG_BUFFER) 267 System.out.println("Current Entity is "+scannedEntity.name); 268 } 269 } 270 271 public Entity.ScannedEntity getCurrentEntity(){ 272 return fCurrentEntity ; 273 } 274 // 275 // XMLEntityReader methods 276 // 277 278 /** 279 * Returns the base system identifier of the currently scanned 280 * entity, or null if none is available. 281 */ 282 public final String getBaseSystemId() { 283 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 284 } // getBaseSystemId():String 285 286 /** 287 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 288 */ 289 public void setBaseSystemId(String systemId) { 290 //no-op 291 } 292 293 ///////////// Locator methods start. 294 public final int getLineNumber(){ 295 //if the entity is closed, we should return -1 296 //xxx at first place why such call should be there... 297 return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; 298 } 299 300 /** 301 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 302 */ 303 public void setLineNumber(int line) { 304 //no-op 305 } 306 307 308 public final int getColumnNumber(){ 309 //if the entity is closed, we should return -1 310 //xxx at first place why such call should be there... 311 return fCurrentEntity != null ? 
fCurrentEntity.columnNumber : -1 ; 312 } 313 314 /** 315 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 316 */ 317 public void setColumnNumber(int col) { 318 // no-op 319 } 320 321 322 public final int getCharacterOffset(){ 323 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 324 } 325 326 /** Returns the expanded system identifier. */ 327 public final String getExpandedSystemId() { 328 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 329 } 330 331 /** 332 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 333 */ 334 public void setExpandedSystemId(String systemId) { 335 //no-op 336 } 337 338 /** Returns the literal system identifier. */ 339 public final String getLiteralSystemId() { 340 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 341 } 342 343 /** 344 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 345 */ 346 public void setLiteralSystemId(String systemId) { 347 //no-op 348 } 349 350 /** Returns the public identifier. */ 351 public final String getPublicId() { 352 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 353 } 354 355 /** 356 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 357 */ 358 public void setPublicId(String publicId) { 359 //no-op 360 } 361 362 ///////////////// Locator methods finished. 
363 364 /** the version of the current entity being scanned */ 365 public void setVersion(String version){ 366 fCurrentEntity.version = version; 367 } 368 369 public String getVersion(){ 370 if (fCurrentEntity != null) 371 return fCurrentEntity.version ; 372 return null; 373 } 374 375 /** 376 * Returns the encoding of the current entity. 377 * Note that, for a given entity, this value can only be 378 * considered final once the encoding declaration has been read (or once it 379 * has been determined that there is no such declaration) since, no encoding 380 * having been specified on the XMLInputSource, the parser 381 * will make an initial "guess" which could be in error. 382 */ 383 public final String getEncoding() { 384 if (fCurrentEntity != null) { 385 return fCurrentEntity.encoding; 386 } 387 return null; 388 } // getEncoding():String 389 390 /** 391 * Sets the encoding of the scanner. This method is used by the 392 * scanners if the XMLDecl or TextDecl line contains an encoding 393 * pseudo-attribute. 394 * <p> 395 * <strong>Note:</strong> The underlying character reader on the 396 * current entity will be changed to accomodate the new encoding. 397 * However, the new encoding is ignored if the current reader was 398 * not constructed from an input stream (e.g. an external entity 399 * that is resolved directly to the appropriate java.io.Reader 400 * object). 401 * 402 * @param encoding The IANA encoding name of the new encoding. 403 * 404 * @throws IOException Thrown if the new encoding is not supported. 
405 * 406 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 407 */ 408 public final void setEncoding(String encoding) throws IOException { 409 410 if (DEBUG_ENCODINGS) { 411 System.out.println("$$$ setEncoding: "+encoding); 412 } 413 414 if (fCurrentEntity.stream != null) { 415 // if the encoding is the same, don't change the reader and 416 // re-use the original reader used by the OneCharReader 417 // NOTE: Besides saving an object, this overcomes deficiencies 418 // in the UTF-16 reader supplied with the standard Java 419 // distribution (up to and including 1.3). The UTF-16 420 // decoder buffers 8K blocks even when only asked to read 421 // a single char! -Ac 422 if (fCurrentEntity.encoding == null || 423 !fCurrentEntity.encoding.equals(encoding)) { 424 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 425 // and we know the endian-ness, we shouldn't change readers. 426 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 427 // the endian-ness from the encoding we presently have. 
428 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 429 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 430 if(ENCODING.equals("UTF-16")) return; 431 if(ENCODING.equals("ISO-10646-UCS-4")) { 432 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 433 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 434 } else { 435 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 436 } 437 return; 438 } 439 if(ENCODING.equals("ISO-10646-UCS-2")) { 440 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 441 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 442 } else { 443 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 444 } 445 return; 446 } 447 } 448 // wrap a new reader around the input stream, changing 449 // the encoding 450 if (DEBUG_ENCODINGS) { 451 System.out.println("$$$ creating new reader from stream: "+ 452 fCurrentEntity.stream); 453 } 454 //fCurrentEntity.stream.reset(); 455 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 456 fCurrentEntity.encoding = encoding; 457 458 } else { 459 if (DEBUG_ENCODINGS) 460 System.out.println("$$$ reusing old reader on stream"); 461 } 462 } 463 464 } // setEncoding(String) 465 466 /** Returns true if the current entity being scanned is external. */ 467 public final boolean isExternal() { 468 return fCurrentEntity.isExternal(); 469 } // isExternal():boolean 470 471 public int getChar(int relative) throws IOException{ 472 if(arrangeCapacity(relative + 1, false)){ 473 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 474 }else{ 475 return -1; 476 } 477 }//getChar() 478 479 /** 480 * Returns the next character on the input. 481 * <p> 482 * <strong>Note:</strong> The character is <em>not</em> consumed. 483 * 484 * @throws IOException Thrown if i/o error occurs. 485 * @throws EOFException Thrown on end of file. 
486 */ 487 public int peekChar() throws IOException { 488 if (DEBUG_BUFFER) { 489 System.out.print("(peekChar: "); 490 print(); 491 System.out.println(); 492 } 493 494 // load more characters, if needed 495 if (fCurrentEntity.position == fCurrentEntity.count) { 496 invokeListeners(0); 497 load(0, true); 498 } 499 500 // peek at character 501 int c = fCurrentEntity.ch[fCurrentEntity.position]; 502 503 // return peeked character 504 if (DEBUG_BUFFER) { 505 System.out.print(")peekChar: "); 506 print(); 507 if (isExternal) { 508 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); 509 } else { 510 System.out.println(" -> '"+(char)c+"'"); 511 } 512 } 513 if (isExternal) { 514 return c != '\r' ? c : '\n'; 515 } else { 516 return c; 517 } 518 519 } // peekChar():int 520 521 /** 522 * Returns the next character on the input. 523 * <p> 524 * <strong>Note:</strong> The character is consumed. 525 * 526 * @throws IOException Thrown if i/o error occurs. 527 * @throws EOFException Thrown on end of file. 
528 */ 529 public int scanChar() throws IOException { 530 if (DEBUG_BUFFER) { 531 System.out.print("(scanChar: "); 532 print(); 533 System.out.println(); 534 } 535 536 // load more characters, if needed 537 if (fCurrentEntity.position == fCurrentEntity.count) { 538 invokeListeners(0); 539 load(0, true); 540 } 541 542 // scan character 543 int c = fCurrentEntity.ch[fCurrentEntity.position++]; 544 if (c == '\n' || 545 (c == '\r' && isExternal)) { 546 fCurrentEntity.lineNumber++; 547 fCurrentEntity.columnNumber = 1; 548 if (fCurrentEntity.position == fCurrentEntity.count) { 549 invokeListeners(1); 550 fCurrentEntity.ch[0] = (char)c; 551 load(1, false); 552 } 553 if (c == '\r' && isExternal) { 554 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { 555 fCurrentEntity.position--; 556 } 557 c = '\n'; 558 } 559 } 560 561 // return character that was scanned 562 if (DEBUG_BUFFER) { 563 System.out.print(")scanChar: "); 564 print(); 565 System.out.println(" -> '"+(char)c+"'"); 566 } 567 fCurrentEntity.columnNumber++; 568 return c; 569 570 } // scanChar():int 571 572 /** 573 * Returns a string matching the NMTOKEN production appearing immediately 574 * on the input as a symbol, or null if NMTOKEN Name string is present. 575 * <p> 576 * <strong>Note:</strong> The NMTOKEN characters are consumed. 577 * <p> 578 * <strong>Note:</strong> The string returned must be a symbol. The 579 * SymbolTable can be used for this purpose. 580 * 581 * @throws IOException Thrown if i/o error occurs. 582 * @throws EOFException Thrown on end of file. 
583 * 584 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 585 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 586 */ 587 public String scanNmtoken() throws IOException { 588 if (DEBUG_BUFFER) { 589 System.out.print("(scanNmtoken: "); 590 print(); 591 System.out.println(); 592 } 593 594 // load more characters, if needed 595 if (fCurrentEntity.position == fCurrentEntity.count) { 596 invokeListeners(0); 597 load(0, true); 598 } 599 600 // scan nmtoken 601 int offset = fCurrentEntity.position; 602 boolean vc = false; 603 char c; 604 while (true){ 605 //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { 606 c = fCurrentEntity.ch[fCurrentEntity.position]; 607 if(c < 127){ 608 vc = VALID_NAMES[c]; 609 }else{ 610 vc = XMLChar.isName(c); 611 } 612 if(!vc)break; 613 614 if (++fCurrentEntity.position == fCurrentEntity.count) { 615 int length = fCurrentEntity.position - offset; 616 invokeListeners(length); 617 if (length == fCurrentEntity.fBufferSize) { 618 // bad luck we have to resize our buffer 619 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 620 System.arraycopy(fCurrentEntity.ch, offset, 621 tmp, 0, length); 622 fCurrentEntity.ch = tmp; 623 fCurrentEntity.fBufferSize *= 2; 624 } else { 625 System.arraycopy(fCurrentEntity.ch, offset, 626 fCurrentEntity.ch, 0, length); 627 } 628 offset = 0; 629 if (load(length, false)) { 630 break; 631 } 632 } 633 } 634 int length = fCurrentEntity.position - offset; 635 fCurrentEntity.columnNumber += length; 636 637 // return nmtoken 638 String symbol = null; 639 if (length > 0) { 640 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 641 } 642 if (DEBUG_BUFFER) { 643 System.out.print(")scanNmtoken: "); 644 print(); 645 System.out.println(" -> "+String.valueOf(symbol)); 646 } 647 return symbol; 648 649 } // scanNmtoken():String 650 651 /** 652 * Returns a string matching the Name production appearing immediately 653 * on the input as a symbol, or null if no Name string 
is present. 654 * <p> 655 * <strong>Note:</strong> The Name characters are consumed. 656 * <p> 657 * <strong>Note:</strong> The string returned must be a symbol. The 658 * SymbolTable can be used for this purpose. 659 * 660 * @throws IOException Thrown if i/o error occurs. 661 * @throws EOFException Thrown on end of file. 662 * 663 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 664 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 665 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 666 */ 667 public String scanName() throws IOException { 668 if (DEBUG_BUFFER) { 669 System.out.print("(scanName: "); 670 print(); 671 System.out.println(); 672 } 673 674 // load more characters, if needed 675 if (fCurrentEntity.position == fCurrentEntity.count) { 676 invokeListeners(0); 677 load(0, true); 678 } 679 680 // scan name 681 int offset = fCurrentEntity.position; 682 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 683 if (++fCurrentEntity.position == fCurrentEntity.count) { 684 invokeListeners(1); 685 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 686 offset = 0; 687 if (load(1, false)) { 688 fCurrentEntity.columnNumber++; 689 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 690 691 if (DEBUG_BUFFER) { 692 System.out.print(")scanName: "); 693 print(); 694 System.out.println(" -> "+String.valueOf(symbol)); 695 } 696 return symbol; 697 } 698 } 699 boolean vc =false; 700 while (true ){ 701 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 702 char c = fCurrentEntity.ch[fCurrentEntity.position]; 703 if(c < 127){ 704 vc = VALID_NAMES[c]; 705 }else{ 706 vc = XMLChar.isName(c); 707 } 708 if(!vc)break; 709 if (++fCurrentEntity.position == fCurrentEntity.count) { 710 int length = fCurrentEntity.position - offset; 711 invokeListeners(length); 712 if (length == fCurrentEntity.fBufferSize) { 713 // bad luck we have to resize our buffer 714 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 715 
System.arraycopy(fCurrentEntity.ch, offset, 716 tmp, 0, length); 717 fCurrentEntity.ch = tmp; 718 fCurrentEntity.fBufferSize *= 2; 719 } else { 720 System.arraycopy(fCurrentEntity.ch, offset, 721 fCurrentEntity.ch, 0, length); 722 } 723 offset = 0; 724 if (load(length, false)) { 725 break; 726 } 727 } 728 } 729 } 730 int length = fCurrentEntity.position - offset; 731 fCurrentEntity.columnNumber += length; 732 733 // return name 734 String symbol; 735 if (length > 0) { 736 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 737 } else 738 symbol = null; 739 if (DEBUG_BUFFER) { 740 System.out.print(")scanName: "); 741 print(); 742 System.out.println(" -> "+String.valueOf(symbol)); 743 } 744 return symbol; 745 746 } // scanName():String 747 748 /** 749 * Scans a qualified name from the input, setting the fields of the 750 * QName structure appropriately. 751 * <p> 752 * <strong>Note:</strong> The qualified name characters are consumed. 753 * <p> 754 * <strong>Note:</strong> The strings used to set the values of the 755 * QName structure must be symbols. The SymbolTable can be used for 756 * this purpose. 757 * 758 * @param qname The qualified name structure to fill. 759 * 760 * @return Returns true if a qualified name appeared immediately on 761 * the input and was scanned, false otherwise. 762 * 763 * @throws IOException Thrown if i/o error occurs. 764 * @throws EOFException Thrown on end of file. 
765 * 766 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 767 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 768 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 769 */ 770 public boolean scanQName(QName qname) throws IOException { 771 if (DEBUG_BUFFER) { 772 System.out.print("(scanQName, "+qname+": "); 773 print(); 774 System.out.println(); 775 } 776 777 // load more characters, if needed 778 if (fCurrentEntity.position == fCurrentEntity.count) { 779 invokeListeners(0); 780 load(0, true); 781 } 782 783 // scan qualified name 784 int offset = fCurrentEntity.position; 785 786 //making a check if if the specified character is a valid name start character 787 //as defined by production [5] in the XML 1.0 specification. 788 // Name ::= (Letter | '_' | ':') (NameChar)* 789 790 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 791 if (++fCurrentEntity.position == fCurrentEntity.count) { 792 invokeListeners(1); 793 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 794 offset = 0; 795 796 if (load(1, false)) { 797 fCurrentEntity.columnNumber++; 798 //adding into symbol table. 799 //XXX We are trying to add single character in SymbolTable?????? 
800 String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 801 qname.setValues(null, name, name, null); 802 if (DEBUG_BUFFER) { 803 System.out.print(")scanQName, "+qname+": "); 804 print(); 805 System.out.println(" -> true"); 806 } 807 return true; 808 } 809 } 810 int index = -1; 811 boolean vc = false; 812 while ( true){ 813 814 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 815 char c = fCurrentEntity.ch[fCurrentEntity.position]; 816 if(c < 127){ 817 vc = VALID_NAMES[c]; 818 }else{ 819 vc = XMLChar.isName(c); 820 } 821 if(!vc)break; 822 if (c == ':') { 823 if (index != -1) { 824 break; 825 } 826 index = fCurrentEntity.position; 827 } 828 if (++fCurrentEntity.position == fCurrentEntity.count) { 829 int length = fCurrentEntity.position - offset; 830 invokeListeners(length); 831 if (length == fCurrentEntity.fBufferSize) { 832 // bad luck we have to resize our buffer 833 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 834 System.arraycopy(fCurrentEntity.ch, offset, 835 tmp, 0, length); 836 fCurrentEntity.ch = tmp; 837 fCurrentEntity.fBufferSize *= 2; 838 } else { 839 System.arraycopy(fCurrentEntity.ch, offset, 840 fCurrentEntity.ch, 0, length); 841 } 842 if (index != -1) { 843 index = index - offset; 844 } 845 offset = 0; 846 if (load(length, false)) { 847 break; 848 } 849 } 850 } 851 int length = fCurrentEntity.position - offset; 852 fCurrentEntity.columnNumber += length; 853 if (length > 0) { 854 String prefix = null; 855 String localpart = null; 856 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 857 offset, length); 858 859 if (index != -1) { 860 int prefixLength = index - offset; 861 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 862 offset, prefixLength); 863 int len = length - prefixLength - 1; 864 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, 865 index + 1, len); 866 867 } else { 868 localpart = rawname; 869 } 870 qname.setValues(prefix, localpart, rawname, null); 871 if (DEBUG_BUFFER) { 872 
System.out.print(")scanQName, "+qname+": "); 873 print(); 874 System.out.println(" -> true"); 875 } 876 return true; 877 } 878 } 879 880 // no qualified name found 881 if (DEBUG_BUFFER) { 882 System.out.print(")scanQName, "+qname+": "); 883 print(); 884 System.out.println(" -> false"); 885 } 886 return false; 887 888 } // scanQName(QName):boolean 889 890 /** 891 * CHANGED: 892 * Scans a range of parsed character data, This function appends the character data to 893 * the supplied buffer. 894 * <p> 895 * <strong>Note:</strong> The characters are consumed. 896 * <p> 897 * <strong>Note:</strong> This method does not guarantee to return 898 * the longest run of parsed character data. This method may return 899 * before markup due to reaching the end of the input buffer or any 900 * other reason. 901 * <p> 902 * 903 * @param content The content structure to fill. 904 * 905 * @return Returns the next character on the input, if known. This 906 * value may be -1 but this does <em>note</em> designate 907 * end of file. 908 * 909 * @throws IOException Thrown if i/o error occurs. 910 * @throws EOFException Thrown on end of file. 
911 */ 912 public int scanContent(XMLString content) throws IOException { 913 if (DEBUG_BUFFER) { 914 System.out.print("(scanContent: "); 915 print(); 916 System.out.println(); 917 } 918 919 // load more characters, if needed 920 if (fCurrentEntity.position == fCurrentEntity.count) { 921 invokeListeners(0); 922 load(0, true); 923 } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { 924 invokeListeners(0); 925 fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; 926 load(1, false); 927 fCurrentEntity.position = 0; 928 } 929 930 // normalize newlines 931 int offset = fCurrentEntity.position; 932 int c = fCurrentEntity.ch[offset]; 933 int newlines = 0; 934 if (c == '\n' || (c == '\r' && isExternal)) { 935 if (DEBUG_BUFFER) { 936 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 937 print(); 938 System.out.println(); 939 } 940 do { 941 c = fCurrentEntity.ch[fCurrentEntity.position++]; 942 if (c == '\r' && isExternal) { 943 newlines++; 944 fCurrentEntity.lineNumber++; 945 fCurrentEntity.columnNumber = 1; 946 if (fCurrentEntity.position == fCurrentEntity.count) { 947 offset = 0; 948 invokeListeners(newlines); 949 fCurrentEntity.position = newlines; 950 if (load(newlines, false)) { 951 break; 952 } 953 } 954 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 955 fCurrentEntity.position++; 956 offset++; 957 } 958 /*** NEWLINE NORMALIZATION ***/ 959 else { 960 newlines++; 961 } 962 } else if (c == '\n') { 963 newlines++; 964 fCurrentEntity.lineNumber++; 965 fCurrentEntity.columnNumber = 1; 966 if (fCurrentEntity.position == fCurrentEntity.count) { 967 offset = 0; 968 invokeListeners(newlines); 969 fCurrentEntity.position = newlines; 970 if (load(newlines, false)) { 971 break; 972 } 973 } 974 } else { 975 fCurrentEntity.position--; 976 break; 977 } 978 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 979 for (int i = offset; i < fCurrentEntity.position; i++) { 980 fCurrentEntity.ch[i] = '\n'; 981 } 982 
int length = fCurrentEntity.position - offset; 983 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 984 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee 985 //on buffering the data.. 986 content.setValues(fCurrentEntity.ch, offset, length); 987 //content.append(fCurrentEntity.ch, offset, length); 988 if (DEBUG_BUFFER) { 989 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 990 print(); 991 System.out.println(); 992 } 993 return -1; 994 } 995 if (DEBUG_BUFFER) { 996 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 997 print(); 998 System.out.println(); 999 } 1000 } 1001 1002 while (fCurrentEntity.position < fCurrentEntity.count) { 1003 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1004 if (!XMLChar.isContent(c)) { 1005 fCurrentEntity.position--; 1006 break; 1007 } 1008 } 1009 int length = fCurrentEntity.position - offset; 1010 fCurrentEntity.columnNumber += length - newlines; 1011 1012 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee 1013 //on buffering the data.. 1014 content.setValues(fCurrentEntity.ch, offset, length); 1015 //content.append(fCurrentEntity.ch, offset, length); 1016 // return next character 1017 if (fCurrentEntity.position != fCurrentEntity.count) { 1018 c = fCurrentEntity.ch[fCurrentEntity.position]; 1019 // REVISIT: Does this need to be updated to fix the 1020 // #x0D ^#x0A newline normalization problem? -Ac 1021 if (c == '\r' && isExternal) { 1022 c = '\n'; 1023 } 1024 } else { 1025 c = -1; 1026 } 1027 if (DEBUG_BUFFER) { 1028 System.out.print(")scanContent: "); 1029 print(); 1030 System.out.println(" -> '"+(char)c+"'"); 1031 } 1032 return c; 1033 1034 } // scanContent(XMLString):int 1035 1036 /** 1037 * Scans a range of attribute value data, setting the fields of the 1038 * XMLString structure, appropriately. 1039 * <p> 1040 * <strong>Note:</strong> The characters are consumed. 
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of attribute value data. This method may return
     * before the quote character due to reaching the end of the input
     * buffer or any other reason.
     * <p>
     * <strong>Note:</strong> The fields contained in the XMLString
     * structure are not guaranteed to remain valid upon subsequent calls
     * to the entity scanner. Therefore, the caller is responsible for
     * immediately using the returned character data or making a copy of
     * the character data.
     *
     * @param quote   The quote character that signifies the end of the
     *                attribute value data.
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
1063 */ 1064 public int scanLiteral(int quote, XMLString content) 1065 throws IOException { 1066 if (DEBUG_BUFFER) { 1067 System.out.print("(scanLiteral, '"+(char)quote+"': "); 1068 print(); 1069 System.out.println(); 1070 } 1071 // load more characters, if needed 1072 if (fCurrentEntity.position == fCurrentEntity.count) { 1073 invokeListeners(0); 1074 load(0, true); 1075 } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1076 invokeListeners(0); 1077 fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; 1078 1079 load(1, false); 1080 fCurrentEntity.position = 0; 1081 } 1082 1083 // normalize newlines 1084 int offset = fCurrentEntity.position; 1085 int c = fCurrentEntity.ch[offset]; 1086 int newlines = 0; 1087 if(whiteSpaceInfoNeeded) 1088 whiteSpaceLen=0; 1089 if (c == '\n' || (c == '\r' && isExternal)) { 1090 if (DEBUG_BUFFER) { 1091 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 1092 print(); 1093 System.out.println(); 1094 } 1095 do { 1096 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1097 if (c == '\r' && isExternal) { 1098 newlines++; 1099 fCurrentEntity.lineNumber++; 1100 fCurrentEntity.columnNumber = 1; 1101 if (fCurrentEntity.position == fCurrentEntity.count) { 1102 invokeListeners(newlines); 1103 offset = 0; 1104 fCurrentEntity.position = newlines; 1105 if (load(newlines, false)) { 1106 break; 1107 } 1108 } 1109 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1110 fCurrentEntity.position++; 1111 offset++; 1112 } 1113 /*** NEWLINE NORMALIZATION ***/ 1114 else { 1115 newlines++; 1116 } 1117 /***/ 1118 } else if (c == '\n') { 1119 newlines++; 1120 fCurrentEntity.lineNumber++; 1121 fCurrentEntity.columnNumber = 1; 1122 if (fCurrentEntity.position == fCurrentEntity.count) { 1123 offset = 0; 1124 invokeListeners(newlines); 1125 fCurrentEntity.position = newlines; 1126 if (load(newlines, false)) { 1127 break; 1128 } 1129 } 1130 /*** NEWLINE NORMALIZATION *** 1131 * if 
(fCurrentEntity.ch[fCurrentEntity.position] == '\r' 1132 * && external) { 1133 * fCurrentEntity.position++; 1134 * offset++; 1135 * } 1136 * /***/ 1137 } else { 1138 fCurrentEntity.position--; 1139 break; 1140 } 1141 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 1142 int i=0; 1143 for ( i = offset; i < fCurrentEntity.position; i++) { 1144 fCurrentEntity.ch[i] = '\n'; 1145 whiteSpaceLookup[whiteSpaceLen++]=i; 1146 } 1147 1148 int length = fCurrentEntity.position - offset; 1149 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1150 content.setValues(fCurrentEntity.ch, offset, length); 1151 if (DEBUG_BUFFER) { 1152 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1153 print(); 1154 System.out.println(); 1155 } 1156 return -1; 1157 } 1158 if (DEBUG_BUFFER) { 1159 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1160 print(); 1161 System.out.println(); 1162 } 1163 } 1164 1165 // scan literal value 1166 while (fCurrentEntity.position < fCurrentEntity.count) { 1167 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1168 if ((c == quote && 1169 (!fCurrentEntity.literal || isExternal)) 1170 || c == '%' || !XMLChar.isContent(c)) { 1171 fCurrentEntity.position--; 1172 break; 1173 } 1174 if(whiteSpaceInfoNeeded){ 1175 if(c == 0x20 || c == 0x9){ 1176 if(whiteSpaceLen < whiteSpaceLookup.length){ 1177 whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position-1; 1178 }else{ 1179 int [] tmp = new int[whiteSpaceLookup.length*2]; 1180 System.arraycopy(whiteSpaceLookup,0,tmp,0,whiteSpaceLookup.length); 1181 whiteSpaceLookup = tmp; 1182 whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position - 1; 1183 } 1184 } 1185 } 1186 } 1187 int length = fCurrentEntity.position - offset; 1188 fCurrentEntity.columnNumber += length - newlines; 1189 content.setValues(fCurrentEntity.ch, offset, length); 1190 1191 // return next character 1192 if (fCurrentEntity.position != fCurrentEntity.count) { 1193 c = 
fCurrentEntity.ch[fCurrentEntity.position]; 1194 // NOTE: We don't want to accidentally signal the 1195 // end of the literal if we're expanding an 1196 // entity appearing in the literal. -Ac 1197 if (c == quote && fCurrentEntity.literal) { 1198 c = -1; 1199 } 1200 } else { 1201 c = -1; 1202 } 1203 if (DEBUG_BUFFER) { 1204 System.out.print(")scanLiteral, '"+(char)quote+"': "); 1205 print(); 1206 System.out.println(" -> '"+(char)c+"'"); 1207 } 1208 return c; 1209 1210 } // scanLiteral(int,XMLString):int 1211 1212 //CHANGED: 1213 /** 1214 * Scans a range of character data up to the specified delimiter, 1215 * setting the fields of the XMLString structure, appropriately. 1216 * <p> 1217 * <strong>Note:</strong> The characters are consumed. 1218 * <p> 1219 * <strong>Note:</strong> This assumes that the length of the delimiter 1220 * and that the delimiter contains at least one character. 1221 * <p> 1222 * <strong>Note:</strong> This method does not guarantee to return 1223 * the longest run of character data. This method may return before 1224 * the delimiter due to reaching the end of the input buffer or any 1225 * other reason. 1226 * <p> 1227 * @param delimiter The string that signifies the end of the character 1228 * data to be scanned. 1229 * @param buffer The XMLStringBuffer to fill. 1230 * 1231 * @return Returns true if there is more data to scan, false otherwise. 1232 * 1233 * @throws IOException Thrown if i/o error occurs. 1234 * @throws EOFException Thrown on end of file. 
1235 */ 1236 public boolean scanData(String delimiter, XMLStringBuffer buffer) 1237 throws IOException { 1238 1239 boolean done = false; 1240 int delimLen = delimiter.length(); 1241 char charAt0 = delimiter.charAt(0); 1242 do { 1243 if (DEBUG_BUFFER) { 1244 System.out.print("(scanData: "); 1245 print(); 1246 System.out.println(); 1247 } 1248 1249 // load more characters, if needed 1250 1251 if (fCurrentEntity.position == fCurrentEntity.count) { 1252 load(0, true); 1253 } 1254 1255 boolean bNextEntity = false; 1256 1257 while ((fCurrentEntity.position > fCurrentEntity.count - delimLen) 1258 && (!bNextEntity)) 1259 { 1260 System.arraycopy(fCurrentEntity.ch, 1261 fCurrentEntity.position, 1262 fCurrentEntity.ch, 1263 0, 1264 fCurrentEntity.count - fCurrentEntity.position); 1265 1266 bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false); 1267 fCurrentEntity.position = 0; 1268 fCurrentEntity.startPosition = 0; 1269 } 1270 1271 if (fCurrentEntity.position > fCurrentEntity.count - delimLen) { 1272 // something must be wrong with the input: e.g., file ends in an unterminated comment 1273 int length = fCurrentEntity.count - fCurrentEntity.position; 1274 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); 1275 fCurrentEntity.columnNumber += fCurrentEntity.count; 1276 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); 1277 fCurrentEntity.position = fCurrentEntity.count; 1278 fCurrentEntity.startPosition = fCurrentEntity.count; 1279 load(0, true); 1280 return false; 1281 } 1282 1283 // normalize newlines 1284 int offset = fCurrentEntity.position; 1285 int c = fCurrentEntity.ch[offset]; 1286 int newlines = 0; 1287 if (c == '\n' || (c == '\r' && isExternal)) { 1288 if (DEBUG_BUFFER) { 1289 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 1290 print(); 1291 System.out.println(); 1292 } 1293 do { 1294 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1295 if (c == '\r' && isExternal) 
{ 1296 newlines++; 1297 fCurrentEntity.lineNumber++; 1298 fCurrentEntity.columnNumber = 1; 1299 if (fCurrentEntity.position == fCurrentEntity.count) { 1300 offset = 0; 1301 invokeListeners(newlines); 1302 fCurrentEntity.position = newlines; 1303 if (load(newlines, false)) { 1304 break; 1305 } 1306 } 1307 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1308 fCurrentEntity.position++; 1309 offset++; 1310 } 1311 /*** NEWLINE NORMALIZATION ***/ 1312 else { 1313 newlines++; 1314 } 1315 } else if (c == '\n') { 1316 newlines++; 1317 fCurrentEntity.lineNumber++; 1318 fCurrentEntity.columnNumber = 1; 1319 if (fCurrentEntity.position == fCurrentEntity.count) { 1320 offset = 0; 1321 invokeListeners(newlines); 1322 fCurrentEntity.position = newlines; 1323 fCurrentEntity.count = newlines; 1324 if (load(newlines, false)) { 1325 break; 1326 } 1327 } 1328 } else { 1329 fCurrentEntity.position--; 1330 break; 1331 } 1332 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 1333 for (int i = offset; i < fCurrentEntity.position; i++) { 1334 fCurrentEntity.ch[i] = '\n'; 1335 } 1336 int length = fCurrentEntity.position - offset; 1337 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1338 buffer.append(fCurrentEntity.ch, offset, length); 1339 if (DEBUG_BUFFER) { 1340 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1341 print(); 1342 System.out.println(); 1343 } 1344 return true; 1345 } 1346 if (DEBUG_BUFFER) { 1347 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 1348 print(); 1349 System.out.println(); 1350 } 1351 } 1352 1353 // iterate over buffer looking for delimiter 1354 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { 1355 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1356 if (c == charAt0) { 1357 // looks like we just hit the delimiter 1358 int delimOffset = fCurrentEntity.position - 1; 1359 for (int i = 1; i < delimLen; i++) { 1360 if (fCurrentEntity.position == fCurrentEntity.count) { 
1361 fCurrentEntity.position -= i; 1362 break OUTER; 1363 } 1364 c = fCurrentEntity.ch[fCurrentEntity.position++]; 1365 if (delimiter.charAt(i) != c) { 1366 fCurrentEntity.position -= i; 1367 break; 1368 } 1369 } 1370 if (fCurrentEntity.position == delimOffset + delimLen) { 1371 done = true; 1372 break; 1373 } 1374 } else if (c == '\n' || (isExternal && c == '\r')) { 1375 fCurrentEntity.position--; 1376 break; 1377 } else if (XMLChar.isInvalid(c)) { 1378 fCurrentEntity.position--; 1379 int length = fCurrentEntity.position - offset; 1380 fCurrentEntity.columnNumber += length - newlines; 1381 buffer.append(fCurrentEntity.ch, offset, length); 1382 return true; 1383 } 1384 } 1385 int length = fCurrentEntity.position - offset; 1386 fCurrentEntity.columnNumber += length - newlines; 1387 if (done) { 1388 length -= delimLen; 1389 } 1390 buffer.append(fCurrentEntity.ch, offset, length); 1391 1392 // return true if string was skipped 1393 if (DEBUG_BUFFER) { 1394 System.out.print(")scanData: "); 1395 print(); 1396 System.out.println(" -> " + done); 1397 } 1398 } while (!done); 1399 return !done; 1400 1401 } // scanData(String,XMLString) 1402 1403 /** 1404 * Skips a character appearing immediately on the input. 1405 * <p> 1406 * <strong>Note:</strong> The character is consumed only if it matches 1407 * the specified character. 1408 * 1409 * @param c The character to skip. 1410 * 1411 * @return Returns true if the character was skipped. 1412 * 1413 * @throws IOException Thrown if i/o error occurs. 1414 * @throws EOFException Thrown on end of file. 
1415 */ 1416 public boolean skipChar(int c) throws IOException { 1417 if (DEBUG_BUFFER) { 1418 System.out.print("(skipChar, '"+(char)c+"': "); 1419 print(); 1420 System.out.println(); 1421 } 1422 1423 // load more characters, if needed 1424 if (fCurrentEntity.position == fCurrentEntity.count) { 1425 invokeListeners(0); 1426 load(0, true); 1427 } 1428 1429 // skip character 1430 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1431 if (cc == c) { 1432 fCurrentEntity.position++; 1433 if (c == '\n') { 1434 fCurrentEntity.lineNumber++; 1435 fCurrentEntity.columnNumber = 1; 1436 } else { 1437 fCurrentEntity.columnNumber++; 1438 } 1439 if (DEBUG_BUFFER) { 1440 System.out.print(")skipChar, '"+(char)c+"': "); 1441 print(); 1442 System.out.println(" -> true"); 1443 } 1444 return true; 1445 } else if (c == '\n' && cc == '\r' && isExternal) { 1446 // handle newlines 1447 if (fCurrentEntity.position == fCurrentEntity.count) { 1448 invokeListeners(1); 1449 fCurrentEntity.ch[0] = (char)cc; 1450 load(1, false); 1451 } 1452 fCurrentEntity.position++; 1453 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1454 fCurrentEntity.position++; 1455 } 1456 fCurrentEntity.lineNumber++; 1457 fCurrentEntity.columnNumber = 1; 1458 if (DEBUG_BUFFER) { 1459 System.out.print(")skipChar, '"+(char)c+"': "); 1460 print(); 1461 System.out.println(" -> true"); 1462 } 1463 return true; 1464 } 1465 1466 // character was not skipped 1467 if (DEBUG_BUFFER) { 1468 System.out.print(")skipChar, '"+(char)c+"': "); 1469 print(); 1470 System.out.println(" -> false"); 1471 } 1472 return false; 1473 1474 } // skipChar(int):boolean 1475 1476 public boolean isSpace(char ch){ 1477 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1478 } 1479 /** 1480 * Skips space characters appearing immediately on the input. 1481 * <p> 1482 * <strong>Note:</strong> The characters are consumed only if they are 1483 * space characters. 
1484 * 1485 * @return Returns true if at least one space character was skipped. 1486 * 1487 * @throws IOException Thrown if i/o error occurs. 1488 * @throws EOFException Thrown on end of file. 1489 * 1490 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 1491 */ 1492 public boolean skipSpaces() throws IOException { 1493 if (DEBUG_BUFFER) { 1494 System.out.print("(skipSpaces: "); 1495 print(); 1496 System.out.println(); 1497 } 1498 //boolean entityChanged = false; 1499 // load more characters, if needed 1500 if (fCurrentEntity.position == fCurrentEntity.count) { 1501 invokeListeners(0); 1502 load(0, true); 1503 } 1504 1505 //we are doing this check only in skipSpace() because it is called by 1506 //fMiscDispatcher and we want the parser to exit gracefully when document 1507 //is well-formed. 1508 //it is possible that end of document is reached and 1509 //fCurrentEntity becomes null 1510 //nothing was read so entity changed 'false' should be returned. 1511 if(fCurrentEntity == null){ 1512 return false ; 1513 } 1514 1515 // skip spaces 1516 int c = fCurrentEntity.ch[fCurrentEntity.position]; 1517 if (XMLChar.isSpace(c)) { 1518 do { 1519 boolean entityChanged = false; 1520 // handle newlines 1521 if (c == '\n' || (isExternal && c == '\r')) { 1522 fCurrentEntity.lineNumber++; 1523 fCurrentEntity.columnNumber = 1; 1524 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 1525 invokeListeners(0); 1526 fCurrentEntity.ch[0] = (char)c; 1527 entityChanged = load(1, true); 1528 if (!entityChanged){ 1529 // the load change the position to be 1, 1530 // need to restore it when entity not changed 1531 fCurrentEntity.position = 0; 1532 }else if(fCurrentEntity == null){ 1533 return true ; 1534 } 1535 } 1536 if (c == '\r' && isExternal) { 1537 // REVISIT: Does this need to be updated to fix the 1538 // #x0D ^#x0A newline normalization problem? 
-Ac 1539 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 1540 fCurrentEntity.position--; 1541 } 1542 } 1543 } else { 1544 fCurrentEntity.columnNumber++; 1545 } 1546 // load more characters, if needed 1547 if (!entityChanged){ 1548 fCurrentEntity.position++; 1549 } 1550 1551 if (fCurrentEntity.position == fCurrentEntity.count) { 1552 invokeListeners(0); 1553 load(0, true); 1554 1555 //we are doing this check only in skipSpace() because it is called by 1556 //fMiscDispatcher and we want the parser to exit gracefully when document 1557 //is well-formed. 1558 1559 //it is possible that end of document is reached and 1560 //fCurrentEntity becomes null 1561 //nothing was read so entity changed 'false' should be returned. 1562 if(fCurrentEntity == null){ 1563 return true ; 1564 } 1565 1566 } 1567 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 1568 if (DEBUG_BUFFER) { 1569 System.out.print(")skipSpaces: "); 1570 print(); 1571 System.out.println(" -> true"); 1572 } 1573 return true; 1574 } 1575 1576 // no spaces were found 1577 if (DEBUG_BUFFER) { 1578 System.out.print(")skipSpaces: "); 1579 print(); 1580 System.out.println(" -> false"); 1581 } 1582 return false; 1583 1584 } // skipSpaces():boolean 1585 1586 1587 /** 1588 * @param legnth This function checks that following number of characters are available. 1589 * to the underlying buffer. 1590 * @return This function returns true if capacity asked is available. 1591 */ 1592 public boolean arrangeCapacity(int length) throws IOException{ 1593 return arrangeCapacity(length, false); 1594 } 1595 1596 /** 1597 * @param legnth This function checks that following number of characters are available. 1598 * to the underlying buffer. 1599 * @param if the underlying function should change the entity 1600 * @return This function returns true if capacity asked is available. 
1601 * 1602 */ 1603 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1604 //check if the capacity is availble in the current buffer 1605 //count is no. of characters in the buffer [x][m][l] 1606 //position is '0' based 1607 //System.out.println("fCurrent Entity " + fCurrentEntity); 1608 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1609 return true; 1610 } 1611 if(DEBUG_SKIP_STRING){ 1612 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1613 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1614 System.out.println("length = " + length); 1615 } 1616 boolean entityChanged = false; 1617 //load more characters -- this function shouldn't change the entity 1618 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1619 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1620 invokeListeners(0); 1621 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1622 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1623 fCurrentEntity.position = 0; 1624 } 1625 1626 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1627 int pos = fCurrentEntity.position; 1628 invokeListeners(pos); 1629 entityChanged = load(fCurrentEntity.count, changeEntity); 1630 fCurrentEntity.position = pos; 1631 if(entityChanged)break; 1632 } 1633 if(DEBUG_SKIP_STRING){ 1634 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1635 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1636 System.out.println("length = " + length); 1637 } 1638 } 1639 //load changes the position.. set it back to the point where we started. 1640 1641 //after loading check again. 
1642 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1643 return true; 1644 } else { 1645 return false; 1646 } 1647 } 1648 1649 /** 1650 * Skips the specified string appearing immediately on the input. 1651 * <p> 1652 * <strong>Note:</strong> The characters are consumed only if all 1653 * the characters are skipped. 1654 * 1655 * @param s The string to skip. 1656 * 1657 * @return Returns true if the string was skipped. 1658 * 1659 * @throws IOException Thrown if i/o error occurs. 1660 * @throws EOFException Thrown on end of file. 1661 */ 1662 public boolean skipString(String s) throws IOException { 1663 1664 final int length = s.length(); 1665 1666 //first make sure that required capacity is avaible 1667 if(arrangeCapacity(length, false)){ 1668 final int beforeSkip = fCurrentEntity.position ; 1669 int afterSkip = fCurrentEntity.position + length - 1 ; 1670 if(DEBUG_SKIP_STRING){ 1671 System.out.println("skipString,length = " + s + "," + length); 1672 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1673 } 1674 1675 //s.charAt() indexes are 0 to 'Length -1' based. 
1676 int i = length - 1 ; 1677 //check from reverse 1678 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1679 if(afterSkip-- == beforeSkip){ 1680 fCurrentEntity.position = fCurrentEntity.position + length ; 1681 fCurrentEntity.columnNumber += length; 1682 return true; 1683 } 1684 } 1685 } 1686 1687 return false; 1688 } // skipString(String):boolean 1689 1690 public boolean skipString(char [] s) throws IOException { 1691 1692 final int length = s.length; 1693 //first make sure that required capacity is avaible 1694 if(arrangeCapacity(length, false)){ 1695 int beforeSkip = fCurrentEntity.position ; 1696 int afterSkip = fCurrentEntity.position + length ; 1697 1698 if(DEBUG_SKIP_STRING){ 1699 System.out.println("skipString,length = " + new String(s) + "," + length); 1700 System.out.println("skipString,length = " + new String(s) + "," + length); 1701 } 1702 1703 for(int i=0;i<length;i++){ 1704 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1705 return false; 1706 } 1707 } 1708 fCurrentEntity.position = fCurrentEntity.position + length ; 1709 fCurrentEntity.columnNumber += length; 1710 return true; 1711 1712 } 1713 1714 return false; 1715 } 1716 1717 // 1718 // Locator methods 1719 // 1720 // 1721 // Private methods 1722 // 1723 1724 /** 1725 * Loads a chunk of text. 1726 * 1727 * @param offset The offset into the character buffer to 1728 * read the next batch of characters. 1729 * @param changeEntity True if the load should change entities 1730 * at the end of the entity, otherwise leave 1731 * the current entity in place and the entity 1732 * boundary will be signaled by the return 1733 * value. 1734 * 1735 * @returns Returns true if the entity changed as a result of this 1736 * load operation. 
1737 */ 1738 final boolean load(int offset, boolean changeEntity) 1739 throws IOException { 1740 if (DEBUG_BUFFER) { 1741 System.out.print("(load, "+offset+": "); 1742 print(); 1743 System.out.println(); 1744 } 1745 //maintaing the count till last load 1746 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1747 // read characters 1748 int length = fCurrentEntity.ch.length - offset; 1749 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1750 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1751 } 1752 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1753 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1754 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1755 1756 // reset count and position 1757 boolean entityChanged = false; 1758 if (count != -1) { 1759 if (count != 0) { 1760 // record the last count 1761 fCurrentEntity.fLastCount = count; 1762 fCurrentEntity.count = count + offset; 1763 fCurrentEntity.position = offset; 1764 } 1765 } 1766 // end of this entity 1767 else { 1768 fCurrentEntity.count = offset; 1769 fCurrentEntity.position = offset; 1770 entityChanged = true; 1771 1772 if (changeEntity) { 1773 //notify the entity manager about the end of entity 1774 fEntityManager.endEntity(); 1775 //return if the current entity becomes null 1776 if(fCurrentEntity == null){ 1777 throw END_OF_DOCUMENT_ENTITY; 1778 } 1779 // handle the trailing edges 1780 if (fCurrentEntity.position == fCurrentEntity.count) { 1781 load(0, true); 1782 } 1783 } 1784 1785 } 1786 if (DEBUG_BUFFER) { 1787 System.out.print(")load, "+offset+": "); 1788 print(); 1789 System.out.println(); 1790 } 1791 1792 return entityChanged; 1793 1794 } // load(int, boolean):boolean 1795 1796 /** 1797 * Creates a reader capable of reading the given input stream in 1798 * the specified encoding. 
1799 * 1800 * @param inputStream The input stream. 1801 * @param encoding The encoding name that the input stream is 1802 * encoded using. If the user has specified that 1803 * Java encoding names are allowed, then the 1804 * encoding name may be a Java encoding name; 1805 * otherwise, it is an ianaEncoding name. 1806 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1807 * specify a byte order, this tells whether the order is bigEndian. null menas 1808 * unknown or not relevant. 1809 * 1810 * @return Returns a reader. 1811 */ 1812 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1813 throws IOException { 1814 1815 // normalize encoding name 1816 if (encoding == null) { 1817 encoding = "UTF-8"; 1818 } 1819 1820 // try to use an optimized reader 1821 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1822 if (ENCODING.equals("UTF-8")) { 1823 if (DEBUG_ENCODINGS) { 1824 System.out.println("$$$ creating UTF8Reader"); 1825 } 1826 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1827 } 1828 if (ENCODING.equals("US-ASCII")) { 1829 if (DEBUG_ENCODINGS) { 1830 System.out.println("$$$ creating ASCIIReader"); 1831 } 1832 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1833 } 1834 if(ENCODING.equals("ISO-10646-UCS-4")) { 1835 if(isBigEndian != null) { 1836 boolean isBE = isBigEndian.booleanValue(); 1837 if(isBE) { 1838 return new UCSReader(inputStream, UCSReader.UCS4BE); 1839 } else { 1840 return new UCSReader(inputStream, UCSReader.UCS4LE); 1841 } 1842 } else { 1843 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1844 "EncodingByteOrderUnsupported", 1845 new Object[] { encoding }, 1846 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1847 } 1848 } 1849 
if(ENCODING.equals("ISO-10646-UCS-2")) { 1850 if(isBigEndian != null) { // sould never happen with this encoding... 1851 boolean isBE = isBigEndian.booleanValue(); 1852 if(isBE) { 1853 return new UCSReader(inputStream, UCSReader.UCS2BE); 1854 } else { 1855 return new UCSReader(inputStream, UCSReader.UCS2LE); 1856 } 1857 } else { 1858 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1859 "EncodingByteOrderUnsupported", 1860 new Object[] { encoding }, 1861 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1862 } 1863 } 1864 1865 // check for valid name 1866 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1867 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1868 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1869 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1870 "EncodingDeclInvalid", 1871 new Object[] { encoding }, 1872 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1873 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1874 // because every byte is a valid ISO Latin 1 character. 1875 // It may not translate correctly but if we failed on 1876 // the encoding anyway, then we're expecting the content 1877 // of the document to be bad. This will just prevent an 1878 // invalid UTF-8 sequence to be detected. This is only 1879 // important when continue-after-fatal-error is turned 1880 // on. -Ac 1881 encoding = "ISO-8859-1"; 1882 } 1883 1884 // try to use a Java reader 1885 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1886 if (javaEncoding == null) { 1887 if(fAllowJavaEncodings) { 1888 javaEncoding = encoding; 1889 } else { 1890 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1891 "EncodingDeclInvalid", 1892 new Object[] { encoding }, 1893 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1894 // see comment above. 
1895 javaEncoding = "ISO8859_1"; 1896 } 1897 } 1898 else if (javaEncoding.equals("ASCII")) { 1899 if (DEBUG_ENCODINGS) { 1900 System.out.println("$$$ creating ASCIIReader"); 1901 } 1902 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1903 } 1904 1905 if (DEBUG_ENCODINGS) { 1906 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1907 if (javaEncoding == encoding) { 1908 System.out.print(" (IANA encoding)"); 1909 } 1910 System.out.println(); 1911 } 1912 return new InputStreamReader(inputStream, javaEncoding); 1913 1914 } // createReader(InputStream,String, Boolean): Reader 1915 1916 /** 1917 * Returns the IANA encoding name that is auto-detected from 1918 * the bytes specified, with the endian-ness of that encoding where appropriate. 1919 * 1920 * @param b4 The first four bytes of the input. 1921 * @param count The number of bytes actually read. 1922 * @return a 2-element array: the first element, an IANA-encoding string, 1923 * the second element a Boolean which is true iff the document is big endian, false 1924 * if it's little-endian, and null if the distinction isn't relevant. 
1925 */ 1926 protected Object[] getEncodingName(byte[] b4, int count) { 1927 1928 if (count < 2) { 1929 return new Object[]{"UTF-8", null}; 1930 } 1931 1932 // UTF-16, with BOM 1933 int b0 = b4[0] & 0xFF; 1934 int b1 = b4[1] & 0xFF; 1935 if (b0 == 0xFE && b1 == 0xFF) { 1936 // UTF-16, big-endian 1937 return new Object [] {"UTF-16BE", new Boolean(true)}; 1938 } 1939 if (b0 == 0xFF && b1 == 0xFE) { 1940 // UTF-16, little-endian 1941 return new Object [] {"UTF-16LE", new Boolean(false)}; 1942 } 1943 1944 // default to UTF-8 if we don't have enough bytes to make a 1945 // good determination of the encoding 1946 if (count < 3) { 1947 return new Object [] {"UTF-8", null}; 1948 } 1949 1950 // UTF-8 with a BOM 1951 int b2 = b4[2] & 0xFF; 1952 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1953 return new Object [] {"UTF-8", null}; 1954 } 1955 1956 // default to UTF-8 if we don't have enough bytes to make a 1957 // good determination of the encoding 1958 if (count < 4) { 1959 return new Object [] {"UTF-8", null}; 1960 } 1961 1962 // other encodings 1963 int b3 = b4[3] & 0xFF; 1964 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 1965 // UCS-4, big endian (1234) 1966 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 1967 } 1968 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 1969 // UCS-4, little endian (4321) 1970 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 1971 } 1972 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 1973 // UCS-4, unusual octet order (2143) 1974 // REVISIT: What should this be? 1975 return new Object [] {"ISO-10646-UCS-4", null}; 1976 } 1977 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 1978 // UCS-4, unusual octect order (3412) 1979 // REVISIT: What should this be? 1980 return new Object [] {"ISO-10646-UCS-4", null}; 1981 } 1982 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 1983 // UTF-16, big-endian, no BOM 1984 // (or could turn out to be UCS-2... 
1985 // REVISIT: What should this be? 1986 return new Object [] {"UTF-16BE", new Boolean(true)}; 1987 } 1988 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 1989 // UTF-16, little-endian, no BOM 1990 // (or could turn out to be UCS-2... 1991 return new Object [] {"UTF-16LE", new Boolean(false)}; 1992 } 1993 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 1994 // EBCDIC 1995 // a la xerces1, return CP037 instead of EBCDIC here 1996 return new Object [] {"CP037", null}; 1997 } 1998 1999 // default encoding 2000 return new Object [] {"UTF-8", null}; 2001 2002 } // getEncodingName(byte[],int):Object[] 2003 2004 /** 2005 * xxx not removing endEntity() so that i remember that we need to implement it. 2006 * Ends an entity. 2007 * 2008 * @throws XNIException Thrown by entity handler to signal an error. 2009 */ 2010 // 2011 /** Prints the contents of the buffer. */ 2012 final void print() { 2013 if (DEBUG_BUFFER) { 2014 if (fCurrentEntity != null) { 2015 System.out.print('['); 2016 System.out.print(fCurrentEntity.count); 2017 System.out.print(' '); 2018 System.out.print(fCurrentEntity.position); 2019 if (fCurrentEntity.count > 0) { 2020 System.out.print(" \""); 2021 for (int i = 0; i < fCurrentEntity.count; i++) { 2022 if (i == fCurrentEntity.position) { 2023 System.out.print('^'); 2024 } 2025 char c = fCurrentEntity.ch[i]; 2026 switch (c) { 2027 case '\n': { 2028 System.out.print("\\n"); 2029 break; 2030 } 2031 case '\r': { 2032 System.out.print("\\r"); 2033 break; 2034 } 2035 case '\t': { 2036 System.out.print("\\t"); 2037 break; 2038 } 2039 case '\\': { 2040 System.out.print("\\\\"); 2041 break; 2042 } 2043 default: { 2044 System.out.print(c); 2045 } 2046 } 2047 } 2048 if (fCurrentEntity.position == fCurrentEntity.count) { 2049 System.out.print('^'); 2050 } 2051 System.out.print('"'); 2052 } 2053 System.out.print(']'); 2054 System.out.print(" @ "); 2055 System.out.print(fCurrentEntity.lineNumber); 2056 System.out.print(','); 2057 
System.out.print(fCurrentEntity.columnNumber); 2058 } else { 2059 System.out.print("*NO CURRENT ENTITY*"); 2060 } 2061 } 2062 } 2063 2064 /** 2065 * Registers the listener object and provides callback. 2066 * @param listener listener to which call back should be provided when scanner buffer 2067 * is being changed. 2068 */ 2069 public void registerListener(XMLBufferListener listener) { 2070 if(!listeners.contains(listener)) 2071 listeners.add(listener); 2072 } 2073 2074 /** 2075 * 2076 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2077 */ 2078 private void invokeListeners(int loadPos){ 2079 for(int i=0;i<listeners.size();i++){ 2080 XMLBufferListener listener =(XMLBufferListener) listeners.get(i); 2081 listener.refresh(loadPos); 2082 } 2083 } 2084 2085 /** 2086 * Skips space characters appearing immediately on the input that would 2087 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2088 * normalization is performed. This is useful when scanning structures 2089 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2090 * characters. 2091 * <p> 2092 * <strong>Note:</strong> The characters are consumed only if they would 2093 * match non-terminal S before end of line normalization is performed. 2094 * 2095 * @return Returns true if at least one space character was skipped. 2096 * 2097 * @throws IOException Thrown if i/o error occurs. 2098 * @throws EOFException Thrown on end of file. 
*
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public final boolean skipDeclSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true);
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            // a bare '\r' only counts as a line break in external entities
            boolean external = fCurrentEntity.isExternal();
            do {
                // set from the return value of load(1, true) below; when true,
                // the position is not advanced at the bottom of the loop
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (external && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // the newline is the last buffered character: copy it to
                    // slot 0 and call load with offset 1
                    // NOTE(review): presumably this keeps the newline at index 0
                    // and refills the buffer behind it -- confirm against load()
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true);
                        if (!entityChanged)
                            // the load changes the position to 1; it needs to be
                            // restored to 0 when the entity has not changed
                            fCurrentEntity.position = 0;
                    }
                    if (c == '\r' && external) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // look at the character after the '\r': a following '\n'
                        // stays consumed (the pair counts as one line break),
                        // anything else is un-read by stepping the position back
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * else {
                     *     if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                     *         && external) {
                     *         fCurrentEntity.position++;
                     *     }
                     * }
                     * /***/
                } else {
                    // any other space character just advances the column
                    fCurrentEntity.columnNumber++;
                }
                // load more characters, if needed
                if (!entityChanged)
                    fCurrentEntity.position++;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true);
                }
                // re-read the current character; the loop continues while it is a space
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipDeclSpaces: ");
                // XMLEntityManager.print(fCurrentEntity);
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            //XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> false");
        }
        return false;

    } // skipDeclSpaces():boolean


} // class XMLEntityScanner