1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 24 25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.util.EncodingMap; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 33 import com.sun.org.apache.xerces.internal.xni.*; 34 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 35 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 36 import com.sun.xml.internal.stream.Entity; 37 import com.sun.xml.internal.stream.XMLBufferListener; 38 import java.io.EOFException; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.InputStreamReader; 42 import java.io.Reader; 43 import java.util.Locale; 44 import java.util.Vector; 45 46 /** 47 * Implements the entity scanner methods. 
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator {


    /**
     * Entity currently being scanned. It is {@code null} after construction
     * and after every {@code reset(...)}; it is assigned through
     * {@code setCurrentEntity}.
     */
    protected Entity.ScannedEntity fCurrentEntity = null ;

    /**
     * Buffer size recorded by {@code setBufferSize}; starts out as
     * {@code XMLEntityManager.DEFAULT_BUFFER_SIZE}.
     */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager supplied through the constructor or a {@code reset} call. */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /**
     * Listeners which should know when load is being called. The scan
     * methods notify them through {@code invokeListeners(int)} before buffer
     * contents are moved or reloaded.
     */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0..126). An entry is
     * {@code true} when that ASCII character is accepted as a name character
     * by the name scanning methods; characters at or above 127 are checked
     * with {@code XMLChar.isName} instead. Filled in by the static initializer.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;

    // NOTE(review): presumably enables tracing in a skipString method that is
    // outside the visible part of this file -- confirm.
    private static final boolean DEBUG_SKIP_STRING = false;

    /**
     * To signal the end of the document entity, this exception will be thrown.
     * {@code fillInStackTrace} is overridden to return {@code this}, so the
     * shared instance never captures a stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names ({@code addSymbol}). */
    protected SymbolTable fSymbolTable = null;

    /** Error reporter obtained from the property/component manager on reset. */
    protected XMLErrorReporter fErrorReporter = null;

    // Whitespace bookkeeping. whiteSpaceLen is zeroed by scanLiteral whenever
    // whiteSpaceInfoNeeded is true.
    // NOTE(review): presumably whiteSpaceLookup holds buffer offsets of
    // whitespace found in literals -- confirm against scanLiteral.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter.
*/ 107 protected static final String ERROR_REPORTER = 108 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 109 110 /** Feature identifier: allow Java encodings. */ 111 protected static final String ALLOW_JAVA_ENCODINGS = 112 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 113 114 protected PropertyManager fPropertyManager = null ; 115 116 boolean isExternal = false; 117 static { 118 119 for(int i=0x0041;i<=0x005A ; i++){ 120 VALID_NAMES[i]=true; 121 } 122 for(int i=0x0061;i<=0x007A; i++){ 123 VALID_NAMES[i]=true; 124 } 125 for(int i=0x0030;i<=0x0039; i++){ 126 VALID_NAMES[i]=true; 127 } 128 VALID_NAMES[45]=true; 129 VALID_NAMES[46]=true; 130 VALID_NAMES[58]=true; 131 VALID_NAMES[95]=true; 132 } 133 // SAPJVM: Remember, that the XML version has explicitly been set, 134 // so that XMLStreamReader.getVersion() can find that out. 135 boolean xmlVersionSetExplicitly = false; 136 // 137 // Constructors 138 // 139 140 /** Default constructor. */ 141 public XMLEntityScanner() { 142 } // <init>() 143 144 145 /** private constructor, this class can only be instantiated within this class. Instance of this class should 146 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 147 * @see getEntityScanner() 148 * @see getEntityScanner(ScannedEntity) 149 */ 150 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 151 fEntityManager = entityManager ; 152 reset(propertyManager); 153 } // <init>() 154 155 156 // set buffer size: 157 public final void setBufferSize(int size) { 158 // REVISIT: Buffer size passed to entity scanner 159 // was not being kept in synch with the actual size 160 // of the buffers in each scanned entity. If any 161 // of the buffers were actually resized, it was possible 162 // that the parser would throw an ArrayIndexOutOfBoundsException 163 // for documents which contained names which are longer than 164 // the current buffer size. 
Conceivably the buffer size passed 165 // to entity scanner could be used to determine a minimum size 166 // for resizing, if doubling its size is smaller than this 167 // minimum. -- mrglavas 168 fBufferSize = size; 169 } 170 171 /** 172 * Resets the components. 173 */ 174 public void reset(PropertyManager propertyManager){ 175 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 176 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 177 fCurrentEntity = null; 178 whiteSpaceLen = 0; 179 whiteSpaceInfoNeeded = true; 180 listeners.clear(); 181 } 182 183 /** 184 * Resets the component. The component can query the component manager 185 * about any features and properties that affect the operation of the 186 * component. 187 * 188 * @param componentManager The component manager. 189 * 190 * @throws SAXException Thrown by component on initialization error. 191 * For example, if a feature or property is 192 * required for the operation of the component, the 193 * component manager may throw a 194 * SAXNotRecognizedException or a 195 * SAXNotSupportedException. 
196 */ 197 public void reset(XMLComponentManager componentManager) 198 throws XMLConfigurationException { 199 200 //System.out.println(" this is being called"); 201 // xerces features 202 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 203 204 //xerces properties 205 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 206 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 207 fCurrentEntity = null; 208 whiteSpaceLen = 0; 209 whiteSpaceInfoNeeded = true; 210 listeners.clear(); 211 } // reset(XMLComponentManager) 212 213 214 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 215 XMLErrorReporter reporter) { 216 fCurrentEntity = null; 217 fSymbolTable = symbolTable; 218 fEntityManager = entityManager; 219 fErrorReporter = reporter; 220 } 221 222 /** 223 * Returns the XML version of the current entity. This will normally be the 224 * value from the XML or text declaration or defaulted by the parser. Note that 225 * that this value may be different than the version of the processing rules 226 * applied to the current entity. For instance, an XML 1.1 document may refer to 227 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 228 * document. Also note that, for a given entity, this value can only be considered 229 * final once the XML or text declaration has been read or once it has been 230 * determined that there is no such declaration. 231 */ 232 public final String getXMLVersion() { 233 if (fCurrentEntity != null) { 234 return fCurrentEntity.xmlVersion; 235 } 236 return null; 237 } // getXMLVersion():String 238 239 /** 240 * Sets the XML version. This method is used by the 241 * scanners to report the value of the version pseudo-attribute 242 * in an XML or text declaration. 
243 * 244 * @param xmlVersion the XML version of the current entity 245 */ 246 public final void setXMLVersion(String xmlVersion) { 247 xmlVersionSetExplicitly = true; // SAPJVM 248 fCurrentEntity.xmlVersion = xmlVersion; 249 } // setXMLVersion(String) 250 251 252 /** set the instance of current scanned entity. 253 * @param ScannedEntity 254 */ 255 256 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 257 fCurrentEntity = scannedEntity ; 258 if(fCurrentEntity != null){ 259 isExternal = fCurrentEntity.isExternal(); 260 if(DEBUG_BUFFER) 261 System.out.println("Current Entity is "+scannedEntity.name); 262 } 263 } 264 265 public Entity.ScannedEntity getCurrentEntity(){ 266 return fCurrentEntity ; 267 } 268 // 269 // XMLEntityReader methods 270 // 271 272 /** 273 * Returns the base system identifier of the currently scanned 274 * entity, or null if none is available. 275 */ 276 public final String getBaseSystemId() { 277 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 278 } // getBaseSystemId():String 279 280 /** 281 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 282 */ 283 public void setBaseSystemId(String systemId) { 284 //no-op 285 } 286 287 ///////////// Locator methods start. 288 public final int getLineNumber(){ 289 //if the entity is closed, we should return -1 290 //xxx at first place why such call should be there... 291 return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; 292 } 293 294 /** 295 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 296 */ 297 public void setLineNumber(int line) { 298 //no-op 299 } 300 301 302 public final int getColumnNumber(){ 303 //if the entity is closed, we should return -1 304 //xxx at first place why such call should be there... 305 return fCurrentEntity != null ? 
fCurrentEntity.columnNumber : -1 ; 306 } 307 308 /** 309 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 310 */ 311 public void setColumnNumber(int col) { 312 // no-op 313 } 314 315 316 public final int getCharacterOffset(){ 317 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 318 } 319 320 /** Returns the expanded system identifier. */ 321 public final String getExpandedSystemId() { 322 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 323 } 324 325 /** 326 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 327 */ 328 public void setExpandedSystemId(String systemId) { 329 //no-op 330 } 331 332 /** Returns the literal system identifier. */ 333 public final String getLiteralSystemId() { 334 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 335 } 336 337 /** 338 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 339 */ 340 public void setLiteralSystemId(String systemId) { 341 //no-op 342 } 343 344 /** Returns the public identifier. */ 345 public final String getPublicId() { 346 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 347 } 348 349 /** 350 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 351 */ 352 public void setPublicId(String publicId) { 353 //no-op 354 } 355 356 ///////////////// Locator methods finished. 
357 358 /** the version of the current entity being scanned */ 359 public void setVersion(String version){ 360 fCurrentEntity.version = version; 361 } 362 363 public String getVersion(){ 364 if (fCurrentEntity != null) 365 return fCurrentEntity.version ; 366 return null; 367 } 368 369 /** 370 * Returns the encoding of the current entity. 371 * Note that, for a given entity, this value can only be 372 * considered final once the encoding declaration has been read (or once it 373 * has been determined that there is no such declaration) since, no encoding 374 * having been specified on the XMLInputSource, the parser 375 * will make an initial "guess" which could be in error. 376 */ 377 public final String getEncoding() { 378 if (fCurrentEntity != null) { 379 return fCurrentEntity.encoding; 380 } 381 return null; 382 } // getEncoding():String 383 384 /** 385 * Sets the encoding of the scanner. This method is used by the 386 * scanners if the XMLDecl or TextDecl line contains an encoding 387 * pseudo-attribute. 388 * <p> 389 * <strong>Note:</strong> The underlying character reader on the 390 * current entity will be changed to accomodate the new encoding. 391 * However, the new encoding is ignored if the current reader was 392 * not constructed from an input stream (e.g. an external entity 393 * that is resolved directly to the appropriate java.io.Reader 394 * object). 395 * 396 * @param encoding The IANA encoding name of the new encoding. 397 * 398 * @throws IOException Thrown if the new encoding is not supported. 
399 * 400 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 401 */ 402 public final void setEncoding(String encoding) throws IOException { 403 404 if (DEBUG_ENCODINGS) { 405 System.out.println("$$$ setEncoding: "+encoding); 406 } 407 408 if (fCurrentEntity.stream != null) { 409 // if the encoding is the same, don't change the reader and 410 // re-use the original reader used by the OneCharReader 411 // NOTE: Besides saving an object, this overcomes deficiencies 412 // in the UTF-16 reader supplied with the standard Java 413 // distribution (up to and including 1.3). The UTF-16 414 // decoder buffers 8K blocks even when only asked to read 415 // a single char! -Ac 416 if (fCurrentEntity.encoding == null || 417 !fCurrentEntity.encoding.equals(encoding)) { 418 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 419 // and we know the endian-ness, we shouldn't change readers. 420 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 421 // the endian-ness from the encoding we presently have. 
422 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 423 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 424 if(ENCODING.equals("UTF-16")) return; 425 if(ENCODING.equals("ISO-10646-UCS-4")) { 426 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 427 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 428 } else { 429 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 430 } 431 return; 432 } 433 if(ENCODING.equals("ISO-10646-UCS-2")) { 434 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 435 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 436 } else { 437 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 438 } 439 return; 440 } 441 } 442 // wrap a new reader around the input stream, changing 443 // the encoding 444 if (DEBUG_ENCODINGS) { 445 System.out.println("$$$ creating new reader from stream: "+ 446 fCurrentEntity.stream); 447 } 448 //fCurrentEntity.stream.reset(); 449 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 450 fCurrentEntity.encoding = encoding; 451 452 } else { 453 if (DEBUG_ENCODINGS) 454 System.out.println("$$$ reusing old reader on stream"); 455 } 456 } 457 458 } // setEncoding(String) 459 460 /** Returns true if the current entity being scanned is external. */ 461 public final boolean isExternal() { 462 return fCurrentEntity.isExternal(); 463 } // isExternal():boolean 464 465 public int getChar(int relative) throws IOException{ 466 if(arrangeCapacity(relative + 1, false)){ 467 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 468 }else{ 469 return -1; 470 } 471 }//getChar() 472 473 /** 474 * Returns the next character on the input. 475 * <p> 476 * <strong>Note:</strong> The character is <em>not</em> consumed. 477 * 478 * @throws IOException Thrown if i/o error occurs. 479 * @throws EOFException Thrown on end of file. 
     */
    public int peekChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(peekChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // peek at character (position is not advanced)
        int c = fCurrentEntity.ch[fCurrentEntity.position];

        // return peeked character
        if (DEBUG_BUFFER) {
            System.out.print(")peekChar: ");
            print();
            if (isExternal) {
                System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
            } else {
                System.out.println(" -> '"+(char)c+"'");
            }
        }
        // in an external entity a carriage return is reported as a line feed
        if (isExternal) {
            return c != '\r' ? c : '\n';
        } else {
            return c;
        }

    } // peekChar():int

    /**
     * Returns the next character on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     * <p>
     * A '\n' (or, in an external entity, a '\r') advances the line number and
     * sets the column number to 1. In an external entity a '\r' is returned
     * as '\n', and a '\n' directly following it is consumed as well. The
     * column number is incremented once more before returning.
     *
     * @throws IOException Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (fCurrentEntity.position == fCurrentEntity.count) {
                // buffer exhausted: keep the newline at index 0 and load the
                // following characters behind it, so the "\r\n" check below
                // can look at the next character
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false, false);
            }
            if (c == '\r' && isExternal) {
                // swallow the '\n' of a "\r\n" pair; otherwise step back
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int

    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @throws IOException Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    public String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan nmtoken; offset marks where the token starts in the buffer
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // ASCII goes through the lookup table, everything else through XMLChar
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            if (++fCurrentEntity.position == fCurrentEntity.count) {
                // end of buffer reached in the middle of the token: move the
                // part scanned so far to the front and load more behind it
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // NOTE(review): load(...) is defined elsewhere; a true result
                // presumably means no further characters of this entity were
                // appended -- confirm
                if (load(length, false, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken (null when nothing was scanned)
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String

    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @throws IOException Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public String scanName() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan name; the first character must be a name start character
        int offset = fCurrentEntity.position;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                // the name start was the last buffered character: keep it at
                // index 0 and load the rest behind it
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                if (load(1, false, false)) {
                    // the name consists of that single character
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // ASCII goes through the lookup table, everything else through XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    // end of buffer reached in the middle of the name: move
                    // the part scanned so far to the front and load more
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    offset = 0;
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name (null when the input does not start with a name)
        String symbol;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String

    /**
     * Scans a qualified name from the input, setting the fields of the
     * QName structure appropriately.
     * <p>
     * <strong>Note:</strong> The qualified name characters are consumed.
     * <p>
     * <strong>Note:</strong> The strings used to set the values of the
     * QName structure must be symbols. The SymbolTable can be used for
     * this purpose.
     *
     * @param qname The qualified name structure to fill.
     *
     * @return Returns true if a qualified name appeared immediately on
     *         the input and was scanned, false otherwise.
     *
     * @throws IOException Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public boolean scanQName(QName qname) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanQName, "+qname+": ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan qualified name; offset marks where the name starts in the buffer
        int offset = fCurrentEntity.position;

        // Check that the first character is a valid name start character
        // as defined by production [5] in the XML 1.0 specification.
        // Name ::= (Letter | '_' | ':') (NameChar)*

        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                // the name start was the last buffered character: keep it at
                // index 0 and load the rest behind it
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;

                if (load(1, false, false)) {
                    // the name consists of that single character; it is used
                    // as both local part and raw name, with no prefix
                    fCurrentEntity.columnNumber++;
                    //adding into symbol table.
                    //XXX We are trying to add single character in SymbolTable??????
                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
                    qname.setValues(null, name, name, null);
                    if (DEBUG_BUFFER) {
                        System.out.print(")scanQName, "+qname+": ");
                        print();
                        System.out.println(" -> true");
                    }
                    return true;
                }
            }
            // buffer index of the first ':' seen, or -1 while none was seen
            int index = -1;
            boolean vc = false;
            while ( true){

                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // ASCII goes through the lookup table, everything else through XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                if (c == ':') {
                    // a second colon ends the name; it is not consumed
                    if (index != -1) {
                        break;
                    }
                    index = fCurrentEntity.position;
                }
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    // end of buffer reached in the middle of the name: move
                    // the part scanned so far to the front and load more
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    // the colon moved together with the rest of the name
                    if (index != -1) {
                        index = index - offset;
                    }
                    offset = 0;
                    // NOTE(review): load(...) is defined elsewhere; a true
                    // result presumably means no further characters of this
                    // entity were appended -- confirm
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length;
            if (length > 0) {
                String prefix = null;
                String localpart = null;
                String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
                        offset, length);

                if (index != -1) {
                    // split "prefix:localpart" around the colon
                    int prefixLength = index - offset;
                    prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            offset, prefixLength);
                    int len = length - prefixLength - 1;
                    localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            index + 1, len);

                } else {
                    localpart = rawname;
                }
                qname.setValues(prefix, localpart, rawname, null);
                if (DEBUG_BUFFER) {
                    System.out.print(")scanQName, "+qname+": ");
                    print();
                    System.out.println(" -> true");
                }
                return true;
            }
        }

        // no qualified name found
        if (DEBUG_BUFFER) {
            System.out.print(")scanQName, "+qname+": ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // scanQName(QName):boolean

    /**
     * Scans a range of parsed character data and points the supplied
     * XMLString at the scanned range of the entity buffer.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // only one character left: keep it at index 0 and load more
            // behind it, so the newline handling below can look ahead
            // NOTE(review): listeners are notified with 0 here although one
            // character is retained and load starts at offset 1; scanChar and
            // scanName pass 1 in the same situation -- confirm this is intended
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // number of newline characters counted by the loop below; subtracted
        // from the column advance at the end
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // consume the leading run of newline characters
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: restart at the front, reserving
                        // one slot per newline counted so far
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: skip the '\n' and drop one leading
                        // slot so the pair yields a single newline
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        // NOTE(review): a lone '\r' increments newlines a
                        // second time here -- confirm the double count is
                        // intended
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                } else {
                    // not a newline: un-read it and leave the loop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the consumed range with line feeds
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only newlines were found up to the last buffered character:
                // hand them back and report the next character as unknown.
                // The content is set (replaced); the appending variant below
                // is commented out.
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until one is not content or the buffer ends
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;

        // The content is set (replaced); the appending variant below is
        // commented out.
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer exhausted: the next character is not known yet
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int

    /**
     * Scans a range of attribute value data, setting the fields of the
     * XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
1027 * <p> 1028 * <strong>Note:</strong> This method does not guarantee to return 1029 * the longest run of attribute value data. This method may return 1030 * before the quote character due to reaching the end of the input 1031 * buffer or any other reason. 1032 * <p> 1033 * <strong>Note:</strong> The fields contained in the XMLString 1034 * structure are not guaranteed to remain valid upon subsequent calls 1035 * to the entity scanner. Therefore, the caller is responsible for 1036 * immediately using the returned character data or making a copy of 1037 * the character data. 1038 * 1039 * @param quote The quote character that signifies the end of the 1040 * attribute value data. 1041 * @param content The content structure to fill. 1042 * 1043 * @return Returns the next character on the input, if known. This 1044 * value may be -1 but this does <em>note</em> designate 1045 * end of file. 1046 * 1047 * @throws IOException Thrown if i/o error occurs. 1048 * @throws EOFException Thrown on end of file. 
     */
    public int scanLiteral(int quote, XMLString content)
    throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println();
        }
        // Load more characters, if needed.
        // - Buffer fully consumed: refill from index 0 (may switch entities,
        //   notifies listeners).
        // - Exactly one unread character left: notify listeners, move it to
        //   index 0, load the rest behind it and restart at index 0.
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // Normalize a leading run of newlines.
        // NOTE(review): isExternal is declared outside this chunk; presumably it
        // is true when the current entity is an external one -- confirm.
        int offset = fCurrentEntity.position;   // start of the span handed to 'content'
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;                       // subtracted from the column advance below
        // Start a fresh list of recorded whitespace positions for this call.
        if(whiteSpaceInfoNeeded)
            whiteSpaceLen=0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // Buffer exhausted inside the run: restart the span at the
                        // front of the buffer, reserving 'newlines' slots that are
                        // overwritten with '\n' after the loop, and load behind them.
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            // end of entity reached while loading
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: consume the '\n' and drop one character
                        // from the front of the reported span.
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                    /***/
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same refill handling as in the '\r' branch
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * offset++;
                     * }
                     * /***/
                } else {
                    // not a newline: un-read the character and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // Rewrite every character of the span as a line feed and record its
            // position. Note that these positions are recorded regardless of
            // whiteSpaceInfoNeeded.
            int i=0;
            for ( i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
                storeWhiteSpace(i);
            }

            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // Only one unread character remains: return just the newlines
                // and report -1 as the next character.
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // Scan the literal value. Stop (without consuming) at:
        //  - the quote character, unless the current entity is flagged as a
        //    literal and isExternal is false;
        //  - a '%' character;
        //  - any character for which XMLChar.isContent() is false.
        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            if ((c == quote &&
                 (!fCurrentEntity.literal || isExternal)) ||
                 c == '%' || !XMLChar.isContent(c)) {
                break;
            }
            // record tab positions when whitespace information was requested
            if (whiteSpaceInfoNeeded && c == '\t') {
                storeWhiteSpace(fCurrentEntity.position);
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;
        content.setValues(fCurrentEntity.ch, offset, length);

        // Return the next character, or -1 when the buffer is exhausted.
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // NOTE: We don't want to accidentally signal the
            //       end of the literal if we're expanding an
            //       entity appearing in the literal. -Ac
            if (c == quote && fCurrentEntity.literal) {
                c = -1;
            }
        } else {
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanLiteral(int,XMLString):int

    /**
     * Save whitespace information. Increase the whitespace buffer by 100
     * when needed.
     *
     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
     *
     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
     */
    private void storeWhiteSpace(int whiteSpacePos) {
        // grow the lookup array by 100 slots once it is full
        if (whiteSpaceLen >= whiteSpaceLookup.length) {
            int [] tmp = new int[whiteSpaceLookup.length + 100];
            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
            whiteSpaceLookup = tmp;
        }

        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
    }

    //CHANGED:
    /**
     * Scans a range of character data up to the specified delimiter,
     * setting the fields of the XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character (its first character is read unconditionally)
     * and, presumably, that the internal buffer is at least as long as
     * the delimiter.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true if there is more data to scan, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;                   // set once the full delimiter is matched
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);     // requires a non-empty delimiter
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }

            boolean bNextEntity = false;

            // While fewer than delimLen unread characters remain, shift them to
            // the front of the buffer and load more behind them. The load is
            // not allowed to switch entities; it returns true at end of entity,
            // which ends this loop.
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
                System.arraycopy(fCurrentEntity.ch,
                                 fCurrentEntity.position,
                                 fCurrentEntity.ch,
                                 0,
                                 fCurrentEntity.count - fCurrentEntity.position);

                bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
                fCurrentEntity.position = 0;
                fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // Something must be wrong with the input: e.g., the file ends in
                // an unterminated comment. Hand over what is left, mark the
                // buffer as consumed, let load() move on to the next entity and
                // report that there is no more data.
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true, false);
                return false;
            }

            // Normalize a leading run of newlines.
            // NOTE(review): isExternal is declared outside this chunk; presumably
            // it is true when the current entity is an external one -- confirm.
            int offset = fCurrentEntity.position;   // start of the span appended to 'buffer'
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;                       // subtracted from the column advance below
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer exhausted inside the run: restart the span at
                            // the front of the buffer, reserving 'newlines' slots,
                            // and load behind them.
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: consume the '\n' and drop one
                            // character from the front of the reported span.
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            // NOTE(review): unlike the '\r' branch above, this
                            // branch also resets count before loading -- confirm
                            // whether the asymmetry is intended.
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                    } else {
                        // not a newline: un-read the character and stop
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // Rewrite every character of the span as a line feed.
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // Only one unread character remains: append the newlines and
                    // tell the caller there is more data to scan.
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer ends inside a possible delimiter: rewind to
                            // its first character and leave the scan loop so the
                            // next pass of the outer do-loop can reload.
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // Mismatch: rewind to just past the first delimiter
                            // character and keep scanning.
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        // every delimiter character matched
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // Leave the newline unread; it is normalized on the next
                    // pass of the outer do-loop.
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // Invalid character: leave it unread, append what was
                    // scanned so far and return to the caller.
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter itself is consumed but not appended
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // The loop above only exits with done == true, so this is always false.
        return !done;

    } // scanData(String,XMLString)

    /**
     * Skips a character appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed only if it matches
     * the specified character.
     *
     * @param c The character to skip.
     *
     * @return Returns true if the character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
1403 */ 1404 public boolean skipChar(int c) throws IOException { 1405 if (DEBUG_BUFFER) { 1406 System.out.print("(skipChar, '"+(char)c+"': "); 1407 print(); 1408 System.out.println(); 1409 } 1410 1411 // load more characters, if needed 1412 if (fCurrentEntity.position == fCurrentEntity.count) { 1413 load(0, true, true); 1414 } 1415 1416 // skip character 1417 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1418 if (cc == c) { 1419 fCurrentEntity.position++; 1420 if (c == '\n') { 1421 fCurrentEntity.lineNumber++; 1422 fCurrentEntity.columnNumber = 1; 1423 } else { 1424 fCurrentEntity.columnNumber++; 1425 } 1426 if (DEBUG_BUFFER) { 1427 System.out.print(")skipChar, '"+(char)c+"': "); 1428 print(); 1429 System.out.println(" -> true"); 1430 } 1431 return true; 1432 } else if (c == '\n' && cc == '\r' && isExternal) { 1433 // handle newlines 1434 if (fCurrentEntity.position == fCurrentEntity.count) { 1435 invokeListeners(1); 1436 fCurrentEntity.ch[0] = (char)cc; 1437 load(1, false, false); 1438 } 1439 fCurrentEntity.position++; 1440 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1441 fCurrentEntity.position++; 1442 } 1443 fCurrentEntity.lineNumber++; 1444 fCurrentEntity.columnNumber = 1; 1445 if (DEBUG_BUFFER) { 1446 System.out.print(")skipChar, '"+(char)c+"': "); 1447 print(); 1448 System.out.println(" -> true"); 1449 } 1450 return true; 1451 } 1452 1453 // character was not skipped 1454 if (DEBUG_BUFFER) { 1455 System.out.print(")skipChar, '"+(char)c+"': "); 1456 print(); 1457 System.out.println(" -> false"); 1458 } 1459 return false; 1460 1461 } // skipChar(int):boolean 1462 1463 public boolean isSpace(char ch){ 1464 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1465 } 1466 /** 1467 * Skips space characters appearing immediately on the input. 1468 * <p> 1469 * <strong>Note:</strong> The characters are consumed only if they are 1470 * space characters. 1471 * 1472 * @return Returns true if at least one space character was skipped. 
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        //boolean entityChanged = false;
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // We are doing this check only in skipSpaces() because it is called by
        // fMiscDispatcher and we want the parser to exit gracefully when the
        // document is well-formed.
        // It is possible that the end of the document is reached and
        // fCurrentEntity becomes null. Nothing was read in that case, so
        // false is returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        // NOTE(review): isExternal is declared outside this chunk; presumably it
        // is true when the current entity is an external one -- confirm.
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        // The newline is the last buffered character: keep it at
                        // index 0 and load the rest of the buffer behind it so
                        // that a following '\n' can be inspected below.
                        invokeListeners(0);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged){
                            // the load change the position to be 1,
                            // need to restore it when entity not changed
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            // a space was already seen, so report success
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // Step onto the character after '\r'; step back again
                        // unless it is the '\n' of a "\r\n" pair.
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // consume the current character unless the entity changed above
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    // We are doing this check only in skipSpaces() because it is
                    // called by fMiscDispatcher and we want the parser to exit
                    // gracefully when the document is well-formed.

                    // It is possible that the end of the document is reached and
                    // fCurrentEntity becomes null. At least one space has already
                    // been skipped at this point, so true is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean


    /**
     * Checks that at least the given number of characters is available in
     * the underlying buffer, loading more if necessary. Equivalent to
     * calling {@code arrangeCapacity(length, false)}.
     *
     * @param length The number of characters that must be available.
     * @return Returns true if the requested capacity is available.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }

    /**
     * Checks that at least the given number of characters is available in
     * the underlying buffer, loading more if necessary.
     *
     * @param length       The number of characters that must be available.
     * @param changeEntity Passed on to the load operation: true if loading
     *                     may change the current entity.
     * @return Returns true if the requested capacity is available.
1586 * 1587 */ 1588 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1589 //check if the capacity is availble in the current buffer 1590 //count is no. of characters in the buffer [x][m][l] 1591 //position is '0' based 1592 //System.out.println("fCurrent Entity " + fCurrentEntity); 1593 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1594 return true; 1595 } 1596 if(DEBUG_SKIP_STRING){ 1597 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1598 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1599 System.out.println("length = " + length); 1600 } 1601 boolean entityChanged = false; 1602 //load more characters -- this function shouldn't change the entity 1603 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1604 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1605 invokeListeners(0); 1606 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1607 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1608 fCurrentEntity.position = 0; 1609 } 1610 1611 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1612 int pos = fCurrentEntity.position; 1613 invokeListeners(pos); 1614 entityChanged = load(fCurrentEntity.count, changeEntity, false); 1615 fCurrentEntity.position = pos; 1616 if(entityChanged)break; 1617 } 1618 if(DEBUG_SKIP_STRING){ 1619 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1620 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1621 System.out.println("length = " + length); 1622 } 1623 } 1624 //load changes the position.. set it back to the point where we started. 1625 1626 //after loading check again. 
1627 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1628 return true; 1629 } else { 1630 return false; 1631 } 1632 } 1633 1634 /** 1635 * Skips the specified string appearing immediately on the input. 1636 * <p> 1637 * <strong>Note:</strong> The characters are consumed only if all 1638 * the characters are skipped. 1639 * 1640 * @param s The string to skip. 1641 * 1642 * @return Returns true if the string was skipped. 1643 * 1644 * @throws IOException Thrown if i/o error occurs. 1645 * @throws EOFException Thrown on end of file. 1646 */ 1647 public boolean skipString(String s) throws IOException { 1648 1649 final int length = s.length(); 1650 1651 //first make sure that required capacity is avaible 1652 if(arrangeCapacity(length, false)){ 1653 final int beforeSkip = fCurrentEntity.position ; 1654 int afterSkip = fCurrentEntity.position + length - 1 ; 1655 if(DEBUG_SKIP_STRING){ 1656 System.out.println("skipString,length = " + s + "," + length); 1657 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1658 } 1659 1660 //s.charAt() indexes are 0 to 'Length -1' based. 
1661 int i = length - 1 ; 1662 //check from reverse 1663 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1664 if(afterSkip-- == beforeSkip){ 1665 fCurrentEntity.position = fCurrentEntity.position + length ; 1666 fCurrentEntity.columnNumber += length; 1667 return true; 1668 } 1669 } 1670 } 1671 1672 return false; 1673 } // skipString(String):boolean 1674 1675 public boolean skipString(char [] s) throws IOException { 1676 1677 final int length = s.length; 1678 //first make sure that required capacity is avaible 1679 if(arrangeCapacity(length, false)){ 1680 int beforeSkip = fCurrentEntity.position ; 1681 int afterSkip = fCurrentEntity.position + length ; 1682 1683 if(DEBUG_SKIP_STRING){ 1684 System.out.println("skipString,length = " + new String(s) + "," + length); 1685 System.out.println("skipString,length = " + new String(s) + "," + length); 1686 } 1687 1688 for(int i=0;i<length;i++){ 1689 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1690 return false; 1691 } 1692 } 1693 fCurrentEntity.position = fCurrentEntity.position + length ; 1694 fCurrentEntity.columnNumber += length; 1695 return true; 1696 1697 } 1698 1699 return false; 1700 } 1701 1702 // 1703 // Locator methods 1704 // 1705 // 1706 // Private methods 1707 // 1708 1709 /** 1710 * Loads a chunk of text. 1711 * 1712 * @param offset The offset into the character buffer to 1713 * read the next batch of characters. 1714 * @param changeEntity True if the load should change entities 1715 * at the end of the entity, otherwise leave 1716 * the current entity in place and the entity 1717 * boundary will be signaled by the return 1718 * value. 1719 * @param notify Determine whether to notify listeners of 1720 * the event 1721 * 1722 * @returns Returns true if the entity changed as a result of this 1723 * load operation. 
1724 */ 1725 final boolean load(int offset, boolean changeEntity, boolean notify) 1726 throws IOException { 1727 if (DEBUG_BUFFER) { 1728 System.out.print("(load, "+offset+": "); 1729 print(); 1730 System.out.println(); 1731 } 1732 if (notify) { 1733 invokeListeners(offset); 1734 } 1735 //maintaing the count till last load 1736 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1737 // read characters 1738 int length = fCurrentEntity.ch.length - offset; 1739 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1740 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1741 } 1742 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1743 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1744 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1745 1746 // reset count and position 1747 boolean entityChanged = false; 1748 if (count != -1) { 1749 if (count != 0) { 1750 // record the last count 1751 fCurrentEntity.fLastCount = count; 1752 fCurrentEntity.count = count + offset; 1753 fCurrentEntity.position = offset; 1754 } 1755 } 1756 // end of this entity 1757 else { 1758 fCurrentEntity.count = offset; 1759 fCurrentEntity.position = offset; 1760 entityChanged = true; 1761 1762 if (changeEntity) { 1763 //notify the entity manager about the end of entity 1764 fEntityManager.endEntity(); 1765 //return if the current entity becomes null 1766 if(fCurrentEntity == null){ 1767 throw END_OF_DOCUMENT_ENTITY; 1768 } 1769 // handle the trailing edges 1770 if (fCurrentEntity.position == fCurrentEntity.count) { 1771 load(0, true, false); 1772 } 1773 } 1774 1775 } 1776 if (DEBUG_BUFFER) { 1777 System.out.print(")load, "+offset+": "); 1778 print(); 1779 System.out.println(); 1780 } 1781 1782 return entityChanged; 1783 1784 } // load(int, boolean):boolean 1785 1786 /** 1787 * Creates a reader capable of reading the given 
input stream in 1788 * the specified encoding. 1789 * 1790 * @param inputStream The input stream. 1791 * @param encoding The encoding name that the input stream is 1792 * encoded using. If the user has specified that 1793 * Java encoding names are allowed, then the 1794 * encoding name may be a Java encoding name; 1795 * otherwise, it is an ianaEncoding name. 1796 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1797 * specify a byte order, this tells whether the order is bigEndian. null menas 1798 * unknown or not relevant. 1799 * 1800 * @return Returns a reader. 1801 */ 1802 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1803 throws IOException { 1804 1805 // normalize encoding name 1806 if (encoding == null) { 1807 encoding = "UTF-8"; 1808 } 1809 1810 // try to use an optimized reader 1811 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1812 if (ENCODING.equals("UTF-8")) { 1813 if (DEBUG_ENCODINGS) { 1814 System.out.println("$$$ creating UTF8Reader"); 1815 } 1816 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1817 } 1818 if (ENCODING.equals("US-ASCII")) { 1819 if (DEBUG_ENCODINGS) { 1820 System.out.println("$$$ creating ASCIIReader"); 1821 } 1822 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1823 } 1824 if(ENCODING.equals("ISO-10646-UCS-4")) { 1825 if(isBigEndian != null) { 1826 boolean isBE = isBigEndian.booleanValue(); 1827 if(isBE) { 1828 return new UCSReader(inputStream, UCSReader.UCS4BE); 1829 } else { 1830 return new UCSReader(inputStream, UCSReader.UCS4LE); 1831 } 1832 } else { 1833 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1834 "EncodingByteOrderUnsupported", 1835 new Object[] { encoding }, 1836 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1837 } 
1838 } 1839 if(ENCODING.equals("ISO-10646-UCS-2")) { 1840 if(isBigEndian != null) { // sould never happen with this encoding... 1841 boolean isBE = isBigEndian.booleanValue(); 1842 if(isBE) { 1843 return new UCSReader(inputStream, UCSReader.UCS2BE); 1844 } else { 1845 return new UCSReader(inputStream, UCSReader.UCS2LE); 1846 } 1847 } else { 1848 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1849 "EncodingByteOrderUnsupported", 1850 new Object[] { encoding }, 1851 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1852 } 1853 } 1854 1855 // check for valid name 1856 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1857 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1858 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1859 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1860 "EncodingDeclInvalid", 1861 new Object[] { encoding }, 1862 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1863 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1864 // because every byte is a valid ISO Latin 1 character. 1865 // It may not translate correctly but if we failed on 1866 // the encoding anyway, then we're expecting the content 1867 // of the document to be bad. This will just prevent an 1868 // invalid UTF-8 sequence to be detected. This is only 1869 // important when continue-after-fatal-error is turned 1870 // on. -Ac 1871 encoding = "ISO-8859-1"; 1872 } 1873 1874 // try to use a Java reader 1875 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1876 if (javaEncoding == null) { 1877 if(fAllowJavaEncodings) { 1878 javaEncoding = encoding; 1879 } else { 1880 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1881 "EncodingDeclInvalid", 1882 new Object[] { encoding }, 1883 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1884 // see comment above. 
1885 javaEncoding = "ISO8859_1"; 1886 } 1887 } 1888 else if (javaEncoding.equals("ASCII")) { 1889 if (DEBUG_ENCODINGS) { 1890 System.out.println("$$$ creating ASCIIReader"); 1891 } 1892 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1893 } 1894 1895 if (DEBUG_ENCODINGS) { 1896 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1897 if (javaEncoding == encoding) { 1898 System.out.print(" (IANA encoding)"); 1899 } 1900 System.out.println(); 1901 } 1902 return new InputStreamReader(inputStream, javaEncoding); 1903 1904 } // createReader(InputStream,String, Boolean): Reader 1905 1906 /** 1907 * Returns the IANA encoding name that is auto-detected from 1908 * the bytes specified, with the endian-ness of that encoding where appropriate. 1909 * 1910 * @param b4 The first four bytes of the input. 1911 * @param count The number of bytes actually read. 1912 * @return a 2-element array: the first element, an IANA-encoding string, 1913 * the second element a Boolean which is true iff the document is big endian, false 1914 * if it's little-endian, and null if the distinction isn't relevant. 
1915 */ 1916 protected Object[] getEncodingName(byte[] b4, int count) { 1917 1918 if (count < 2) { 1919 return new Object[]{"UTF-8", null}; 1920 } 1921 1922 // UTF-16, with BOM 1923 int b0 = b4[0] & 0xFF; 1924 int b1 = b4[1] & 0xFF; 1925 if (b0 == 0xFE && b1 == 0xFF) { 1926 // UTF-16, big-endian 1927 return new Object [] {"UTF-16BE", new Boolean(true)}; 1928 } 1929 if (b0 == 0xFF && b1 == 0xFE) { 1930 // UTF-16, little-endian 1931 return new Object [] {"UTF-16LE", new Boolean(false)}; 1932 } 1933 1934 // default to UTF-8 if we don't have enough bytes to make a 1935 // good determination of the encoding 1936 if (count < 3) { 1937 return new Object [] {"UTF-8", null}; 1938 } 1939 1940 // UTF-8 with a BOM 1941 int b2 = b4[2] & 0xFF; 1942 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1943 return new Object [] {"UTF-8", null}; 1944 } 1945 1946 // default to UTF-8 if we don't have enough bytes to make a 1947 // good determination of the encoding 1948 if (count < 4) { 1949 return new Object [] {"UTF-8", null}; 1950 } 1951 1952 // other encodings 1953 int b3 = b4[3] & 0xFF; 1954 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 1955 // UCS-4, big endian (1234) 1956 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 1957 } 1958 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 1959 // UCS-4, little endian (4321) 1960 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 1961 } 1962 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 1963 // UCS-4, unusual octet order (2143) 1964 // REVISIT: What should this be? 1965 return new Object [] {"ISO-10646-UCS-4", null}; 1966 } 1967 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 1968 // UCS-4, unusual octect order (3412) 1969 // REVISIT: What should this be? 1970 return new Object [] {"ISO-10646-UCS-4", null}; 1971 } 1972 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 1973 // UTF-16, big-endian, no BOM 1974 // (or could turn out to be UCS-2... 
1975 // REVISIT: What should this be? 1976 return new Object [] {"UTF-16BE", new Boolean(true)}; 1977 } 1978 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 1979 // UTF-16, little-endian, no BOM 1980 // (or could turn out to be UCS-2... 1981 return new Object [] {"UTF-16LE", new Boolean(false)}; 1982 } 1983 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 1984 // EBCDIC 1985 // a la xerces1, return CP037 instead of EBCDIC here 1986 return new Object [] {"CP037", null}; 1987 } 1988 1989 // default encoding 1990 return new Object [] {"UTF-8", null}; 1991 1992 } // getEncodingName(byte[],int):Object[] 1993 1994 /** 1995 * xxx not removing endEntity() so that i remember that we need to implement it. 1996 * Ends an entity. 1997 * 1998 * @throws XNIException Thrown by entity handler to signal an error. 1999 */ 2000 // 2001 /** Prints the contents of the buffer. */ 2002 final void print() { 2003 if (DEBUG_BUFFER) { 2004 if (fCurrentEntity != null) { 2005 System.out.print('['); 2006 System.out.print(fCurrentEntity.count); 2007 System.out.print(' '); 2008 System.out.print(fCurrentEntity.position); 2009 if (fCurrentEntity.count > 0) { 2010 System.out.print(" \""); 2011 for (int i = 0; i < fCurrentEntity.count; i++) { 2012 if (i == fCurrentEntity.position) { 2013 System.out.print('^'); 2014 } 2015 char c = fCurrentEntity.ch[i]; 2016 switch (c) { 2017 case '\n': { 2018 System.out.print("\\n"); 2019 break; 2020 } 2021 case '\r': { 2022 System.out.print("\\r"); 2023 break; 2024 } 2025 case '\t': { 2026 System.out.print("\\t"); 2027 break; 2028 } 2029 case '\\': { 2030 System.out.print("\\\\"); 2031 break; 2032 } 2033 default: { 2034 System.out.print(c); 2035 } 2036 } 2037 } 2038 if (fCurrentEntity.position == fCurrentEntity.count) { 2039 System.out.print('^'); 2040 } 2041 System.out.print('"'); 2042 } 2043 System.out.print(']'); 2044 System.out.print(" @ "); 2045 System.out.print(fCurrentEntity.lineNumber); 2046 System.out.print(','); 2047 
System.out.print(fCurrentEntity.columnNumber); 2048 } else { 2049 System.out.print("*NO CURRENT ENTITY*"); 2050 } 2051 } 2052 } 2053 2054 /** 2055 * Registers the listener object and provides callback. 2056 * @param listener listener to which call back should be provided when scanner buffer 2057 * is being changed. 2058 */ 2059 public void registerListener(XMLBufferListener listener) { 2060 if(!listeners.contains(listener)) 2061 listeners.add(listener); 2062 } 2063 2064 /** 2065 * 2066 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2067 */ 2068 public void invokeListeners(int loadPos){ 2069 for(int i=0;i<listeners.size();i++){ 2070 XMLBufferListener listener =(XMLBufferListener) listeners.get(i); 2071 listener.refresh(loadPos); 2072 } 2073 } 2074 2075 /** 2076 * Skips space characters appearing immediately on the input that would 2077 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2078 * normalization is performed. This is useful when scanning structures 2079 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2080 * characters. 2081 * <p> 2082 * <strong>Note:</strong> The characters are consumed only if they would 2083 * match non-terminal S before end of line normalization is performed. 2084 * 2085 * @return Returns true if at least one space character was skipped. 2086 * 2087 * @throws IOException Thrown if i/o error occurs. 2088 * @throws EOFException Thrown on end of file. 
2089 * 2090 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 2091 */ 2092 public final boolean skipDeclSpaces() throws IOException { 2093 if (DEBUG_BUFFER) { 2094 System.out.print("(skipDeclSpaces: "); 2095 //XMLEntityManager.print(fCurrentEntity); 2096 System.out.println(); 2097 } 2098 2099 // load more characters, if needed 2100 if (fCurrentEntity.position == fCurrentEntity.count) { 2101 load(0, true, false); 2102 } 2103 2104 // skip spaces 2105 int c = fCurrentEntity.ch[fCurrentEntity.position]; 2106 if (XMLChar.isSpace(c)) { 2107 boolean external = fCurrentEntity.isExternal(); 2108 do { 2109 boolean entityChanged = false; 2110 // handle newlines 2111 if (c == '\n' || (external && c == '\r')) { 2112 fCurrentEntity.lineNumber++; 2113 fCurrentEntity.columnNumber = 1; 2114 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 2115 fCurrentEntity.ch[0] = (char)c; 2116 entityChanged = load(1, true, false); 2117 if (!entityChanged) 2118 // the load change the position to be 1, 2119 // need to restore it when entity not changed 2120 fCurrentEntity.position = 0; 2121 } 2122 if (c == '\r' && external) { 2123 // REVISIT: Does this need to be updated to fix the 2124 // #x0D ^#x0A newline normalization problem? 
-Ac 2125 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 2126 fCurrentEntity.position--; 2127 } 2128 } 2129 /*** NEWLINE NORMALIZATION *** 2130 * else { 2131 * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' 2132 * && external) { 2133 * fCurrentEntity.position++; 2134 * } 2135 * } 2136 * /***/ 2137 } else { 2138 fCurrentEntity.columnNumber++; 2139 } 2140 // load more characters, if needed 2141 if (!entityChanged) 2142 fCurrentEntity.position++; 2143 if (fCurrentEntity.position == fCurrentEntity.count) { 2144 load(0, true, false); 2145 } 2146 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 2147 if (DEBUG_BUFFER) { 2148 System.out.print(")skipDeclSpaces: "); 2149 // XMLEntityManager.print(fCurrentEntity); 2150 System.out.println(" -> true"); 2151 } 2152 return true; 2153 } 2154 2155 // no spaces were found 2156 if (DEBUG_BUFFER) { 2157 System.out.print(")skipDeclSpaces: "); 2158 //XMLEntityManager.print(fCurrentEntity); 2159 System.out.println(" -> false"); 2160 } 2161 return false; 2162 2163 } // skipDeclSpaces():boolean 2164 2165 2166 } // class XMLEntityScanner