1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 24 25 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.util.EncodingMap; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 33 import com.sun.org.apache.xerces.internal.xni.*; 34 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 35 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 36 import com.sun.xml.internal.stream.Entity; 37 import com.sun.xml.internal.stream.XMLBufferListener; 38 import java.io.EOFException; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.InputStreamReader; 42 import java.io.Reader; 43 import java.util.Locale; 44 import java.util.Vector; 45 46 /** 47 * Implements the entity scanner methods. 
48 * 49 * @author Neeraj Bajaj, Sun Microsystems 50 * @author Andy Clark, IBM 51 * @author Arnaud Le Hors, IBM 52 * @author K.Venugopal Sun Microsystems 53 * 54 */ 55 public class XMLEntityScanner implements XMLLocator { 56 57 58 protected Entity.ScannedEntity fCurrentEntity = null ; 59 protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; 60 61 protected XMLEntityManager fEntityManager ; 62 63 /** Debug switching readers for encodings. */ 64 private static final boolean DEBUG_ENCODINGS = false; 65 /** Listeners which should know when load is being called */ 66 private Vector listeners = new Vector(); 67 68 private static final boolean [] VALID_NAMES = new boolean[127]; 69 70 /** 71 * Debug printing of buffer. This debugging flag works best when you 72 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 73 * 64 characters. 74 */ 75 private static final boolean DEBUG_BUFFER = false; 76 private static final boolean DEBUG_SKIP_STRING = false; 77 /** 78 * To signal the end of the document entity, this exception will be thrown. 79 */ 80 private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() { 81 private static final long serialVersionUID = 980337771224675268L; 82 public Throwable fillInStackTrace() { 83 return this; 84 } 85 }; 86 87 protected SymbolTable fSymbolTable = null; 88 protected XMLErrorReporter fErrorReporter = null; 89 int [] whiteSpaceLookup = new int[100]; 90 int whiteSpaceLen = 0; 91 boolean whiteSpaceInfoNeeded = true; 92 93 /** 94 * Allow Java encoding names. This feature identifier is: 95 * http://apache.org/xml/features/allow-java-encodings 96 */ 97 protected boolean fAllowJavaEncodings; 98 99 //Will be used only during internal subsets. 100 //for appending data. 101 102 /** Property identifier: symbol table. */ 103 protected static final String SYMBOL_TABLE = 104 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 105 106 /** Property identifier: error reporter. 
*/ 107 protected static final String ERROR_REPORTER = 108 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 109 110 /** Feature identifier: allow Java encodings. */ 111 protected static final String ALLOW_JAVA_ENCODINGS = 112 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 113 114 protected PropertyManager fPropertyManager = null ; 115 116 boolean isExternal = false; 117 static { 118 119 for(int i=0x0041;i<=0x005A ; i++){ 120 VALID_NAMES[i]=true; 121 } 122 for(int i=0x0061;i<=0x007A; i++){ 123 VALID_NAMES[i]=true; 124 } 125 for(int i=0x0030;i<=0x0039; i++){ 126 VALID_NAMES[i]=true; 127 } 128 VALID_NAMES[45]=true; 129 VALID_NAMES[46]=true; 130 VALID_NAMES[58]=true; 131 VALID_NAMES[95]=true; 132 } 133 // SAPJVM: Remember, that the XML version has explicitly been set, 134 // so that XMLStreamReader.getVersion() can find that out. 135 boolean xmlVersionSetExplicitly = false; 136 // 137 // Constructors 138 // 139 140 /** Default constructor. */ 141 public XMLEntityScanner() { 142 } // <init>() 143 144 145 /** private constructor, this class can only be instantiated within this class. Instance of this class should 146 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 147 * @see getEntityScanner() 148 * @see getEntityScanner(ScannedEntity) 149 */ 150 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 151 fEntityManager = entityManager ; 152 reset(propertyManager); 153 } // <init>() 154 155 156 // set buffer size: 157 public final void setBufferSize(int size) { 158 // REVISIT: Buffer size passed to entity scanner 159 // was not being kept in synch with the actual size 160 // of the buffers in each scanned entity. If any 161 // of the buffers were actually resized, it was possible 162 // that the parser would throw an ArrayIndexOutOfBoundsException 163 // for documents which contained names which are longer than 164 // the current buffer size. 
Conceivably the buffer size passed 165 // to entity scanner could be used to determine a minimum size 166 // for resizing, if doubling its size is smaller than this 167 // minimum. -- mrglavas 168 fBufferSize = size; 169 } 170 171 /** 172 * Resets the components. 173 */ 174 public void reset(PropertyManager propertyManager){ 175 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 176 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 177 fCurrentEntity = null; 178 whiteSpaceLen = 0; 179 whiteSpaceInfoNeeded = true; 180 listeners.clear(); 181 } 182 183 /** 184 * Resets the component. The component can query the component manager 185 * about any features and properties that affect the operation of the 186 * component. 187 * 188 * @param componentManager The component manager. 189 * 190 * @throws SAXException Thrown by component on initialization error. 191 * For example, if a feature or property is 192 * required for the operation of the component, the 193 * component manager may throw a 194 * SAXNotRecognizedException or a 195 * SAXNotSupportedException. 
196 */ 197 public void reset(XMLComponentManager componentManager) 198 throws XMLConfigurationException { 199 200 //System.out.println(" this is being called"); 201 // xerces features 202 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 203 204 //xerces properties 205 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 206 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 207 fCurrentEntity = null; 208 whiteSpaceLen = 0; 209 whiteSpaceInfoNeeded = true; 210 listeners.clear(); 211 } // reset(XMLComponentManager) 212 213 214 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 215 XMLErrorReporter reporter) { 216 fCurrentEntity = null; 217 fSymbolTable = symbolTable; 218 fEntityManager = entityManager; 219 fErrorReporter = reporter; 220 } 221 222 /** 223 * Returns the XML version of the current entity. This will normally be the 224 * value from the XML or text declaration or defaulted by the parser. Note that 225 * that this value may be different than the version of the processing rules 226 * applied to the current entity. For instance, an XML 1.1 document may refer to 227 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 228 * document. Also note that, for a given entity, this value can only be considered 229 * final once the XML or text declaration has been read or once it has been 230 * determined that there is no such declaration. 231 */ 232 public final String getXMLVersion() { 233 if (fCurrentEntity != null) { 234 return fCurrentEntity.xmlVersion; 235 } 236 return null; 237 } // getXMLVersion():String 238 239 /** 240 * Sets the XML version. This method is used by the 241 * scanners to report the value of the version pseudo-attribute 242 * in an XML or text declaration. 
243 * 244 * @param xmlVersion the XML version of the current entity 245 */ 246 public final void setXMLVersion(String xmlVersion) { 247 xmlVersionSetExplicitly = true; // SAPJVM 248 fCurrentEntity.xmlVersion = xmlVersion; 249 } // setXMLVersion(String) 250 251 252 /** set the instance of current scanned entity. 253 * @param ScannedEntity 254 */ 255 256 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 257 fCurrentEntity = scannedEntity ; 258 if(fCurrentEntity != null){ 259 isExternal = fCurrentEntity.isExternal(); 260 if(DEBUG_BUFFER) 261 System.out.println("Current Entity is "+scannedEntity.name); 262 } 263 } 264 265 public Entity.ScannedEntity getCurrentEntity(){ 266 return fCurrentEntity ; 267 } 268 // 269 // XMLEntityReader methods 270 // 271 272 /** 273 * Returns the base system identifier of the currently scanned 274 * entity, or null if none is available. 275 */ 276 public final String getBaseSystemId() { 277 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 278 } // getBaseSystemId():String 279 280 /** 281 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 282 */ 283 public void setBaseSystemId(String systemId) { 284 //no-op 285 } 286 287 ///////////// Locator methods start. 288 public final int getLineNumber(){ 289 //if the entity is closed, we should return -1 290 //xxx at first place why such call should be there... 291 return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; 292 } 293 294 /** 295 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 296 */ 297 public void setLineNumber(int line) { 298 //no-op 299 } 300 301 302 public final int getColumnNumber(){ 303 //if the entity is closed, we should return -1 304 //xxx at first place why such call should be there... 305 return fCurrentEntity != null ? 
fCurrentEntity.columnNumber : -1 ; 306 } 307 308 /** 309 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 310 */ 311 public void setColumnNumber(int col) { 312 // no-op 313 } 314 315 316 public final int getCharacterOffset(){ 317 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 318 } 319 320 /** Returns the expanded system identifier. */ 321 public final String getExpandedSystemId() { 322 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 323 } 324 325 /** 326 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 327 */ 328 public void setExpandedSystemId(String systemId) { 329 //no-op 330 } 331 332 /** Returns the literal system identifier. */ 333 public final String getLiteralSystemId() { 334 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 335 } 336 337 /** 338 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 339 */ 340 public void setLiteralSystemId(String systemId) { 341 //no-op 342 } 343 344 /** Returns the public identifier. */ 345 public final String getPublicId() { 346 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 347 } 348 349 /** 350 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 351 */ 352 public void setPublicId(String publicId) { 353 //no-op 354 } 355 356 ///////////////// Locator methods finished. 
357 358 /** the version of the current entity being scanned */ 359 public void setVersion(String version){ 360 fCurrentEntity.version = version; 361 } 362 363 public String getVersion(){ 364 if (fCurrentEntity != null) 365 return fCurrentEntity.version ; 366 return null; 367 } 368 369 /** 370 * Returns the encoding of the current entity. 371 * Note that, for a given entity, this value can only be 372 * considered final once the encoding declaration has been read (or once it 373 * has been determined that there is no such declaration) since, no encoding 374 * having been specified on the XMLInputSource, the parser 375 * will make an initial "guess" which could be in error. 376 */ 377 public final String getEncoding() { 378 if (fCurrentEntity != null) { 379 return fCurrentEntity.encoding; 380 } 381 return null; 382 } // getEncoding():String 383 384 /** 385 * Sets the encoding of the scanner. This method is used by the 386 * scanners if the XMLDecl or TextDecl line contains an encoding 387 * pseudo-attribute. 388 * <p> 389 * <strong>Note:</strong> The underlying character reader on the 390 * current entity will be changed to accomodate the new encoding. 391 * However, the new encoding is ignored if the current reader was 392 * not constructed from an input stream (e.g. an external entity 393 * that is resolved directly to the appropriate java.io.Reader 394 * object). 395 * 396 * @param encoding The IANA encoding name of the new encoding. 397 * 398 * @throws IOException Thrown if the new encoding is not supported. 
399 * 400 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 401 */ 402 public final void setEncoding(String encoding) throws IOException { 403 404 if (DEBUG_ENCODINGS) { 405 System.out.println("$$$ setEncoding: "+encoding); 406 } 407 408 if (fCurrentEntity.stream != null) { 409 // if the encoding is the same, don't change the reader and 410 // re-use the original reader used by the OneCharReader 411 // NOTE: Besides saving an object, this overcomes deficiencies 412 // in the UTF-16 reader supplied with the standard Java 413 // distribution (up to and including 1.3). The UTF-16 414 // decoder buffers 8K blocks even when only asked to read 415 // a single char! -Ac 416 if (fCurrentEntity.encoding == null || 417 !fCurrentEntity.encoding.equals(encoding)) { 418 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 419 // and we know the endian-ness, we shouldn't change readers. 420 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 421 // the endian-ness from the encoding we presently have. 
422 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 423 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 424 if(ENCODING.equals("UTF-16")) return; 425 if(ENCODING.equals("ISO-10646-UCS-4")) { 426 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 427 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 428 } else { 429 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 430 } 431 return; 432 } 433 if(ENCODING.equals("ISO-10646-UCS-2")) { 434 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 435 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 436 } else { 437 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 438 } 439 return; 440 } 441 } 442 // wrap a new reader around the input stream, changing 443 // the encoding 444 if (DEBUG_ENCODINGS) { 445 System.out.println("$$$ creating new reader from stream: "+ 446 fCurrentEntity.stream); 447 } 448 //fCurrentEntity.stream.reset(); 449 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 450 fCurrentEntity.encoding = encoding; 451 452 } else { 453 if (DEBUG_ENCODINGS) 454 System.out.println("$$$ reusing old reader on stream"); 455 } 456 } 457 458 } // setEncoding(String) 459 460 /** Returns true if the current entity being scanned is external. */ 461 public final boolean isExternal() { 462 return fCurrentEntity.isExternal(); 463 } // isExternal():boolean 464 465 public int getChar(int relative) throws IOException{ 466 if(arrangeCapacity(relative + 1, false)){ 467 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 468 }else{ 469 return -1; 470 } 471 }//getChar() 472 473 /** 474 * Returns the next character on the input. 475 * <p> 476 * <strong>Note:</strong> The character is <em>not</em> consumed. 477 * 478 * @throws IOException Thrown if i/o error occurs. 479 * @throws EOFException Thrown on end of file. 
480 */ 481 public int peekChar() throws IOException { 482 if (DEBUG_BUFFER) { 483 System.out.print("(peekChar: "); 484 print(); 485 System.out.println(); 486 } 487 488 // load more characters, if needed 489 if (fCurrentEntity.position == fCurrentEntity.count) { 490 load(0, true, true); 491 } 492 493 // peek at character 494 int c = fCurrentEntity.ch[fCurrentEntity.position]; 495 496 // return peeked character 497 if (DEBUG_BUFFER) { 498 System.out.print(")peekChar: "); 499 print(); 500 if (isExternal) { 501 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); 502 } else { 503 System.out.println(" -> '"+(char)c+"'"); 504 } 505 } 506 if (isExternal) { 507 return c != '\r' ? c : '\n'; 508 } else { 509 return c; 510 } 511 512 } // peekChar():int 513 514 /** 515 * Returns the next character on the input. 516 * <p> 517 * <strong>Note:</strong> The character is consumed. 518 * 519 * @throws IOException Thrown if i/o error occurs. 520 * @throws EOFException Thrown on end of file. 521 */ 522 public int scanChar() throws IOException { 523 if (DEBUG_BUFFER) { 524 System.out.print("(scanChar: "); 525 print(); 526 System.out.println(); 527 } 528 529 // load more characters, if needed 530 if (fCurrentEntity.position == fCurrentEntity.count) { 531 load(0, true, true); 532 } 533 534 // scan character 535 int c = fCurrentEntity.ch[fCurrentEntity.position++]; 536 if (c == '\n' || 537 (c == '\r' && isExternal)) { 538 fCurrentEntity.lineNumber++; 539 fCurrentEntity.columnNumber = 1; 540 if (fCurrentEntity.position == fCurrentEntity.count) { 541 fCurrentEntity.ch[0] = (char)c; 542 load(1, false, true); 543 } 544 if (c == '\r' && isExternal) { 545 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { 546 fCurrentEntity.position--; 547 } 548 c = '\n'; 549 } 550 } 551 552 // return character that was scanned 553 if (DEBUG_BUFFER) { 554 System.out.print(")scanChar: "); 555 print(); 556 System.out.println(" -> '"+(char)c+"'"); 557 } 558 fCurrentEntity.columnNumber++; 559 return c; 
560 561 } // scanChar():int 562 563 /** 564 * Returns a string matching the NMTOKEN production appearing immediately 565 * on the input as a symbol, or null if NMTOKEN Name string is present. 566 * <p> 567 * <strong>Note:</strong> The NMTOKEN characters are consumed. 568 * <p> 569 * <strong>Note:</strong> The string returned must be a symbol. The 570 * SymbolTable can be used for this purpose. 571 * 572 * @throws IOException Thrown if i/o error occurs. 573 * @throws EOFException Thrown on end of file. 574 * 575 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 576 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 577 */ 578 public String scanNmtoken() throws IOException { 579 if (DEBUG_BUFFER) { 580 System.out.print("(scanNmtoken: "); 581 print(); 582 System.out.println(); 583 } 584 585 // load more characters, if needed 586 if (fCurrentEntity.position == fCurrentEntity.count) { 587 load(0, true, true); 588 } 589 590 // scan nmtoken 591 int offset = fCurrentEntity.position; 592 boolean vc = false; 593 char c; 594 while (true){ 595 //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { 596 c = fCurrentEntity.ch[fCurrentEntity.position]; 597 if(c < 127){ 598 vc = VALID_NAMES[c]; 599 }else{ 600 vc = XMLChar.isName(c); 601 } 602 if(!vc)break; 603 604 if (++fCurrentEntity.position == fCurrentEntity.count) { 605 int length = fCurrentEntity.position - offset; 606 invokeListeners(length); 607 if (length == fCurrentEntity.fBufferSize) { 608 // bad luck we have to resize our buffer 609 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 610 System.arraycopy(fCurrentEntity.ch, offset, 611 tmp, 0, length); 612 fCurrentEntity.ch = tmp; 613 fCurrentEntity.fBufferSize *= 2; 614 } else { 615 System.arraycopy(fCurrentEntity.ch, offset, 616 fCurrentEntity.ch, 0, length); 617 } 618 offset = 0; 619 if (load(length, false, false)) { 620 break; 621 } 622 } 623 } 624 int length = fCurrentEntity.position - offset; 625 fCurrentEntity.columnNumber += 
length; 626 627 // return nmtoken 628 String symbol = null; 629 if (length > 0) { 630 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 631 } 632 if (DEBUG_BUFFER) { 633 System.out.print(")scanNmtoken: "); 634 print(); 635 System.out.println(" -> "+String.valueOf(symbol)); 636 } 637 return symbol; 638 639 } // scanNmtoken():String 640 641 /** 642 * Returns a string matching the Name production appearing immediately 643 * on the input as a symbol, or null if no Name string is present. 644 * <p> 645 * <strong>Note:</strong> The Name characters are consumed. 646 * <p> 647 * <strong>Note:</strong> The string returned must be a symbol. The 648 * SymbolTable can be used for this purpose. 649 * 650 * @throws IOException Thrown if i/o error occurs. 651 * @throws EOFException Thrown on end of file. 652 * 653 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 654 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 655 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 656 */ 657 public String scanName() throws IOException { 658 if (DEBUG_BUFFER) { 659 System.out.print("(scanName: "); 660 print(); 661 System.out.println(); 662 } 663 664 // load more characters, if needed 665 if (fCurrentEntity.position == fCurrentEntity.count) { 666 load(0, true, true); 667 } 668 669 // scan name 670 int offset = fCurrentEntity.position; 671 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 672 if (++fCurrentEntity.position == fCurrentEntity.count) { 673 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 674 offset = 0; 675 if (load(1, false, true)) { 676 fCurrentEntity.columnNumber++; 677 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 678 679 if (DEBUG_BUFFER) { 680 System.out.print(")scanName: "); 681 print(); 682 System.out.println(" -> "+String.valueOf(symbol)); 683 } 684 return symbol; 685 } 686 } 687 boolean vc =false; 688 while (true ){ 689 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 690 char c 
= fCurrentEntity.ch[fCurrentEntity.position]; 691 if(c < 127){ 692 vc = VALID_NAMES[c]; 693 }else{ 694 vc = XMLChar.isName(c); 695 } 696 if(!vc)break; 697 if (++fCurrentEntity.position == fCurrentEntity.count) { 698 int length = fCurrentEntity.position - offset; 699 invokeListeners(length); 700 if (length == fCurrentEntity.fBufferSize) { 701 // bad luck we have to resize our buffer 702 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 703 System.arraycopy(fCurrentEntity.ch, offset, 704 tmp, 0, length); 705 fCurrentEntity.ch = tmp; 706 fCurrentEntity.fBufferSize *= 2; 707 } else { 708 System.arraycopy(fCurrentEntity.ch, offset, 709 fCurrentEntity.ch, 0, length); 710 } 711 offset = 0; 712 if (load(length, false, false)) { 713 break; 714 } 715 } 716 } 717 } 718 int length = fCurrentEntity.position - offset; 719 fCurrentEntity.columnNumber += length; 720 721 // return name 722 String symbol; 723 if (length > 0) { 724 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 725 } else 726 symbol = null; 727 if (DEBUG_BUFFER) { 728 System.out.print(")scanName: "); 729 print(); 730 System.out.println(" -> "+String.valueOf(symbol)); 731 } 732 return symbol; 733 734 } // scanName():String 735 736 /** 737 * Scans a qualified name from the input, setting the fields of the 738 * QName structure appropriately. 739 * <p> 740 * <strong>Note:</strong> The qualified name characters are consumed. 741 * <p> 742 * <strong>Note:</strong> The strings used to set the values of the 743 * QName structure must be symbols. The SymbolTable can be used for 744 * this purpose. 745 * 746 * @param qname The qualified name structure to fill. 747 * 748 * @return Returns true if a qualified name appeared immediately on 749 * the input and was scanned, false otherwise. 750 * 751 * @throws IOException Thrown if i/o error occurs. 752 * @throws EOFException Thrown on end of file. 
753 * 754 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 755 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 756 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 757 */ 758 public boolean scanQName(QName qname) throws IOException { 759 if (DEBUG_BUFFER) { 760 System.out.print("(scanQName, "+qname+": "); 761 print(); 762 System.out.println(); 763 } 764 765 // load more characters, if needed 766 if (fCurrentEntity.position == fCurrentEntity.count) { 767 load(0, true, true); 768 } 769 770 // scan qualified name 771 int offset = fCurrentEntity.position; 772 773 //making a check if if the specified character is a valid name start character 774 //as defined by production [5] in the XML 1.0 specification. 775 // Name ::= (Letter | '_' | ':') (NameChar)* 776 777 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 778 if (++fCurrentEntity.position == fCurrentEntity.count) { 779 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 780 offset = 0; 781 782 if (load(1, false, true)) { 783 fCurrentEntity.columnNumber++; 784 //adding into symbol table. 785 //XXX We are trying to add single character in SymbolTable?????? 
786 String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 787 qname.setValues(null, name, name, null); 788 if (DEBUG_BUFFER) { 789 System.out.print(")scanQName, "+qname+": "); 790 print(); 791 System.out.println(" -> true"); 792 } 793 return true; 794 } 795 } 796 int index = -1; 797 boolean vc = false; 798 while ( true){ 799 800 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 801 char c = fCurrentEntity.ch[fCurrentEntity.position]; 802 if(c < 127){ 803 vc = VALID_NAMES[c]; 804 }else{ 805 vc = XMLChar.isName(c); 806 } 807 if(!vc)break; 808 if (c == ':') { 809 if (index != -1) { 810 break; 811 } 812 index = fCurrentEntity.position; 813 } 814 if (++fCurrentEntity.position == fCurrentEntity.count) { 815 int length = fCurrentEntity.position - offset; 816 invokeListeners(length); 817 if (length == fCurrentEntity.fBufferSize) { 818 // bad luck we have to resize our buffer 819 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 820 System.arraycopy(fCurrentEntity.ch, offset, 821 tmp, 0, length); 822 fCurrentEntity.ch = tmp; 823 fCurrentEntity.fBufferSize *= 2; 824 } else { 825 System.arraycopy(fCurrentEntity.ch, offset, 826 fCurrentEntity.ch, 0, length); 827 } 828 if (index != -1) { 829 index = index - offset; 830 } 831 offset = 0; 832 if (load(length, false, false)) { 833 break; 834 } 835 } 836 } 837 int length = fCurrentEntity.position - offset; 838 fCurrentEntity.columnNumber += length; 839 if (length > 0) { 840 String prefix = null; 841 String localpart = null; 842 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 843 offset, length); 844 845 if (index != -1) { 846 int prefixLength = index - offset; 847 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 848 offset, prefixLength); 849 int len = length - prefixLength - 1; 850 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, 851 index + 1, len); 852 853 } else { 854 localpart = rawname; 855 } 856 qname.setValues(prefix, localpart, rawname, null); 857 if (DEBUG_BUFFER) { 858 
System.out.print(")scanQName, "+qname+": "); 859 print(); 860 System.out.println(" -> true"); 861 } 862 return true; 863 } 864 } 865 866 // no qualified name found 867 if (DEBUG_BUFFER) { 868 System.out.print(")scanQName, "+qname+": "); 869 print(); 870 System.out.println(" -> false"); 871 } 872 return false; 873 874 } // scanQName(QName):boolean 875 876 /** 877 * CHANGED: 878 * Scans a range of parsed character data, This function appends the character data to 879 * the supplied buffer. 880 * <p> 881 * <strong>Note:</strong> The characters are consumed. 882 * <p> 883 * <strong>Note:</strong> This method does not guarantee to return 884 * the longest run of parsed character data. This method may return 885 * before markup due to reaching the end of the input buffer or any 886 * other reason. 887 * <p> 888 * 889 * @param content The content structure to fill. 890 * 891 * @return Returns the next character on the input, if known. This 892 * value may be -1 but this does <em>note</em> designate 893 * end of file. 894 * 895 * @throws IOException Thrown if i/o error occurs. 896 * @throws EOFException Thrown on end of file. 
897 */ 898 public int scanContent(XMLString content) throws IOException { 899 if (DEBUG_BUFFER) { 900 System.out.print("(scanContent: "); 901 print(); 902 System.out.println(); 903 } 904 905 // load more characters, if needed 906 if (fCurrentEntity.position == fCurrentEntity.count) { 907 load(0, true, true); 908 } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { 909 fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; 910 load(1, false, true); 911 fCurrentEntity.position = 0; 912 } 913 914 // normalize newlines 915 int offset = fCurrentEntity.position; 916 int c = fCurrentEntity.ch[offset]; 917 int newlines = 0; 918 if (c == '\n' || (c == '\r' && isExternal)) { 919 if (DEBUG_BUFFER) { 920 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); 921 print(); 922 System.out.println(); 923 } 924 do { 925 c = fCurrentEntity.ch[fCurrentEntity.position++]; 926 if (c == '\r' && isExternal) { 927 newlines++; 928 fCurrentEntity.lineNumber++; 929 fCurrentEntity.columnNumber = 1; 930 if (fCurrentEntity.position == fCurrentEntity.count) { 931 offset = 0; 932 fCurrentEntity.position = newlines; 933 if (load(newlines, false, true)) { 934 break; 935 } 936 } 937 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 938 fCurrentEntity.position++; 939 offset++; 940 } 941 /*** NEWLINE NORMALIZATION ***/ 942 else { 943 newlines++; 944 } 945 } else if (c == '\n') { 946 newlines++; 947 fCurrentEntity.lineNumber++; 948 fCurrentEntity.columnNumber = 1; 949 if (fCurrentEntity.position == fCurrentEntity.count) { 950 offset = 0; 951 fCurrentEntity.position = newlines; 952 if (load(newlines, false, true)) { 953 break; 954 } 955 } 956 } else { 957 fCurrentEntity.position--; 958 break; 959 } 960 } while (fCurrentEntity.position < fCurrentEntity.count - 1); 961 for (int i = offset; i < fCurrentEntity.position; i++) { 962 fCurrentEntity.ch[i] = '\n'; 963 } 964 int length = fCurrentEntity.position - offset; 965 if (fCurrentEntity.position == 
fCurrentEntity.count - 1) { 966 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee 967 //on buffering the data.. 968 content.setValues(fCurrentEntity.ch, offset, length); 969 //content.append(fCurrentEntity.ch, offset, length); 970 if (DEBUG_BUFFER) { 971 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 972 print(); 973 System.out.println(); 974 } 975 return -1; 976 } 977 if (DEBUG_BUFFER) { 978 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); 979 print(); 980 System.out.println(); 981 } 982 } 983 984 while (fCurrentEntity.position < fCurrentEntity.count) { 985 c = fCurrentEntity.ch[fCurrentEntity.position++]; 986 if (!XMLChar.isContent(c)) { 987 fCurrentEntity.position--; 988 break; 989 } 990 } 991 int length = fCurrentEntity.position - offset; 992 fCurrentEntity.columnNumber += length - newlines; 993 994 //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee 995 //on buffering the data.. 996 content.setValues(fCurrentEntity.ch, offset, length); 997 //content.append(fCurrentEntity.ch, offset, length); 998 // return next character 999 if (fCurrentEntity.position != fCurrentEntity.count) { 1000 c = fCurrentEntity.ch[fCurrentEntity.position]; 1001 // REVISIT: Does this need to be updated to fix the 1002 // #x0D ^#x0A newline normalization problem? -Ac 1003 if (c == '\r' && isExternal) { 1004 c = '\n'; 1005 } 1006 } else { 1007 c = -1; 1008 } 1009 if (DEBUG_BUFFER) { 1010 System.out.print(")scanContent: "); 1011 print(); 1012 System.out.println(" -> '"+(char)c+"'"); 1013 } 1014 return c; 1015 1016 } // scanContent(XMLString):int 1017 1018 /** 1019 * Scans a range of attribute value data, setting the fields of the 1020 * XMLString structure, appropriately. 1021 * <p> 1022 * <strong>Note:</strong> The characters are consumed. 
* <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of attribute value data. This method may return
     * before the quote character due to reaching the end of the input
     * buffer or any other reason.
     * <p>
     * <strong>Note:</strong> The fields contained in the XMLString
     * structure are not guaranteed to remain valid upon subsequent calls
     * to the entity scanner. Therefore, the caller is responsible for
     * immediately using the returned character data or making a copy of
     * the character data.
     *
     * @param quote   The quote character that signifies the end of the
     *                attribute value data.
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
*/
    public int scanLiteral(int quote, XMLString content)
    throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            // buffer exhausted: refill from slot 0; the entity may change here
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one character left: move it to slot 0, refill behind it
            // (entity is not allowed to change) and restart scanning at slot 0
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, true);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        // offset marks the start of the range handed back through 'content';
        // newlines feeds the column-number adjustment at the end of the method
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        // start a fresh list of recorded whitespace positions for this call
        if(whiteSpaceInfoNeeded)
            whiteSpaceLen=0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // consume the leading run of line breaks; '\r' is only treated as
            // a line break when isExternal is set
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // end of buffer inside the run: re-base the run at slot 0,
                        // keep 'newlines' slots for the breaks seen so far and
                        // load fresh data right after them
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: consume the '\n' too and move the start of
                        // the returned range forward so the pair yields one '\n'
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        // '\r' not followed by '\n': newlines is bumped a second time
                        newlines++;
                    }
                    /***/
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same re-basing as in the '\r' branch above
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * offset++;
                     * }
                     * /***/
                } else {
                    // not a line break: un-read the character and stop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // every slot of the consumed run is rewritten as '\n' and its
            // position is recorded in the whitespace lookup table
            int i=0;
            for ( i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
                storeWhiteSpace(i);
            }

            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character remains after the run: hand back just the
                // newlines and report "next character unknown" (-1)
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // scan literal value
        // stops (without consuming) at '%', at a non-content character, or at
        // the quote character when the entity is not flagged 'literal' or
        // isExternal is set
        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            if ((c == quote &&
                 (!fCurrentEntity.literal || isExternal)) ||
                 c == '%' || !XMLChar.isContent(c)) {
                break;
            }
            // remember where tabs occur when whitespace info was requested
            if (whiteSpaceInfoNeeded && c == '\t') {
                storeWhiteSpace(fCurrentEntity.position);
            }
        }
        int length = fCurrentEntity.position - offset;
        // the column was already reset to 1 for each line break above
        fCurrentEntity.columnNumber += length - newlines;
        content.setValues(fCurrentEntity.ch, offset, length);

        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // NOTE: We don't want to accidentally signal the
            //       end of the literal if we're expanding an
            //       entity appearing in the literal. -Ac
            if (c == quote && fCurrentEntity.literal) {
                c = -1;
            }
        } else {
            // buffer exhausted: the next character is not known yet
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanLiteral(int,XMLString):int

    /**
     * Save whitespace information. Increase the whitespace buffer by 100
     * when needed.
     *
     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
     *
     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
     */
    private void storeWhiteSpace(int whiteSpacePos) {
        // table full: grow it by a fixed 100 entries, keeping existing entries
        if (whiteSpaceLen >= whiteSpaceLookup.length) {
            int [] tmp = new int[whiteSpaceLookup.length + 100];
            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
            whiteSpaceLookup = tmp;
        }

        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
    }

    //CHANGED:
    /**
     * Scans a range of character data up to the specified delimiter,
     * setting the fields of the XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     *
     * @return Returns true if there is more data to scan, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
*/
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        // first delimiter character, used as the quick check in the scan loop
        char charAt0 = delimiter.charAt(0);
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }

            boolean bNextEntity = false;

            // fewer than delimLen characters remain: shift the unread tail to
            // the front of the buffer and load more behind it, until enough
            // characters are buffered or load() reports the end of the entity
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
                System.arraycopy(fCurrentEntity.ch,
                        fCurrentEntity.position,
                        fCurrentEntity.ch,
                        0,
                        fCurrentEntity.count - fCurrentEntity.position);

                bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
                fCurrentEntity.position = 0;
                fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // append whatever is left, mark the buffer as fully consumed,
                // try to move on to the next entity and report "no more data"
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true, false);
                return false;
            }

            // normalize newlines
            // offset marks the start of the range appended to 'buffer';
            // newlines feeds the column-number adjustment further down
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                // consume the leading run of line breaks; '\r' is only treated
                // as a line break when isExternal is set
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // end of buffer inside the run: re-base the run at
                            // slot 0 and load fresh data after 'newlines' slots
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: consume the '\n' too and move the start
                            // of the appended range forward so the pair yields one '\n'
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            // '\r' not followed by '\n': newlines is bumped a second time
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // as above, but this branch also sets count to
                            // 'newlines' before loading (the '\r' branch does not)
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                    } else {
                        // not a line break: un-read the character and stop
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // every slot of the consumed run is rewritten as '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one character remains after the run: append just the
                    // newlines and tell the caller there is more data to scan
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer ends inside a possible delimiter: rewind to its
                            // first character and leave the scan loop; the outer
                            // do/while iterates again (done is still false)
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: rewind so that only the first character
                            // of the attempted match stays consumed
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // position advanced by exactly delimLen => full match
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // line break: un-read it; it is normalized on the next pass
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // invalid character: un-read it, append what was scanned so
                    // far and return true so the caller can deal with it
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            // the column was already reset to 1 for each line break above
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter itself is consumed but not appended
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop only exits normally once done is true, so this returns false
        return !done;

    } // scanData(String,XMLStringBuffer):boolean

    /**
     * Skips a character appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed only if it matches
     * the specified character.
     *
     * @param c The character to skip.
     *
     * @return Returns true if the character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
1398 */ 1399 public boolean skipChar(int c) throws IOException { 1400 if (DEBUG_BUFFER) { 1401 System.out.print("(skipChar, '"+(char)c+"': "); 1402 print(); 1403 System.out.println(); 1404 } 1405 1406 // load more characters, if needed 1407 if (fCurrentEntity.position == fCurrentEntity.count) { 1408 load(0, true, true); 1409 } 1410 1411 // skip character 1412 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1413 if (cc == c) { 1414 fCurrentEntity.position++; 1415 if (c == '\n') { 1416 fCurrentEntity.lineNumber++; 1417 fCurrentEntity.columnNumber = 1; 1418 } else { 1419 fCurrentEntity.columnNumber++; 1420 } 1421 if (DEBUG_BUFFER) { 1422 System.out.print(")skipChar, '"+(char)c+"': "); 1423 print(); 1424 System.out.println(" -> true"); 1425 } 1426 return true; 1427 } else if (c == '\n' && cc == '\r' && isExternal) { 1428 // handle newlines 1429 if (fCurrentEntity.position == fCurrentEntity.count) { 1430 fCurrentEntity.ch[0] = (char)cc; 1431 load(1, false, true); 1432 } 1433 fCurrentEntity.position++; 1434 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1435 fCurrentEntity.position++; 1436 } 1437 fCurrentEntity.lineNumber++; 1438 fCurrentEntity.columnNumber = 1; 1439 if (DEBUG_BUFFER) { 1440 System.out.print(")skipChar, '"+(char)c+"': "); 1441 print(); 1442 System.out.println(" -> true"); 1443 } 1444 return true; 1445 } 1446 1447 // character was not skipped 1448 if (DEBUG_BUFFER) { 1449 System.out.print(")skipChar, '"+(char)c+"': "); 1450 print(); 1451 System.out.println(" -> false"); 1452 } 1453 return false; 1454 1455 } // skipChar(int):boolean 1456 1457 public boolean isSpace(char ch){ 1458 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1459 } 1460 /** 1461 * Skips space characters appearing immediately on the input. 1462 * <p> 1463 * <strong>Note:</strong> The characters are consumed only if they are 1464 * space characters. 1465 * 1466 * @return Returns true if at least one space character was skipped. 
*
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        //boolean entityChanged = false;
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        //we are doing this check only in skipSpace() because it is called by
        //fMiscDispatcher and we want the parser to exit gracefully when document
        //is well-formed.
        //it is possible that end of document is reached and
        //fCurrentEntity becomes null
        //nothing was read so entity changed 'false' should be returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                // set when a refill inside this iteration reported an entity
                // change; in that case position is not advanced below
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        // line break is the last buffered character: keep it in
                        // slot 0 and refill behind it so the following character
                        // (a possible '\n' after '\r') can be inspected
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, true);
                        if (!entityChanged){
                            // the load change the position to be 1,
                            // need to restore it when entity not changed
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            // at least one space was skipped before the input ended
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // peek at the next character; step back unless it is the
                        // '\n' of a "\r\n" pair
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // consume the space character (unless the entity changed above)
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    //we are doing this check only in skipSpace() because it is called by
                    //fMiscDispatcher and we want the parser to exit gracefully when document
                    //is well-formed.

                    //it is possible that end of document is reached and
                    //fCurrentEntity becomes null
                    //at least one space was already skipped here, so 'true' is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean


    /**
     * Checks that the given number of characters is available in the
     * underlying buffer. Delegates to
     * {@link #arrangeCapacity(int, boolean)} with <code>changeEntity</code>
     * set to false.
     *
     * @param length number of characters that must be available.
     * @return This function returns true if capacity asked is available.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }

    /**
     * Checks that the given number of characters is available in the
     * underlying buffer.
     *
     * @param length number of characters that must be available.
     * @param changeEntity if the underlying function should change the entity
     * @return This function returns true if capacity asked is available.
1579 * 1580 */ 1581 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1582 //check if the capacity is availble in the current buffer 1583 //count is no. of characters in the buffer [x][m][l] 1584 //position is '0' based 1585 //System.out.println("fCurrent Entity " + fCurrentEntity); 1586 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1587 return true; 1588 } 1589 if(DEBUG_SKIP_STRING){ 1590 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1591 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1592 System.out.println("length = " + length); 1593 } 1594 boolean entityChanged = false; 1595 //load more characters -- this function shouldn't change the entity 1596 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1597 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1598 invokeListeners(0); 1599 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1600 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1601 fCurrentEntity.position = 0; 1602 } 1603 1604 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1605 int pos = fCurrentEntity.position; 1606 invokeListeners(pos); 1607 entityChanged = load(fCurrentEntity.count, changeEntity, false); 1608 fCurrentEntity.position = pos; 1609 if(entityChanged)break; 1610 } 1611 if(DEBUG_SKIP_STRING){ 1612 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1613 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1614 System.out.println("length = " + length); 1615 } 1616 } 1617 //load changes the position.. set it back to the point where we started. 1618 1619 //after loading check again. 
1620 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1621 return true; 1622 } else { 1623 return false; 1624 } 1625 } 1626 1627 /** 1628 * Skips the specified string appearing immediately on the input. 1629 * <p> 1630 * <strong>Note:</strong> The characters are consumed only if all 1631 * the characters are skipped. 1632 * 1633 * @param s The string to skip. 1634 * 1635 * @return Returns true if the string was skipped. 1636 * 1637 * @throws IOException Thrown if i/o error occurs. 1638 * @throws EOFException Thrown on end of file. 1639 */ 1640 public boolean skipString(String s) throws IOException { 1641 1642 final int length = s.length(); 1643 1644 //first make sure that required capacity is avaible 1645 if(arrangeCapacity(length, false)){ 1646 final int beforeSkip = fCurrentEntity.position ; 1647 int afterSkip = fCurrentEntity.position + length - 1 ; 1648 if(DEBUG_SKIP_STRING){ 1649 System.out.println("skipString,length = " + s + "," + length); 1650 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1651 } 1652 1653 //s.charAt() indexes are 0 to 'Length -1' based. 
1654 int i = length - 1 ; 1655 //check from reverse 1656 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1657 if(afterSkip-- == beforeSkip){ 1658 fCurrentEntity.position = fCurrentEntity.position + length ; 1659 fCurrentEntity.columnNumber += length; 1660 return true; 1661 } 1662 } 1663 } 1664 1665 return false; 1666 } // skipString(String):boolean 1667 1668 public boolean skipString(char [] s) throws IOException { 1669 1670 final int length = s.length; 1671 //first make sure that required capacity is avaible 1672 if(arrangeCapacity(length, false)){ 1673 int beforeSkip = fCurrentEntity.position ; 1674 int afterSkip = fCurrentEntity.position + length ; 1675 1676 if(DEBUG_SKIP_STRING){ 1677 System.out.println("skipString,length = " + new String(s) + "," + length); 1678 System.out.println("skipString,length = " + new String(s) + "," + length); 1679 } 1680 1681 for(int i=0;i<length;i++){ 1682 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1683 return false; 1684 } 1685 } 1686 fCurrentEntity.position = fCurrentEntity.position + length ; 1687 fCurrentEntity.columnNumber += length; 1688 return true; 1689 1690 } 1691 1692 return false; 1693 } 1694 1695 // 1696 // Locator methods 1697 // 1698 // 1699 // Private methods 1700 // 1701 1702 /** 1703 * Loads a chunk of text. 1704 * 1705 * @param offset The offset into the character buffer to 1706 * read the next batch of characters. 1707 * @param changeEntity True if the load should change entities 1708 * at the end of the entity, otherwise leave 1709 * the current entity in place and the entity 1710 * boundary will be signaled by the return 1711 * value. 1712 * @param notify Determine whether to notify listeners of 1713 * the event 1714 * 1715 * @returns Returns true if the entity changed as a result of this 1716 * load operation. 
1717 */ 1718 final boolean load(int offset, boolean changeEntity, boolean notify) 1719 throws IOException { 1720 if (DEBUG_BUFFER) { 1721 System.out.print("(load, "+offset+": "); 1722 print(); 1723 System.out.println(); 1724 } 1725 if (notify) { 1726 invokeListeners(offset); 1727 } 1728 //maintaing the count till last load 1729 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1730 // read characters 1731 int length = fCurrentEntity.ch.length - offset; 1732 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1733 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1734 } 1735 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1736 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1737 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1738 1739 // reset count and position 1740 boolean entityChanged = false; 1741 if (count != -1) { 1742 if (count != 0) { 1743 // record the last count 1744 fCurrentEntity.fLastCount = count; 1745 fCurrentEntity.count = count + offset; 1746 fCurrentEntity.position = offset; 1747 } 1748 } 1749 // end of this entity 1750 else { 1751 fCurrentEntity.count = offset; 1752 fCurrentEntity.position = offset; 1753 entityChanged = true; 1754 1755 if (changeEntity) { 1756 //notify the entity manager about the end of entity 1757 fEntityManager.endEntity(); 1758 //return if the current entity becomes null 1759 if(fCurrentEntity == null){ 1760 throw END_OF_DOCUMENT_ENTITY; 1761 } 1762 // handle the trailing edges 1763 if (fCurrentEntity.position == fCurrentEntity.count) { 1764 load(0, true, false); 1765 } 1766 } 1767 1768 } 1769 if (DEBUG_BUFFER) { 1770 System.out.print(")load, "+offset+": "); 1771 print(); 1772 System.out.println(); 1773 } 1774 1775 return entityChanged; 1776 1777 } // load(int, boolean):boolean 1778 1779 /** 1780 * Creates a reader capable of reading the given 
input stream in 1781 * the specified encoding. 1782 * 1783 * @param inputStream The input stream. 1784 * @param encoding The encoding name that the input stream is 1785 * encoded using. If the user has specified that 1786 * Java encoding names are allowed, then the 1787 * encoding name may be a Java encoding name; 1788 * otherwise, it is an ianaEncoding name. 1789 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1790 * specify a byte order, this tells whether the order is bigEndian. null menas 1791 * unknown or not relevant. 1792 * 1793 * @return Returns a reader. 1794 */ 1795 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1796 throws IOException { 1797 1798 // normalize encoding name 1799 if (encoding == null) { 1800 encoding = "UTF-8"; 1801 } 1802 1803 // try to use an optimized reader 1804 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1805 if (ENCODING.equals("UTF-8")) { 1806 if (DEBUG_ENCODINGS) { 1807 System.out.println("$$$ creating UTF8Reader"); 1808 } 1809 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1810 } 1811 if (ENCODING.equals("US-ASCII")) { 1812 if (DEBUG_ENCODINGS) { 1813 System.out.println("$$$ creating ASCIIReader"); 1814 } 1815 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1816 } 1817 if(ENCODING.equals("ISO-10646-UCS-4")) { 1818 if(isBigEndian != null) { 1819 boolean isBE = isBigEndian.booleanValue(); 1820 if(isBE) { 1821 return new UCSReader(inputStream, UCSReader.UCS4BE); 1822 } else { 1823 return new UCSReader(inputStream, UCSReader.UCS4LE); 1824 } 1825 } else { 1826 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1827 "EncodingByteOrderUnsupported", 1828 new Object[] { encoding }, 1829 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1830 } 
1831 } 1832 if(ENCODING.equals("ISO-10646-UCS-2")) { 1833 if(isBigEndian != null) { // sould never happen with this encoding... 1834 boolean isBE = isBigEndian.booleanValue(); 1835 if(isBE) { 1836 return new UCSReader(inputStream, UCSReader.UCS2BE); 1837 } else { 1838 return new UCSReader(inputStream, UCSReader.UCS2LE); 1839 } 1840 } else { 1841 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1842 "EncodingByteOrderUnsupported", 1843 new Object[] { encoding }, 1844 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1845 } 1846 } 1847 1848 // check for valid name 1849 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1850 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1851 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1852 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1853 "EncodingDeclInvalid", 1854 new Object[] { encoding }, 1855 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1856 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1857 // because every byte is a valid ISO Latin 1 character. 1858 // It may not translate correctly but if we failed on 1859 // the encoding anyway, then we're expecting the content 1860 // of the document to be bad. This will just prevent an 1861 // invalid UTF-8 sequence to be detected. This is only 1862 // important when continue-after-fatal-error is turned 1863 // on. -Ac 1864 encoding = "ISO-8859-1"; 1865 } 1866 1867 // try to use a Java reader 1868 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1869 if (javaEncoding == null) { 1870 if(fAllowJavaEncodings) { 1871 javaEncoding = encoding; 1872 } else { 1873 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1874 "EncodingDeclInvalid", 1875 new Object[] { encoding }, 1876 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1877 // see comment above. 
1878 javaEncoding = "ISO8859_1"; 1879 } 1880 } 1881 else if (javaEncoding.equals("ASCII")) { 1882 if (DEBUG_ENCODINGS) { 1883 System.out.println("$$$ creating ASCIIReader"); 1884 } 1885 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1886 } 1887 1888 if (DEBUG_ENCODINGS) { 1889 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1890 if (javaEncoding == encoding) { 1891 System.out.print(" (IANA encoding)"); 1892 } 1893 System.out.println(); 1894 } 1895 return new InputStreamReader(inputStream, javaEncoding); 1896 1897 } // createReader(InputStream,String, Boolean): Reader 1898 1899 /** 1900 * Returns the IANA encoding name that is auto-detected from 1901 * the bytes specified, with the endian-ness of that encoding where appropriate. 1902 * 1903 * @param b4 The first four bytes of the input. 1904 * @param count The number of bytes actually read. 1905 * @return a 2-element array: the first element, an IANA-encoding string, 1906 * the second element a Boolean which is true iff the document is big endian, false 1907 * if it's little-endian, and null if the distinction isn't relevant. 
1908 */ 1909 protected Object[] getEncodingName(byte[] b4, int count) { 1910 1911 if (count < 2) { 1912 return new Object[]{"UTF-8", null}; 1913 } 1914 1915 // UTF-16, with BOM 1916 int b0 = b4[0] & 0xFF; 1917 int b1 = b4[1] & 0xFF; 1918 if (b0 == 0xFE && b1 == 0xFF) { 1919 // UTF-16, big-endian 1920 return new Object [] {"UTF-16BE", new Boolean(true)}; 1921 } 1922 if (b0 == 0xFF && b1 == 0xFE) { 1923 // UTF-16, little-endian 1924 return new Object [] {"UTF-16LE", new Boolean(false)}; 1925 } 1926 1927 // default to UTF-8 if we don't have enough bytes to make a 1928 // good determination of the encoding 1929 if (count < 3) { 1930 return new Object [] {"UTF-8", null}; 1931 } 1932 1933 // UTF-8 with a BOM 1934 int b2 = b4[2] & 0xFF; 1935 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1936 return new Object [] {"UTF-8", null}; 1937 } 1938 1939 // default to UTF-8 if we don't have enough bytes to make a 1940 // good determination of the encoding 1941 if (count < 4) { 1942 return new Object [] {"UTF-8", null}; 1943 } 1944 1945 // other encodings 1946 int b3 = b4[3] & 0xFF; 1947 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 1948 // UCS-4, big endian (1234) 1949 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 1950 } 1951 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 1952 // UCS-4, little endian (4321) 1953 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 1954 } 1955 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 1956 // UCS-4, unusual octet order (2143) 1957 // REVISIT: What should this be? 1958 return new Object [] {"ISO-10646-UCS-4", null}; 1959 } 1960 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 1961 // UCS-4, unusual octect order (3412) 1962 // REVISIT: What should this be? 1963 return new Object [] {"ISO-10646-UCS-4", null}; 1964 } 1965 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 1966 // UTF-16, big-endian, no BOM 1967 // (or could turn out to be UCS-2... 
1968 // REVISIT: What should this be? 1969 return new Object [] {"UTF-16BE", new Boolean(true)}; 1970 } 1971 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 1972 // UTF-16, little-endian, no BOM 1973 // (or could turn out to be UCS-2... 1974 return new Object [] {"UTF-16LE", new Boolean(false)}; 1975 } 1976 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 1977 // EBCDIC 1978 // a la xerces1, return CP037 instead of EBCDIC here 1979 return new Object [] {"CP037", null}; 1980 } 1981 1982 // default encoding 1983 return new Object [] {"UTF-8", null}; 1984 1985 } // getEncodingName(byte[],int):Object[] 1986 1987 /** 1988 * xxx not removing endEntity() so that i remember that we need to implement it. 1989 * Ends an entity. 1990 * 1991 * @throws XNIException Thrown by entity handler to signal an error. 1992 */ 1993 // 1994 /** Prints the contents of the buffer. */ 1995 final void print() { 1996 if (DEBUG_BUFFER) { 1997 if (fCurrentEntity != null) { 1998 System.out.print('['); 1999 System.out.print(fCurrentEntity.count); 2000 System.out.print(' '); 2001 System.out.print(fCurrentEntity.position); 2002 if (fCurrentEntity.count > 0) { 2003 System.out.print(" \""); 2004 for (int i = 0; i < fCurrentEntity.count; i++) { 2005 if (i == fCurrentEntity.position) { 2006 System.out.print('^'); 2007 } 2008 char c = fCurrentEntity.ch[i]; 2009 switch (c) { 2010 case '\n': { 2011 System.out.print("\\n"); 2012 break; 2013 } 2014 case '\r': { 2015 System.out.print("\\r"); 2016 break; 2017 } 2018 case '\t': { 2019 System.out.print("\\t"); 2020 break; 2021 } 2022 case '\\': { 2023 System.out.print("\\\\"); 2024 break; 2025 } 2026 default: { 2027 System.out.print(c); 2028 } 2029 } 2030 } 2031 if (fCurrentEntity.position == fCurrentEntity.count) { 2032 System.out.print('^'); 2033 } 2034 System.out.print('"'); 2035 } 2036 System.out.print(']'); 2037 System.out.print(" @ "); 2038 System.out.print(fCurrentEntity.lineNumber); 2039 System.out.print(','); 2040 
System.out.print(fCurrentEntity.columnNumber); 2041 } else { 2042 System.out.print("*NO CURRENT ENTITY*"); 2043 } 2044 } 2045 } 2046 2047 /** 2048 * Registers the listener object and provides callback. 2049 * @param listener listener to which call back should be provided when scanner buffer 2050 * is being changed. 2051 */ 2052 public void registerListener(XMLBufferListener listener) { 2053 if(!listeners.contains(listener)) 2054 listeners.add(listener); 2055 } 2056 2057 /** 2058 * 2059 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2060 */ 2061 public void invokeListeners(int loadPos){ 2062 for(int i=0;i<listeners.size();i++){ 2063 XMLBufferListener listener =(XMLBufferListener) listeners.get(i); 2064 listener.refresh(loadPos); 2065 } 2066 } 2067 2068 /** 2069 * Skips space characters appearing immediately on the input that would 2070 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2071 * normalization is performed. This is useful when scanning structures 2072 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2073 * characters. 2074 * <p> 2075 * <strong>Note:</strong> The characters are consumed only if they would 2076 * match non-terminal S before end of line normalization is performed. 2077 * 2078 * @return Returns true if at least one space character was skipped. 2079 * 2080 * @throws IOException Thrown if i/o error occurs. 2081 * @throws EOFException Thrown on end of file. 
2082 * 2083 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 2084 */ 2085 public final boolean skipDeclSpaces() throws IOException { 2086 if (DEBUG_BUFFER) { 2087 System.out.print("(skipDeclSpaces: "); 2088 //XMLEntityManager.print(fCurrentEntity); 2089 System.out.println(); 2090 } 2091 2092 // load more characters, if needed 2093 if (fCurrentEntity.position == fCurrentEntity.count) { 2094 load(0, true, false); 2095 } 2096 2097 // skip spaces 2098 int c = fCurrentEntity.ch[fCurrentEntity.position]; 2099 if (XMLChar.isSpace(c)) { 2100 boolean external = fCurrentEntity.isExternal(); 2101 do { 2102 boolean entityChanged = false; 2103 // handle newlines 2104 if (c == '\n' || (external && c == '\r')) { 2105 fCurrentEntity.lineNumber++; 2106 fCurrentEntity.columnNumber = 1; 2107 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 2108 fCurrentEntity.ch[0] = (char)c; 2109 entityChanged = load(1, true, false); 2110 if (!entityChanged) 2111 // the load change the position to be 1, 2112 // need to restore it when entity not changed 2113 fCurrentEntity.position = 0; 2114 } 2115 if (c == '\r' && external) { 2116 // REVISIT: Does this need to be updated to fix the 2117 // #x0D ^#x0A newline normalization problem? 
-Ac 2118 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 2119 fCurrentEntity.position--; 2120 } 2121 } 2122 /*** NEWLINE NORMALIZATION *** 2123 * else { 2124 * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' 2125 * && external) { 2126 * fCurrentEntity.position++; 2127 * } 2128 * } 2129 * /***/ 2130 } else { 2131 fCurrentEntity.columnNumber++; 2132 } 2133 // load more characters, if needed 2134 if (!entityChanged) 2135 fCurrentEntity.position++; 2136 if (fCurrentEntity.position == fCurrentEntity.count) { 2137 load(0, true, false); 2138 } 2139 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 2140 if (DEBUG_BUFFER) { 2141 System.out.print(")skipDeclSpaces: "); 2142 // XMLEntityManager.print(fCurrentEntity); 2143 System.out.println(" -> true"); 2144 } 2145 return true; 2146 } 2147 2148 // no spaces were found 2149 if (DEBUG_BUFFER) { 2150 System.out.print(")skipDeclSpaces: "); 2151 //XMLEntityManager.print(fCurrentEntity); 2152 System.out.println(" -> false"); 2153 } 2154 return false; 2155 2156 } // skipDeclSpaces():boolean 2157 2158 2159 } // class XMLEntityScanner