1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 25 import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 26 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 27 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 28 import com.sun.org.apache.xerces.internal.util.EncodingMap; 29 import com.sun.org.apache.xerces.internal.util.SymbolTable; 30 import com.sun.org.apache.xerces.internal.util.XMLChar; 31 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 32 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 35 import com.sun.org.apache.xerces.internal.xni.*; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 38 import com.sun.xml.internal.stream.Entity; 39 import 
com.sun.xml.internal.stream.Entity.ScannedEntity; 40 import com.sun.xml.internal.stream.XMLBufferListener; 41 import java.io.EOFException; 42 import java.io.IOException; 43 import java.io.InputStream; 44 import java.io.InputStreamReader; 45 import java.io.Reader; 46 import java.util.ArrayList; 47 import java.util.Locale; 48 49 /** 50 * Implements the entity scanner methods. 51 * 52 * @author Neeraj Bajaj, Sun Microsystems 53 * @author Andy Clark, IBM 54 * @author Arnaud Le Hors, IBM 55 * @author K.Venugopal Sun Microsystems 56 * 57 */ 58 public class XMLEntityScanner implements XMLLocator { 59 60 protected Entity.ScannedEntity fCurrentEntity = null; 61 protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; 62 63 protected XMLEntityManager fEntityManager; 64 65 /** Security manager. */ 66 protected XMLSecurityManager fSecurityManager = null; 67 68 /** Limit analyzer. */ 69 protected XMLLimitAnalyzer fLimitAnalyzer = null; 70 71 /** Debug switching readers for encodings. */ 72 private static final boolean DEBUG_ENCODINGS = false; 73 74 /** Listeners which should know when load is being called */ 75 private ArrayList<XMLBufferListener> listeners = new ArrayList<>(); 76 77 private static final boolean [] VALID_NAMES = new boolean[127]; 78 79 /** 80 * Debug printing of buffer. This debugging flag works best when you 81 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 82 * 64 characters. 83 */ 84 private static final boolean DEBUG_BUFFER = false; 85 private static final boolean DEBUG_SKIP_STRING = false; 86 /** 87 * To signal the end of the document entity, this exception will be thrown. 
88 */ 89 private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() { 90 private static final long serialVersionUID = 980337771224675268L; 91 public Throwable fillInStackTrace() { 92 return this; 93 } 94 }; 95 96 protected SymbolTable fSymbolTable = null; 97 protected XMLErrorReporter fErrorReporter = null; 98 int [] whiteSpaceLookup = new int[100]; 99 int whiteSpaceLen = 0; 100 boolean whiteSpaceInfoNeeded = true; 101 102 /** 103 * Allow Java encoding names. This feature identifier is: 104 * http://apache.org/xml/features/allow-java-encodings 105 */ 106 protected boolean fAllowJavaEncodings; 107 108 //Will be used only during internal subsets. 109 //for appending data. 110 111 /** Property identifier: symbol table. */ 112 protected static final String SYMBOL_TABLE = 113 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 114 115 /** Property identifier: error reporter. */ 116 protected static final String ERROR_REPORTER = 117 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 118 119 /** Feature identifier: allow Java encodings. */ 120 protected static final String ALLOW_JAVA_ENCODINGS = 121 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 122 123 protected PropertyManager fPropertyManager = null ; 124 125 boolean isExternal = false; 126 static { 127 128 for(int i=0x0041;i<=0x005A ; i++){ 129 VALID_NAMES[i]=true; 130 } 131 for(int i=0x0061;i<=0x007A; i++){ 132 VALID_NAMES[i]=true; 133 } 134 for(int i=0x0030;i<=0x0039; i++){ 135 VALID_NAMES[i]=true; 136 } 137 VALID_NAMES[45]=true; 138 VALID_NAMES[46]=true; 139 VALID_NAMES[58]=true; 140 VALID_NAMES[95]=true; 141 } 142 143 // Remember, that the XML version has explicitly been set, 144 // so that XMLStreamReader.getVersion() can find that out. 145 protected boolean xmlVersionSetExplicitly = false; 146 147 // 148 // Constructors 149 // 150 151 /** Default constructor. 
*/ 152 public XMLEntityScanner() { 153 } // <init>() 154 155 156 /** private constructor, this class can only be instantiated within this class. Instance of this class should 157 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 158 * @see getEntityScanner() 159 * @see getEntityScanner(ScannedEntity) 160 */ 161 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 162 fEntityManager = entityManager ; 163 reset(propertyManager); 164 } // <init>() 165 166 167 // set buffer size: 168 public final void setBufferSize(int size) { 169 // REVISIT: Buffer size passed to entity scanner 170 // was not being kept in synch with the actual size 171 // of the buffers in each scanned entity. If any 172 // of the buffers were actually resized, it was possible 173 // that the parser would throw an ArrayIndexOutOfBoundsException 174 // for documents which contained names which are longer than 175 // the current buffer size. Conceivably the buffer size passed 176 // to entity scanner could be used to determine a minimum size 177 // for resizing, if doubling its size is smaller than this 178 // minimum. -- mrglavas 179 fBufferSize = size; 180 } 181 182 /** 183 * Resets the components. 184 */ 185 public void reset(PropertyManager propertyManager){ 186 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 187 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 188 resetCommon(); 189 } 190 191 /** 192 * Resets the component. The component can query the component manager 193 * about any features and properties that affect the operation of the 194 * component. 195 * 196 * @param componentManager The component manager. 197 * 198 * @throws SAXException Thrown by component on initialization error. 
199 * For example, if a feature or property is 200 * required for the operation of the component, the 201 * component manager may throw a 202 * SAXNotRecognizedException or a 203 * SAXNotSupportedException. 204 */ 205 public void reset(XMLComponentManager componentManager) 206 throws XMLConfigurationException { 207 // xerces features 208 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 209 210 //xerces properties 211 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 212 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 213 resetCommon(); 214 } // reset(XMLComponentManager) 215 216 217 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 218 XMLErrorReporter reporter) { 219 fCurrentEntity = null; 220 fSymbolTable = symbolTable; 221 fEntityManager = entityManager; 222 fErrorReporter = reporter; 223 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 224 fSecurityManager = fEntityManager.fSecurityManager; 225 } 226 227 private void resetCommon() { 228 fCurrentEntity = null; 229 whiteSpaceLen = 0; 230 whiteSpaceInfoNeeded = true; 231 listeners.clear(); 232 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 233 fSecurityManager = fEntityManager.fSecurityManager; 234 } 235 236 /** 237 * Returns the XML version of the current entity. This will normally be the 238 * value from the XML or text declaration or defaulted by the parser. Note that 239 * that this value may be different than the version of the processing rules 240 * applied to the current entity. For instance, an XML 1.1 document may refer to 241 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 242 * document. Also note that, for a given entity, this value can only be considered 243 * final once the XML or text declaration has been read or once it has been 244 * determined that there is no such declaration. 
245 */ 246 public final String getXMLVersion() { 247 if (fCurrentEntity != null) { 248 return fCurrentEntity.xmlVersion; 249 } 250 return null; 251 } // getXMLVersion():String 252 253 /** 254 * Sets the XML version. This method is used by the 255 * scanners to report the value of the version pseudo-attribute 256 * in an XML or text declaration. 257 * 258 * @param xmlVersion the XML version of the current entity 259 */ 260 public final void setXMLVersion(String xmlVersion) { 261 xmlVersionSetExplicitly = true; 262 fCurrentEntity.xmlVersion = xmlVersion; 263 } // setXMLVersion(String) 264 265 266 /** set the instance of current scanned entity. 267 * @param ScannedEntity 268 */ 269 270 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 271 fCurrentEntity = scannedEntity ; 272 if(fCurrentEntity != null){ 273 isExternal = fCurrentEntity.isExternal(); 274 if(DEBUG_BUFFER) 275 System.out.println("Current Entity is "+scannedEntity.name); 276 } 277 } 278 279 public Entity.ScannedEntity getCurrentEntity(){ 280 return fCurrentEntity ; 281 } 282 // 283 // XMLEntityReader methods 284 // 285 286 /** 287 * Returns the base system identifier of the currently scanned 288 * entity, or null if none is available. 289 */ 290 public final String getBaseSystemId() { 291 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 292 } // getBaseSystemId():String 293 294 /** 295 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 296 */ 297 public void setBaseSystemId(String systemId) { 298 //no-op 299 } 300 301 ///////////// Locator methods start. 302 public final int getLineNumber(){ 303 //if the entity is closed, we should return -1 304 //xxx at first place why such call should be there... 305 return fCurrentEntity != null ? 
fCurrentEntity.lineNumber : -1 ; 306 } 307 308 /** 309 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 310 */ 311 public void setLineNumber(int line) { 312 //no-op 313 } 314 315 316 public final int getColumnNumber(){ 317 //if the entity is closed, we should return -1 318 //xxx at first place why such call should be there... 319 return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ; 320 } 321 322 /** 323 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 324 */ 325 public void setColumnNumber(int col) { 326 // no-op 327 } 328 329 330 public final int getCharacterOffset(){ 331 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 332 } 333 334 /** Returns the expanded system identifier. */ 335 public final String getExpandedSystemId() { 336 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 337 } 338 339 /** 340 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 341 */ 342 public void setExpandedSystemId(String systemId) { 343 //no-op 344 } 345 346 /** Returns the literal system identifier. */ 347 public final String getLiteralSystemId() { 348 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 349 } 350 351 /** 352 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 353 */ 354 public void setLiteralSystemId(String systemId) { 355 //no-op 356 } 357 358 /** Returns the public identifier. */ 359 public final String getPublicId() { 360 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? 
fCurrentEntity.entityLocation.getPublicId() : null; 361 } 362 363 /** 364 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 365 */ 366 public void setPublicId(String publicId) { 367 //no-op 368 } 369 370 ///////////////// Locator methods finished. 371 372 /** the version of the current entity being scanned */ 373 public void setVersion(String version){ 374 fCurrentEntity.version = version; 375 } 376 377 public String getVersion(){ 378 if (fCurrentEntity != null) 379 return fCurrentEntity.version ; 380 return null; 381 } 382 383 /** 384 * Returns the encoding of the current entity. 385 * Note that, for a given entity, this value can only be 386 * considered final once the encoding declaration has been read (or once it 387 * has been determined that there is no such declaration) since, no encoding 388 * having been specified on the XMLInputSource, the parser 389 * will make an initial "guess" which could be in error. 390 */ 391 public final String getEncoding() { 392 if (fCurrentEntity != null) { 393 return fCurrentEntity.encoding; 394 } 395 return null; 396 } // getEncoding():String 397 398 /** 399 * Sets the encoding of the scanner. This method is used by the 400 * scanners if the XMLDecl or TextDecl line contains an encoding 401 * pseudo-attribute. 402 * <p> 403 * <strong>Note:</strong> The underlying character reader on the 404 * current entity will be changed to accomodate the new encoding. 405 * However, the new encoding is ignored if the current reader was 406 * not constructed from an input stream (e.g. an external entity 407 * that is resolved directly to the appropriate java.io.Reader 408 * object). 409 * 410 * @param encoding The IANA encoding name of the new encoding. 411 * 412 * @throws IOException Thrown if the new encoding is not supported. 
413 * 414 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 415 */ 416 public final void setEncoding(String encoding) throws IOException { 417 418 if (DEBUG_ENCODINGS) { 419 System.out.println("$$$ setEncoding: "+encoding); 420 } 421 422 if (fCurrentEntity.stream != null) { 423 // if the encoding is the same, don't change the reader and 424 // re-use the original reader used by the OneCharReader 425 // NOTE: Besides saving an object, this overcomes deficiencies 426 // in the UTF-16 reader supplied with the standard Java 427 // distribution (up to and including 1.3). The UTF-16 428 // decoder buffers 8K blocks even when only asked to read 429 // a single char! -Ac 430 if (fCurrentEntity.encoding == null || 431 !fCurrentEntity.encoding.equals(encoding)) { 432 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 433 // and we know the endian-ness, we shouldn't change readers. 434 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 435 // the endian-ness from the encoding we presently have. 
436 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 437 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 438 if(ENCODING.equals("UTF-16")) return; 439 if(ENCODING.equals("ISO-10646-UCS-4")) { 440 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 441 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 442 } else { 443 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 444 } 445 return; 446 } 447 if(ENCODING.equals("ISO-10646-UCS-2")) { 448 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 449 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 450 } else { 451 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 452 } 453 return; 454 } 455 } 456 // wrap a new reader around the input stream, changing 457 // the encoding 458 if (DEBUG_ENCODINGS) { 459 System.out.println("$$$ creating new reader from stream: "+ 460 fCurrentEntity.stream); 461 } 462 //fCurrentEntity.stream.reset(); 463 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 464 fCurrentEntity.encoding = encoding; 465 466 } else { 467 if (DEBUG_ENCODINGS) 468 System.out.println("$$$ reusing old reader on stream"); 469 } 470 } 471 472 } // setEncoding(String) 473 474 /** Returns true if the current entity being scanned is external. */ 475 public final boolean isExternal() { 476 return fCurrentEntity.isExternal(); 477 } // isExternal():boolean 478 479 public int getChar(int relative) throws IOException{ 480 if(arrangeCapacity(relative + 1, false)){ 481 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 482 }else{ 483 return -1; 484 } 485 }//getChar() 486 487 /** 488 * Returns the next character on the input. 489 * <p> 490 * <strong>Note:</strong> The character is <em>not</em> consumed. 491 * 492 * @throws IOException Thrown if i/o error occurs. 493 * @throws EOFException Thrown on end of file. 
494 */ 495 public int peekChar() throws IOException { 496 if (DEBUG_BUFFER) { 497 System.out.print("(peekChar: "); 498 print(); 499 System.out.println(); 500 } 501 502 // load more characters, if needed 503 if (fCurrentEntity.position == fCurrentEntity.count) { 504 load(0, true, true); 505 } 506 507 // peek at character 508 int c = fCurrentEntity.ch[fCurrentEntity.position]; 509 510 // return peeked character 511 if (DEBUG_BUFFER) { 512 System.out.print(")peekChar: "); 513 print(); 514 if (isExternal) { 515 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); 516 } else { 517 System.out.println(" -> '"+(char)c+"'"); 518 } 519 } 520 if (isExternal) { 521 return c != '\r' ? c : '\n'; 522 } else { 523 return c; 524 } 525 526 } // peekChar():int 527 528 /** 529 * Returns the next character on the input. 530 * <p> 531 * <strong>Note:</strong> The character is consumed. 532 * 533 * @throws IOException Thrown if i/o error occurs. 534 * @throws EOFException Thrown on end of file. 535 */ 536 public int scanChar() throws IOException { 537 if (DEBUG_BUFFER) { 538 System.out.print("(scanChar: "); 539 print(); 540 System.out.println(); 541 } 542 543 // load more characters, if needed 544 if (fCurrentEntity.position == fCurrentEntity.count) { 545 load(0, true, true); 546 } 547 548 // scan character 549 int c = fCurrentEntity.ch[fCurrentEntity.position++]; 550 if (c == '\n' || (c == '\r' && isExternal)) { 551 fCurrentEntity.lineNumber++; 552 fCurrentEntity.columnNumber = 1; 553 if (fCurrentEntity.position == fCurrentEntity.count) { 554 invokeListeners(1); 555 fCurrentEntity.ch[0] = (char)c; 556 load(1, false, false); 557 } 558 if (c == '\r' && isExternal) { 559 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { 560 fCurrentEntity.position--; 561 } 562 c = '\n'; 563 } 564 } 565 566 // return character that was scanned 567 if (DEBUG_BUFFER) { 568 System.out.print(")scanChar: "); 569 print(); 570 System.out.println(" -> '"+(char)c+"'"); 571 } 572 
fCurrentEntity.columnNumber++; 573 return c; 574 575 } // scanChar():int 576 577 /** 578 * Returns a string matching the NMTOKEN production appearing immediately 579 * on the input as a symbol, or null if NMTOKEN Name string is present. 580 * <p> 581 * <strong>Note:</strong> The NMTOKEN characters are consumed. 582 * <p> 583 * <strong>Note:</strong> The string returned must be a symbol. The 584 * SymbolTable can be used for this purpose. 585 * 586 * @throws IOException Thrown if i/o error occurs. 587 * @throws EOFException Thrown on end of file. 588 * 589 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 590 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 591 */ 592 public String scanNmtoken() throws IOException { 593 if (DEBUG_BUFFER) { 594 System.out.print("(scanNmtoken: "); 595 print(); 596 System.out.println(); 597 } 598 599 // load more characters, if needed 600 if (fCurrentEntity.position == fCurrentEntity.count) { 601 load(0, true, true); 602 } 603 604 // scan nmtoken 605 int offset = fCurrentEntity.position; 606 boolean vc = false; 607 char c; 608 while (true){ 609 //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { 610 c = fCurrentEntity.ch[fCurrentEntity.position]; 611 if(c < 127){ 612 vc = VALID_NAMES[c]; 613 }else{ 614 vc = XMLChar.isName(c); 615 } 616 if(!vc)break; 617 618 if (++fCurrentEntity.position == fCurrentEntity.count) { 619 int length = fCurrentEntity.position - offset; 620 invokeListeners(length); 621 if (length == fCurrentEntity.fBufferSize) { 622 // bad luck we have to resize our buffer 623 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 624 System.arraycopy(fCurrentEntity.ch, offset, 625 tmp, 0, length); 626 fCurrentEntity.ch = tmp; 627 fCurrentEntity.fBufferSize *= 2; 628 } else { 629 System.arraycopy(fCurrentEntity.ch, offset, 630 fCurrentEntity.ch, 0, length); 631 } 632 offset = 0; 633 if (load(length, false, false)) { 634 break; 635 } 636 } 637 } 638 int length = fCurrentEntity.position 
- offset; 639 fCurrentEntity.columnNumber += length; 640 641 // return nmtoken 642 String symbol = null; 643 if (length > 0) { 644 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 645 } 646 if (DEBUG_BUFFER) { 647 System.out.print(")scanNmtoken: "); 648 print(); 649 System.out.println(" -> "+String.valueOf(symbol)); 650 } 651 return symbol; 652 653 } // scanNmtoken():String 654 655 /** 656 * Returns a string matching the Name production appearing immediately 657 * on the input as a symbol, or null if no Name string is present. 658 * <p> 659 * <strong>Note:</strong> The Name characters are consumed. 660 * <p> 661 * <strong>Note:</strong> The string returned must be a symbol. The 662 * SymbolTable can be used for this purpose. 663 * 664 * @throws IOException Thrown if i/o error occurs. 665 * @throws EOFException Thrown on end of file. 666 * 667 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 668 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 669 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 670 */ 671 public String scanName() throws IOException { 672 if (DEBUG_BUFFER) { 673 System.out.print("(scanName: "); 674 print(); 675 System.out.println(); 676 } 677 678 // load more characters, if needed 679 if (fCurrentEntity.position == fCurrentEntity.count) { 680 load(0, true, true); 681 } 682 683 // scan name 684 int offset = fCurrentEntity.position; 685 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 686 if (++fCurrentEntity.position == fCurrentEntity.count) { 687 invokeListeners(1); 688 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 689 offset = 0; 690 if (load(1, false, false)) { 691 fCurrentEntity.columnNumber++; 692 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 693 694 if (DEBUG_BUFFER) { 695 System.out.print(")scanName: "); 696 print(); 697 System.out.println(" -> "+String.valueOf(symbol)); 698 } 699 return symbol; 700 } 701 } 702 boolean vc =false; 703 while (true ){ 704 
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 705 char c = fCurrentEntity.ch[fCurrentEntity.position]; 706 if(c < 127){ 707 vc = VALID_NAMES[c]; 708 }else{ 709 vc = XMLChar.isName(c); 710 } 711 if(!vc)break; 712 if (++fCurrentEntity.position == fCurrentEntity.count) { 713 int length = fCurrentEntity.position - offset; 714 invokeListeners(length); 715 if (length == fCurrentEntity.fBufferSize) { 716 // bad luck we have to resize our buffer 717 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 718 System.arraycopy(fCurrentEntity.ch, offset, 719 tmp, 0, length); 720 fCurrentEntity.ch = tmp; 721 fCurrentEntity.fBufferSize *= 2; 722 } else { 723 System.arraycopy(fCurrentEntity.ch, offset, 724 fCurrentEntity.ch, 0, length); 725 } 726 offset = 0; 727 if (load(length, false, false)) { 728 break; 729 } 730 } 731 } 732 } 733 int length = fCurrentEntity.position - offset; 734 fCurrentEntity.columnNumber += length; 735 736 // return name 737 String symbol; 738 if (length > 0) { 739 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); 740 } else 741 symbol = null; 742 if (DEBUG_BUFFER) { 743 System.out.print(")scanName: "); 744 print(); 745 System.out.println(" -> "+String.valueOf(symbol)); 746 } 747 return symbol; 748 749 } // scanName():String 750 751 /** 752 * Scans a qualified name from the input, setting the fields of the 753 * QName structure appropriately. 754 * <p> 755 * <strong>Note:</strong> The qualified name characters are consumed. 756 * <p> 757 * <strong>Note:</strong> The strings used to set the values of the 758 * QName structure must be symbols. The SymbolTable can be used for 759 * this purpose. 760 * 761 * @param qname The qualified name structure to fill. 762 * 763 * @return Returns true if a qualified name appeared immediately on 764 * the input and was scanned, false otherwise. 765 * 766 * @throws IOException Thrown if i/o error occurs. 767 * @throws EOFException Thrown on end of file. 
768 * 769 * @see com.sun.org.apache.xerces.internal.util.SymbolTable 770 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName 771 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart 772 */ 773 public boolean scanQName(QName qname) throws IOException { 774 if (DEBUG_BUFFER) { 775 System.out.print("(scanQName, "+qname+": "); 776 print(); 777 System.out.println(); 778 } 779 780 // load more characters, if needed 781 if (fCurrentEntity.position == fCurrentEntity.count) { 782 load(0, true, true); 783 } 784 785 // scan qualified name 786 int offset = fCurrentEntity.position; 787 788 //making a check if if the specified character is a valid name start character 789 //as defined by production [5] in the XML 1.0 specification. 790 // Name ::= (Letter | '_' | ':') (NameChar)* 791 792 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { 793 if (++fCurrentEntity.position == fCurrentEntity.count) { 794 invokeListeners(1); 795 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; 796 offset = 0; 797 798 if (load(1, false, false)) { 799 fCurrentEntity.columnNumber++; 800 //adding into symbol table. 801 //XXX We are trying to add single character in SymbolTable?????? 
802 String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); 803 qname.setValues(null, name, name, null); 804 if (DEBUG_BUFFER) { 805 System.out.print(")scanQName, "+qname+": "); 806 print(); 807 System.out.println(" -> true"); 808 } 809 return true; 810 } 811 } 812 int index = -1; 813 boolean vc = false; 814 while ( true){ 815 816 //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; 817 char c = fCurrentEntity.ch[fCurrentEntity.position]; 818 if(c < 127){ 819 vc = VALID_NAMES[c]; 820 }else{ 821 vc = XMLChar.isName(c); 822 } 823 if(!vc)break; 824 if (c == ':') { 825 if (index != -1) { 826 break; 827 } 828 index = fCurrentEntity.position; 829 //check prefix before further read 830 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset); 831 } 832 if (++fCurrentEntity.position == fCurrentEntity.count) { 833 int length = fCurrentEntity.position - offset; 834 //check localpart before loading more data 835 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length - index - 1); 836 invokeListeners(length); 837 if (length == fCurrentEntity.fBufferSize) { 838 // bad luck we have to resize our buffer 839 char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; 840 System.arraycopy(fCurrentEntity.ch, offset, 841 tmp, 0, length); 842 fCurrentEntity.ch = tmp; 843 fCurrentEntity.fBufferSize *= 2; 844 } else { 845 System.arraycopy(fCurrentEntity.ch, offset, 846 fCurrentEntity.ch, 0, length); 847 } 848 if (index != -1) { 849 index = index - offset; 850 } 851 offset = 0; 852 if (load(length, false, false)) { 853 break; 854 } 855 } 856 } 857 int length = fCurrentEntity.position - offset; 858 fCurrentEntity.columnNumber += length; 859 if (length > 0) { 860 String prefix = null; 861 String localpart = null; 862 String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, 863 offset, length); 864 865 if (index != -1) { 866 int prefixLength = index - offset; 867 //check the result: prefix 868 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, 
prefixLength); 869 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, 870 offset, prefixLength); 871 int len = length - prefixLength - 1; 872 //check the result: localpart 873 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len); 874 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, 875 index + 1, len); 876 877 } else { 878 localpart = rawname; 879 //check the result: localpart 880 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); 881 } 882 qname.setValues(prefix, localpart, rawname, null); 883 if (DEBUG_BUFFER) { 884 System.out.print(")scanQName, "+qname+": "); 885 print(); 886 System.out.println(" -> true"); 887 } 888 return true; 889 } 890 } 891 892 // no qualified name found 893 if (DEBUG_BUFFER) { 894 System.out.print(")scanQName, "+qname+": "); 895 print(); 896 System.out.println(" -> false"); 897 } 898 return false; 899 900 } // scanQName(QName):boolean 901 902 /** 903 * Checks whether the value of the specified Limit exceeds its limit 904 * 905 * @param limit The Limit to be checked. 906 * @param entity The current entity. 907 * @param offset The index of the first byte 908 * @param length The length of the entity scanned. 909 */ 910 protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) { 911 fLimitAnalyzer.addValue(limit, null, length); 912 if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) { 913 fSecurityManager.debugPrint(fLimitAnalyzer); 914 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(), 915 new Object[]{new String(entity.ch, offset, length), 916 fLimitAnalyzer.getTotalValue(limit), 917 fSecurityManager.getLimit(limit), 918 fSecurityManager.getStateLiteral(limit)}, 919 XMLErrorReporter.SEVERITY_FATAL_ERROR); 920 } 921 } 922 923 /** 924 * CHANGED: 925 * Scans a range of parsed character data, This function appends the character data to 926 * the supplied buffer. 927 * <p> 928 * <strong>Note:</strong> The characters are consumed. 
* <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of parsed character data. This method may return
 * before markup due to reaching the end of the input buffer or any
 * other reason.
 * <p>
 *
 * @param content The content structure to fill.
 *
 * @return Returns the next character on the input, if known. This
 *         value may be -1 but this does <em>not</em> designate
 *         end of file.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public int scanContent(XMLString content) throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(scanContent: ");
        print();
        System.out.println();
    }

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        // buffer fully consumed: refill from index 0
        load(0, true, true);
    } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        // exactly one unread character is left: move it to slot 0 and
        // refill the buffer behind it, then restart scanning at index 0
        invokeListeners(1);
        fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
        load(1, false, false);
        fCurrentEntity.position = 0;
    }

    // normalize newlines
    // offset marks the start of the range handed to the caller; newlines
    // is subtracted from the column advance at the end of the method
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    int newlines = 0;
    if (c == '\n' || (c == '\r' && isExternal)) {
        if (DEBUG_BUFFER) {
            System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
            print();
            System.out.println();
        }
        // consume the leading run of line breaks
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && isExternal) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // buffer exhausted inside the run: restart the range at
                    // index 0 and load new data after the first `newlines`
                    // slots; stop if load reports the end of the entity
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false, true)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    // CR LF pair: consume the LF and advance offset so the
                    // pair contributes one character to the returned range
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    newlines++;
                }
            } else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // same buffer-exhausted handling as in the CR branch
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false, true)) {
                        break;
                    }
                }
            } else {
                // not a line break: un-read it and leave the loop
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // every character of the consumed run is rewritten to '\n'
        for (int i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
        }
        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Only one character is left in the buffer: hand the newline run
            // to the caller and report the next character as unknown (-1).
            // NOTE: the content is replaced via setValues(), not appended;
            // the append() variant below is disabled.
            content.setValues(fCurrentEntity.ch, offset, length);
            //content.append(fCurrentEntity.ch, offset, length);
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            return -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
            print();
            System.out.println();
        }
    }

    // advance over characters accepted by XMLChar.isContent; the first
    // rejected character is left unread
    while (fCurrentEntity.position < fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (!XMLChar.isContent(c)) {
            fCurrentEntity.position--;
            break;
        }
    }
    int length = fCurrentEntity.position - offset;
    fCurrentEntity.columnNumber += length - newlines;
    if (fCurrentEntity.isGE) {
        // entities flagged isGE have the scanned range checked against
        // the total entity size limit
        checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
    }

    // NOTE: the content is replaced via setValues(), not appended; the
    // append() variant below is disabled.
    content.setValues(fCurrentEntity.ch, offset, length);
    //content.append(fCurrentEntity.ch, offset, length);
    // return next character
    if (fCurrentEntity.position != fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position];
        // REVISIT: Does this need to be updated to fix the
        //          #x0D ^#x0A newline normalization problem? -Ac
        if (c == '\r' && isExternal) {
            // a pending CR of an external entity is reported as '\n'
            c = '\n';
        }
    } else {
        // buffer exhausted: next character unknown
        c = -1;
    }
    if (DEBUG_BUFFER) {
        System.out.print(")scanContent: ");
        print();
        System.out.println(" -> '"+(char)c+"'");
    }
    return c;

} // scanContent(XMLString):int

/**
 * Scans a range of attribute value data, setting the fields of the
 * XMLString structure, appropriately.
 * <p>
 * <strong>Note:</strong> The characters are consumed.
 * <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of attribute value data. This method may return
 * before the quote character due to reaching the end of the input
 * buffer or any other reason.
 * <p>
 * <strong>Note:</strong> The fields contained in the XMLString
 * structure are not guaranteed to remain valid upon subsequent calls
 * to the entity scanner. Therefore, the caller is responsible for
 * immediately using the returned character data or making a copy of
 * the character data.
 *
 * @param quote   The quote character that signifies the end of the
 *                attribute value data.
 * @param content The content structure to fill.
 *
 * @return Returns the next character on the input, if known. This
 *         value may be -1 but this does <em>not</em> designate
 *         end of file.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
*/
public int scanLiteral(int quote, XMLString content)
        throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(scanLiteral, '"+(char)quote+"': ");
        print();
        System.out.println();
    }
    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        // buffer fully consumed: refill from index 0
        load(0, true, true);
    } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        // exactly one unread character is left: move it to slot 0 and
        // refill the buffer behind it, then restart scanning at index 0
        invokeListeners(1);
        fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
        load(1, false, false);
        fCurrentEntity.position = 0;
    }

    // normalize newlines
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    int newlines = 0;
    // restart the recorded-whitespace list for this call, but only when the
    // caller asked for whitespace information
    if(whiteSpaceInfoNeeded)
        whiteSpaceLen=0;
    if (c == '\n' || (c == '\r' && isExternal)) {
        if (DEBUG_BUFFER) {
            System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
            print();
            System.out.println();
        }
        // consume the leading run of line breaks
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && isExternal) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // buffer exhausted inside the run: restart the range at
                    // index 0 and load new data after the first `newlines`
                    // slots; stop if load reports the end of the entity
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false, true)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    // CR LF pair: consume the LF and advance offset so the
                    // pair contributes one character to the returned range
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    newlines++;
                }
                /***/
            } else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // same buffer-exhausted handling as in the CR branch
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false, true)) {
                        break;
                    }
                }
                /*** NEWLINE NORMALIZATION ***
                 * (disabled: LF followed by CR is not collapsed)
                 * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                 *     && external) {
                 *     fCurrentEntity.position++;
                 *     offset++;
                 * }
                 * /***/
            } else {
                // not a line break: un-read it and leave the loop
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        int i=0;
        // every character of the consumed run is rewritten to '\n' and its
        // buffer index is recorded.
        // NOTE(review): recording here is not guarded by whiteSpaceInfoNeeded,
        // unlike the tab case below - confirm this is intended.
        for ( i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
            storeWhiteSpace(i);
        }

        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // only one character is left in the buffer: hand the newline run
            // to the caller and report the next character as unknown (-1)
            content.setValues(fCurrentEntity.ch, offset, length);
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            return -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
            print();
            System.out.println();
        }
    }

    // scan literal value
    // Stops (without consuming) at '%', at any character rejected by
    // XMLChar.isContent, or at the quote character - the quote only counts
    // when the entity is not flagged as a literal or the scanner is external.
    for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
        c = fCurrentEntity.ch[fCurrentEntity.position];
        if ((c == quote &&
            (!fCurrentEntity.literal || isExternal)) ||
            c == '%' || !XMLChar.isContent(c)) {
            break;
        }
        if (whiteSpaceInfoNeeded && c == '\t') {
            // remember where tabs occur when whitespace info was requested
            storeWhiteSpace(fCurrentEntity.position);
        }
    }
    int length = fCurrentEntity.position - offset;
    fCurrentEntity.columnNumber += length - newlines;
    if (fCurrentEntity.isGE) {
        // entities flagged isGE have the scanned range checked against
        // the total entity size limit
        checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length);
    }
    content.setValues(fCurrentEntity.ch, offset, length);

    // return next character
    if (fCurrentEntity.position != fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position];
        // NOTE: We don't want to accidentally signal the
        //       end of the literal if we're expanding an
        //       entity appearing in the literal. -Ac
        if (c == quote && fCurrentEntity.literal) {
            c = -1;
        }
    } else {
        // buffer exhausted: next character unknown
        c = -1;
    }
    if (DEBUG_BUFFER) {
        System.out.print(")scanLiteral, '"+(char)quote+"': ");
        print();
        System.out.println(" -> '"+(char)c+"'");
    }
    return c;

} // scanLiteral(int,XMLString):int

/**
 * Save whitespace information. Increase the whitespace buffer by 100
 * when needed.
 *
 * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
 *
 * @param whiteSpacePos position of a whitespace in the scanner entity buffer
 */
private void storeWhiteSpace(int whiteSpacePos) {
    if (whiteSpaceLen >= whiteSpaceLookup.length) {
        // lookup table is full: grow it by a fixed 100 slots, keeping
        // the positions recorded so far
        int [] tmp = new int[whiteSpaceLookup.length + 100];
        System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
        whiteSpaceLookup = tmp;
    }

    // append the position and advance the fill count
    whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
}

//CHANGED:
/**
 * Scans a range of character data up to the specified delimiter,
 * setting the fields of the XMLString structure, appropriately.
 * <p>
 * <strong>Note:</strong> The characters are consumed.
 * <p>
 * <strong>Note:</strong> This assumes that the delimiter contains at
 * least one character.
 * <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of character data. This method may return before
 * the delimiter due to reaching the end of the input buffer or any
 * other reason.
 * <p>
 * @param delimiter The string that signifies the end of the character
 *                  data to be scanned.
 * @param buffer    The XMLStringBuffer to fill.
 *
 * @return Returns true if there is more data to scan, false otherwise.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
*/
public boolean scanData(String delimiter, XMLStringBuffer buffer)
        throws IOException {

    boolean done = false;
    int delimLen = delimiter.length();
    // first delimiter character, used as the quick match trigger below
    char charAt0 = delimiter.charAt(0);
    do {
        if (DEBUG_BUFFER) {
            System.out.print("(scanData: ");
            print();
            System.out.println();
        }

        // load more characters, if needed

        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, false);
        }

        boolean bNextEntity = false;

        // While fewer than delimLen characters remain unread, move the
        // unread tail to the front of the buffer and load more input after
        // it. The loop ends once enough characters are available or load
        // reports the end of the entity.
        while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
            && (!bNextEntity))
        {
            System.arraycopy(fCurrentEntity.ch,
                    fCurrentEntity.position,
                    fCurrentEntity.ch,
                    0,
                    fCurrentEntity.count - fCurrentEntity.position);

            bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
            fCurrentEntity.position = 0;
            fCurrentEntity.startPosition = 0;
        }

        if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
            // something must be wrong with the input: e.g., file ends in an unterminated comment
            // Append whatever is left, mark the buffer as consumed, let the
            // entity manager move on (changeEntity == true) and report that
            // there is no more data.
            int length = fCurrentEntity.count - fCurrentEntity.position;
            buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
            fCurrentEntity.columnNumber += fCurrentEntity.count;
            fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
            fCurrentEntity.position = fCurrentEntity.count;
            fCurrentEntity.startPosition = fCurrentEntity.count;
            load(0, true, false);
            return false;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            // consume the leading run of line breaks
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted inside the run: restart the range
                        // at index 0 and load new data after the first
                        // `newlines` slots; stop if the entity ended
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // CR LF pair: consume the LF and advance offset so
                        // the pair contributes one character to the output
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same handling as the CR branch, except that count
                        // is also reset to `newlines` before loading
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        fCurrentEntity.count = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-read it and leave the loop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // every character of the consumed run is rewritten to '\n'
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character is left in the buffer: append the
                // newline run and tell the caller there is more to scan
                buffer.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return true;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // iterate over buffer looking for delimiter
        OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == charAt0) {
                // looks like we just hit the delimiter
                int delimOffset = fCurrentEntity.position - 1;
                for (int i = 1; i < delimLen; i++) {
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer ended inside a possible delimiter: rewind to
                        // its first character and stop scanning this buffer
                        fCurrentEntity.position -= i;
                        break OUTER;
                    }
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (delimiter.charAt(i) != c) {
                        // mismatch: rewind so scanning resumes just after
                        // the character that matched charAt0
                        fCurrentEntity.position -= i;
                        break;
                    }
                }
                if (fCurrentEntity.position == delimOffset + delimLen) {
                    // all delimLen characters matched
                    done = true;
                    break;
                }
            } else if (c == '\n' || (isExternal && c == '\r')) {
                // leave the line break unread; the next pass of the outer
                // do-loop normalizes it
                fCurrentEntity.position--;
                break;
            } else if (XMLChar.isInvalid(c)) {
                // leave the invalid character unread, append what was scanned
                // so far and return to the caller
                fCurrentEntity.position--;
                int length = fCurrentEntity.position - offset;
                fCurrentEntity.columnNumber += length - newlines;
                buffer.append(fCurrentEntity.ch, offset, length);
                return true;
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;
        if (done) {
            // the delimiter itself is consumed but not appended
            length -= delimLen;
        }
        buffer.append(fCurrentEntity.ch, offset, length);

        // debug trace of whether the delimiter was found in this pass
        if (DEBUG_BUFFER) {
            System.out.print(")scanData: ");
            print();
            System.out.println(" -> " + done);
        }
    } while (!done);
    // the loop above only exits with done == true, so this returns false
    return !done;

} // scanData(String, XMLStringBuffer)

/**
 * Skips a character appearing immediately on the input.
 * <p>
 * <strong>Note:</strong> The character is consumed only if it matches
 * the specified character.
 *
 * @param c The character to skip.
 *
 * @return Returns true if the character was skipped.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
1453 */ 1454 public boolean skipChar(int c) throws IOException { 1455 if (DEBUG_BUFFER) { 1456 System.out.print("(skipChar, '"+(char)c+"': "); 1457 print(); 1458 System.out.println(); 1459 } 1460 1461 // load more characters, if needed 1462 if (fCurrentEntity.position == fCurrentEntity.count) { 1463 load(0, true, true); 1464 } 1465 1466 // skip character 1467 int cc = fCurrentEntity.ch[fCurrentEntity.position]; 1468 if (cc == c) { 1469 fCurrentEntity.position++; 1470 if (c == '\n') { 1471 fCurrentEntity.lineNumber++; 1472 fCurrentEntity.columnNumber = 1; 1473 } else { 1474 fCurrentEntity.columnNumber++; 1475 } 1476 if (DEBUG_BUFFER) { 1477 System.out.print(")skipChar, '"+(char)c+"': "); 1478 print(); 1479 System.out.println(" -> true"); 1480 } 1481 return true; 1482 } else if (c == '\n' && cc == '\r' && isExternal) { 1483 // handle newlines 1484 if (fCurrentEntity.position == fCurrentEntity.count) { 1485 invokeListeners(1); 1486 fCurrentEntity.ch[0] = (char)cc; 1487 load(1, false, false); 1488 } 1489 fCurrentEntity.position++; 1490 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { 1491 fCurrentEntity.position++; 1492 } 1493 fCurrentEntity.lineNumber++; 1494 fCurrentEntity.columnNumber = 1; 1495 if (DEBUG_BUFFER) { 1496 System.out.print(")skipChar, '"+(char)c+"': "); 1497 print(); 1498 System.out.println(" -> true"); 1499 } 1500 return true; 1501 } 1502 1503 // character was not skipped 1504 if (DEBUG_BUFFER) { 1505 System.out.print(")skipChar, '"+(char)c+"': "); 1506 print(); 1507 System.out.println(" -> false"); 1508 } 1509 return false; 1510 1511 } // skipChar(int):boolean 1512 1513 public boolean isSpace(char ch){ 1514 return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); 1515 } 1516 /** 1517 * Skips space characters appearing immediately on the input. 1518 * <p> 1519 * <strong>Note:</strong> The characters are consumed only if they are 1520 * space characters. 1521 * 1522 * @return Returns true if at least one space character was skipped. 
*
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 *
 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
 */
public boolean skipSpaces() throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(skipSpaces: ");
        print();
        System.out.println();
    }
    //boolean entityChanged = false;
    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true, true);
    }

    //we are doing this check only in skipSpace() because it is called by
    //fMiscDispatcher and we want the parser to exit gracefully when document
    //is well-formed.
    //it is possible that end of document is reached and
    //fCurrentEntity becomes null
    //nothing was read so entity changed 'false' should be returned.
    if(fCurrentEntity == null){
        return false ;
    }

    // skip spaces
    int c = fCurrentEntity.ch[fCurrentEntity.position];
    if (XMLChar.isSpace(c)) {
        do {
            // set when the load below reports that the entity ended
            boolean entityChanged = false;
            // handle newlines
            if (c == '\n' || (isExternal && c == '\r')) {
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // the line break is the last buffered character: keep it
                    // in slot 0 and load more input behind it
                    invokeListeners(1);
                    fCurrentEntity.ch[0] = (char)c;
                    entityChanged = load(1, true, false);
                    if (!entityChanged){
                        // the load change the position to be 1,
                        // need to restore it when entity not changed
                        fCurrentEntity.position = 0;
                    }else if(fCurrentEntity == null){
                        // the entity ended and no entity is left; a space
                        // was already seen, so report true
                        return true ;
                    }
                }
                if (c == '\r' && isExternal) {
                    // REVISIT: Does this need to be updated to fix the
                    //          #x0D ^#x0A newline normalization problem? -Ac
                    // step onto the character after the CR; step back again
                    // unless it is the LF of a CR LF pair
                    if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                        fCurrentEntity.position--;
                    }
                }
            } else {
                fCurrentEntity.columnNumber++;
            }
            // consume the current character, unless the load above reported
            // an entity change
            if (!entityChanged){
                fCurrentEntity.position++;
            }

            // load more characters, if needed
            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, true);

                //we are doing this check only in skipSpace() because it is called by
                //fMiscDispatcher and we want the parser to exit gracefully when document
                //is well-formed.

                //it is possible that end of document is reached and
                //fCurrentEntity becomes null
                //at least one space was skipped before that, so 'true' is returned.
                if(fCurrentEntity == null){
                    return true ;
                }

            }
        } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> true");
        }
        return true;
    }

    // no spaces were found
    if (DEBUG_BUFFER) {
        System.out.print(")skipSpaces: ");
        print();
        System.out.println(" -> false");
    }
    return false;

} // skipSpaces():boolean


/**
 * Checks that the given number of characters is available in the
 * underlying buffer. Delegates to
 * {@link #arrangeCapacity(int, boolean)} with {@code changeEntity}
 * set to {@code false}.
 *
 * @param length the number of characters required.
 * @return true if the requested capacity is available.
 */
public boolean arrangeCapacity(int length) throws IOException{
    return arrangeCapacity(length, false);
}

/**
 * Checks that the given number of characters is available in the
 * underlying buffer, loading more input when it is not.
 *
 * @param length the number of characters required.
 * @param changeEntity if the underlying load may change the entity.
 * @return true if the requested capacity is available.
1636 * 1637 */ 1638 public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ 1639 //check if the capacity is availble in the current buffer 1640 //count is no. of characters in the buffer [x][m][l] 1641 //position is '0' based 1642 //System.out.println("fCurrent Entity " + fCurrentEntity); 1643 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1644 return true; 1645 } 1646 if(DEBUG_SKIP_STRING){ 1647 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1648 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1649 System.out.println("length = " + length); 1650 } 1651 boolean entityChanged = false; 1652 //load more characters -- this function shouldn't change the entity 1653 while((fCurrentEntity.count - fCurrentEntity.position) < length){ 1654 if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ 1655 invokeListeners(0); 1656 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); 1657 fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; 1658 fCurrentEntity.position = 0; 1659 } 1660 1661 if((fCurrentEntity.count - fCurrentEntity.position) < length){ 1662 int pos = fCurrentEntity.position; 1663 invokeListeners(pos); 1664 entityChanged = load(fCurrentEntity.count, changeEntity, false); 1665 fCurrentEntity.position = pos; 1666 if(entityChanged)break; 1667 } 1668 if(DEBUG_SKIP_STRING){ 1669 System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); 1670 System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); 1671 System.out.println("length = " + length); 1672 } 1673 } 1674 //load changes the position.. set it back to the point where we started. 1675 1676 //after loading check again. 
1677 if((fCurrentEntity.count - fCurrentEntity.position) >= length) { 1678 return true; 1679 } else { 1680 return false; 1681 } 1682 } 1683 1684 /** 1685 * Skips the specified string appearing immediately on the input. 1686 * <p> 1687 * <strong>Note:</strong> The characters are consumed only if all 1688 * the characters are skipped. 1689 * 1690 * @param s The string to skip. 1691 * 1692 * @return Returns true if the string was skipped. 1693 * 1694 * @throws IOException Thrown if i/o error occurs. 1695 * @throws EOFException Thrown on end of file. 1696 */ 1697 public boolean skipString(String s) throws IOException { 1698 1699 final int length = s.length(); 1700 1701 //first make sure that required capacity is avaible 1702 if(arrangeCapacity(length, false)){ 1703 final int beforeSkip = fCurrentEntity.position ; 1704 int afterSkip = fCurrentEntity.position + length - 1 ; 1705 if(DEBUG_SKIP_STRING){ 1706 System.out.println("skipString,length = " + s + "," + length); 1707 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1708 } 1709 1710 //s.charAt() indexes are 0 to 'Length -1' based. 
1711 int i = length - 1 ; 1712 //check from reverse 1713 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1714 if(afterSkip-- == beforeSkip){ 1715 fCurrentEntity.position = fCurrentEntity.position + length ; 1716 fCurrentEntity.columnNumber += length; 1717 return true; 1718 } 1719 } 1720 } 1721 1722 return false; 1723 } // skipString(String):boolean 1724 1725 public boolean skipString(char [] s) throws IOException { 1726 1727 final int length = s.length; 1728 //first make sure that required capacity is avaible 1729 if(arrangeCapacity(length, false)){ 1730 int beforeSkip = fCurrentEntity.position; 1731 1732 if(DEBUG_SKIP_STRING){ 1733 System.out.println("skipString,length = " + new String(s) + "," + length); 1734 System.out.println("skipString,length = " + new String(s) + "," + length); 1735 } 1736 1737 for(int i=0;i<length;i++){ 1738 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1739 return false; 1740 } 1741 } 1742 fCurrentEntity.position = fCurrentEntity.position + length ; 1743 fCurrentEntity.columnNumber += length; 1744 return true; 1745 1746 } 1747 1748 return false; 1749 } 1750 1751 // 1752 // Locator methods 1753 // 1754 // 1755 // Private methods 1756 // 1757 1758 /** 1759 * Loads a chunk of text. 1760 * 1761 * @param offset The offset into the character buffer to 1762 * read the next batch of characters. 1763 * @param changeEntity True if the load should change entities 1764 * at the end of the entity, otherwise leave 1765 * the current entity in place and the entity 1766 * boundary will be signaled by the return 1767 * value. 1768 * @param notify Determine whether to notify listeners of 1769 * the event 1770 * 1771 * @returns Returns true if the entity changed as a result of this 1772 * load operation. 
1773 */ 1774 final boolean load(int offset, boolean changeEntity, boolean notify) 1775 throws IOException { 1776 if (DEBUG_BUFFER) { 1777 System.out.print("(load, "+offset+": "); 1778 print(); 1779 System.out.println(); 1780 } 1781 if (notify) { 1782 invokeListeners(offset); 1783 } 1784 //maintaing the count till last load 1785 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1786 // read characters 1787 int length = fCurrentEntity.ch.length - offset; 1788 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1789 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1790 } 1791 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1792 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1793 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1794 1795 // reset count and position 1796 boolean entityChanged = false; 1797 if (count != -1) { 1798 if (count != 0) { 1799 // record the last count 1800 fCurrentEntity.fLastCount = count; 1801 fCurrentEntity.count = count + offset; 1802 fCurrentEntity.position = offset; 1803 } 1804 } 1805 // end of this entity 1806 else { 1807 fCurrentEntity.count = offset; 1808 fCurrentEntity.position = offset; 1809 entityChanged = true; 1810 1811 if (changeEntity) { 1812 //notify the entity manager about the end of entity 1813 fEntityManager.endEntity(); 1814 //return if the current entity becomes null 1815 if(fCurrentEntity == null){ 1816 throw END_OF_DOCUMENT_ENTITY; 1817 } 1818 // handle the trailing edges 1819 if (fCurrentEntity.position == fCurrentEntity.count) { 1820 load(0, true, false); 1821 } 1822 } 1823 1824 } 1825 if (DEBUG_BUFFER) { 1826 System.out.print(")load, "+offset+": "); 1827 print(); 1828 System.out.println(); 1829 } 1830 1831 return entityChanged; 1832 1833 } // load(int, boolean):boolean 1834 1835 /** 1836 * Creates a reader capable of reading the given 
input stream in 1837 * the specified encoding. 1838 * 1839 * @param inputStream The input stream. 1840 * @param encoding The encoding name that the input stream is 1841 * encoded using. If the user has specified that 1842 * Java encoding names are allowed, then the 1843 * encoding name may be a Java encoding name; 1844 * otherwise, it is an ianaEncoding name. 1845 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1846 * specify a byte order, this tells whether the order is bigEndian. null menas 1847 * unknown or not relevant. 1848 * 1849 * @return Returns a reader. 1850 */ 1851 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1852 throws IOException { 1853 1854 // normalize encoding name 1855 if (encoding == null) { 1856 encoding = "UTF-8"; 1857 } 1858 1859 // try to use an optimized reader 1860 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1861 if (ENCODING.equals("UTF-8")) { 1862 if (DEBUG_ENCODINGS) { 1863 System.out.println("$$$ creating UTF8Reader"); 1864 } 1865 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1866 } 1867 if (ENCODING.equals("US-ASCII")) { 1868 if (DEBUG_ENCODINGS) { 1869 System.out.println("$$$ creating ASCIIReader"); 1870 } 1871 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1872 } 1873 if(ENCODING.equals("ISO-10646-UCS-4")) { 1874 if(isBigEndian != null) { 1875 boolean isBE = isBigEndian.booleanValue(); 1876 if(isBE) { 1877 return new UCSReader(inputStream, UCSReader.UCS4BE); 1878 } else { 1879 return new UCSReader(inputStream, UCSReader.UCS4LE); 1880 } 1881 } else { 1882 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1883 "EncodingByteOrderUnsupported", 1884 new Object[] { encoding }, 1885 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1886 } 
1887 } 1888 if(ENCODING.equals("ISO-10646-UCS-2")) { 1889 if(isBigEndian != null) { // sould never happen with this encoding... 1890 boolean isBE = isBigEndian.booleanValue(); 1891 if(isBE) { 1892 return new UCSReader(inputStream, UCSReader.UCS2BE); 1893 } else { 1894 return new UCSReader(inputStream, UCSReader.UCS2LE); 1895 } 1896 } else { 1897 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1898 "EncodingByteOrderUnsupported", 1899 new Object[] { encoding }, 1900 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1901 } 1902 } 1903 1904 // check for valid name 1905 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 1906 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 1907 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 1908 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1909 "EncodingDeclInvalid", 1910 new Object[] { encoding }, 1911 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1912 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 1913 // because every byte is a valid ISO Latin 1 character. 1914 // It may not translate correctly but if we failed on 1915 // the encoding anyway, then we're expecting the content 1916 // of the document to be bad. This will just prevent an 1917 // invalid UTF-8 sequence to be detected. This is only 1918 // important when continue-after-fatal-error is turned 1919 // on. -Ac 1920 encoding = "ISO-8859-1"; 1921 } 1922 1923 // try to use a Java reader 1924 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 1925 if (javaEncoding == null) { 1926 if(fAllowJavaEncodings) { 1927 javaEncoding = encoding; 1928 } else { 1929 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1930 "EncodingDeclInvalid", 1931 new Object[] { encoding }, 1932 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1933 // see comment above. 
1934 javaEncoding = "ISO8859_1"; 1935 } 1936 } 1937 else if (javaEncoding.equals("ASCII")) { 1938 if (DEBUG_ENCODINGS) { 1939 System.out.println("$$$ creating ASCIIReader"); 1940 } 1941 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1942 } 1943 1944 if (DEBUG_ENCODINGS) { 1945 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 1946 if (javaEncoding == encoding) { 1947 System.out.print(" (IANA encoding)"); 1948 } 1949 System.out.println(); 1950 } 1951 return new InputStreamReader(inputStream, javaEncoding); 1952 1953 } // createReader(InputStream,String, Boolean): Reader 1954 1955 /** 1956 * Returns the IANA encoding name that is auto-detected from 1957 * the bytes specified, with the endian-ness of that encoding where appropriate. 1958 * 1959 * @param b4 The first four bytes of the input. 1960 * @param count The number of bytes actually read. 1961 * @return a 2-element array: the first element, an IANA-encoding string, 1962 * the second element a Boolean which is true iff the document is big endian, false 1963 * if it's little-endian, and null if the distinction isn't relevant. 
1964 */ 1965 protected Object[] getEncodingName(byte[] b4, int count) { 1966 1967 if (count < 2) { 1968 return new Object[]{"UTF-8", null}; 1969 } 1970 1971 // UTF-16, with BOM 1972 int b0 = b4[0] & 0xFF; 1973 int b1 = b4[1] & 0xFF; 1974 if (b0 == 0xFE && b1 == 0xFF) { 1975 // UTF-16, big-endian 1976 return new Object [] {"UTF-16BE", new Boolean(true)}; 1977 } 1978 if (b0 == 0xFF && b1 == 0xFE) { 1979 // UTF-16, little-endian 1980 return new Object [] {"UTF-16LE", new Boolean(false)}; 1981 } 1982 1983 // default to UTF-8 if we don't have enough bytes to make a 1984 // good determination of the encoding 1985 if (count < 3) { 1986 return new Object [] {"UTF-8", null}; 1987 } 1988 1989 // UTF-8 with a BOM 1990 int b2 = b4[2] & 0xFF; 1991 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 1992 return new Object [] {"UTF-8", null}; 1993 } 1994 1995 // default to UTF-8 if we don't have enough bytes to make a 1996 // good determination of the encoding 1997 if (count < 4) { 1998 return new Object [] {"UTF-8", null}; 1999 } 2000 2001 // other encodings 2002 int b3 = b4[3] & 0xFF; 2003 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2004 // UCS-4, big endian (1234) 2005 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2006 } 2007 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2008 // UCS-4, little endian (4321) 2009 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2010 } 2011 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2012 // UCS-4, unusual octet order (2143) 2013 // REVISIT: What should this be? 2014 return new Object [] {"ISO-10646-UCS-4", null}; 2015 } 2016 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2017 // UCS-4, unusual octect order (3412) 2018 // REVISIT: What should this be? 2019 return new Object [] {"ISO-10646-UCS-4", null}; 2020 } 2021 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2022 // UTF-16, big-endian, no BOM 2023 // (or could turn out to be UCS-2... 
2024 // REVISIT: What should this be? 2025 return new Object [] {"UTF-16BE", new Boolean(true)}; 2026 } 2027 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2028 // UTF-16, little-endian, no BOM 2029 // (or could turn out to be UCS-2... 2030 return new Object [] {"UTF-16LE", new Boolean(false)}; 2031 } 2032 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2033 // EBCDIC 2034 // a la xerces1, return CP037 instead of EBCDIC here 2035 return new Object [] {"CP037", null}; 2036 } 2037 2038 // default encoding 2039 return new Object [] {"UTF-8", null}; 2040 2041 } // getEncodingName(byte[],int):Object[] 2042 2043 /** 2044 * xxx not removing endEntity() so that i remember that we need to implement it. 2045 * Ends an entity. 2046 * 2047 * @throws XNIException Thrown by entity handler to signal an error. 2048 */ 2049 // 2050 /** Prints the contents of the buffer. */ 2051 final void print() { 2052 if (DEBUG_BUFFER) { 2053 if (fCurrentEntity != null) { 2054 System.out.print('['); 2055 System.out.print(fCurrentEntity.count); 2056 System.out.print(' '); 2057 System.out.print(fCurrentEntity.position); 2058 if (fCurrentEntity.count > 0) { 2059 System.out.print(" \""); 2060 for (int i = 0; i < fCurrentEntity.count; i++) { 2061 if (i == fCurrentEntity.position) { 2062 System.out.print('^'); 2063 } 2064 char c = fCurrentEntity.ch[i]; 2065 switch (c) { 2066 case '\n': { 2067 System.out.print("\\n"); 2068 break; 2069 } 2070 case '\r': { 2071 System.out.print("\\r"); 2072 break; 2073 } 2074 case '\t': { 2075 System.out.print("\\t"); 2076 break; 2077 } 2078 case '\\': { 2079 System.out.print("\\\\"); 2080 break; 2081 } 2082 default: { 2083 System.out.print(c); 2084 } 2085 } 2086 } 2087 if (fCurrentEntity.position == fCurrentEntity.count) { 2088 System.out.print('^'); 2089 } 2090 System.out.print('"'); 2091 } 2092 System.out.print(']'); 2093 System.out.print(" @ "); 2094 System.out.print(fCurrentEntity.lineNumber); 2095 System.out.print(','); 2096 
System.out.print(fCurrentEntity.columnNumber); 2097 } else { 2098 System.out.print("*NO CURRENT ENTITY*"); 2099 } 2100 } 2101 } 2102 2103 /** 2104 * Registers the listener object and provides callback. 2105 * @param listener listener to which call back should be provided when scanner buffer 2106 * is being changed. 2107 */ 2108 public void registerListener(XMLBufferListener listener) { 2109 if (!listeners.contains(listener)) { 2110 listeners.add(listener); 2111 } 2112 } 2113 2114 /** 2115 * 2116 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2117 */ 2118 public void invokeListeners(int loadPos){ 2119 for (int i=0; i<listeners.size(); i++) { 2120 listeners.get(i).refresh(loadPos); 2121 } 2122 } 2123 2124 /** 2125 * Skips space characters appearing immediately on the input that would 2126 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2127 * normalization is performed. This is useful when scanning structures 2128 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2129 * characters. 2130 * <p> 2131 * <strong>Note:</strong> The characters are consumed only if they would 2132 * match non-terminal S before end of line normalization is performed. 2133 * 2134 * @return Returns true if at least one space character was skipped. 2135 * 2136 * @throws IOException Thrown if i/o error occurs. 2137 * @throws EOFException Thrown on end of file. 
2138 * 2139 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace 2140 */ 2141 public final boolean skipDeclSpaces() throws IOException { 2142 if (DEBUG_BUFFER) { 2143 System.out.print("(skipDeclSpaces: "); 2144 //XMLEntityManager.print(fCurrentEntity); 2145 System.out.println(); 2146 } 2147 2148 // load more characters, if needed 2149 if (fCurrentEntity.position == fCurrentEntity.count) { 2150 load(0, true, false); 2151 } 2152 2153 // skip spaces 2154 int c = fCurrentEntity.ch[fCurrentEntity.position]; 2155 if (XMLChar.isSpace(c)) { 2156 boolean external = fCurrentEntity.isExternal(); 2157 do { 2158 boolean entityChanged = false; 2159 // handle newlines 2160 if (c == '\n' || (external && c == '\r')) { 2161 fCurrentEntity.lineNumber++; 2162 fCurrentEntity.columnNumber = 1; 2163 if (fCurrentEntity.position == fCurrentEntity.count - 1) { 2164 fCurrentEntity.ch[0] = (char)c; 2165 entityChanged = load(1, true, false); 2166 if (!entityChanged) 2167 // the load change the position to be 1, 2168 // need to restore it when entity not changed 2169 fCurrentEntity.position = 0; 2170 } 2171 if (c == '\r' && external) { 2172 // REVISIT: Does this need to be updated to fix the 2173 // #x0D ^#x0A newline normalization problem? 
-Ac 2174 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { 2175 fCurrentEntity.position--; 2176 } 2177 } 2178 /*** NEWLINE NORMALIZATION *** 2179 * else { 2180 * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' 2181 * && external) { 2182 * fCurrentEntity.position++; 2183 * } 2184 * } 2185 * /***/ 2186 } else { 2187 fCurrentEntity.columnNumber++; 2188 } 2189 // load more characters, if needed 2190 if (!entityChanged) 2191 fCurrentEntity.position++; 2192 if (fCurrentEntity.position == fCurrentEntity.count) { 2193 load(0, true, false); 2194 } 2195 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); 2196 if (DEBUG_BUFFER) { 2197 System.out.print(")skipDeclSpaces: "); 2198 // XMLEntityManager.print(fCurrentEntity); 2199 System.out.println(" -> true"); 2200 } 2201 return true; 2202 } 2203 2204 // no spaces were found 2205 if (DEBUG_BUFFER) { 2206 System.out.print(")skipDeclSpaces: "); 2207 //XMLEntityManager.print(fCurrentEntity); 2208 System.out.println(" -> false"); 2209 } 2210 return false; 2211 2212 } // skipDeclSpaces():boolean 2213 2214 2215 } // class XMLEntityScanner