1 /*
   2  * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
  24 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  25 import com.sun.org.apache.xerces.internal.util.XML11Char;
  26 import com.sun.org.apache.xerces.internal.util.XMLChar;
  27 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  28 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
  29 import com.sun.org.apache.xerces.internal.xni.QName;
  30 import com.sun.org.apache.xerces.internal.xni.XMLString;
  31 import com.sun.xml.internal.stream.Entity;
  32 import java.io.IOException;
  33 
  34 /**
  35  * Implements the entity scanner methods in
  36  * the context of XML 1.1.
  37  *
  38  * @xerces.internal
  39  *
  40  * @author Michael Glavassevich, IBM
  41  * @author Neil Graham, IBM
  42  * @version $Id: XML11EntityScanner.java,v 1.5 2010-11-01 04:39:40 joehw Exp $
  43  *
  44  * @LastModified: Apr 2021
  45  */
  46 
  47 public class XML11EntityScanner
  48     extends XMLEntityScanner {
  49 
  50     //
  51     // Constructors
  52     //
  53 
  54     /** Default constructor. */
  55     public XML11EntityScanner() {
  56         super();
  57     } // <init>()
  58 
  59     //
  60     // XMLEntityScanner methods
  61     //
  62 
  63     /**
  64      * Returns the next character on the input.
  65      * <p>
  66      * <strong>Note:</strong> The character is <em>not</em> consumed.
  67      *
  68      * @throws IOException  Thrown if i/o error occurs.
  69      * @throws EOFException Thrown on end of file.
  70      */
  71     public int peekChar() throws IOException {
  72 
  73         // load more characters, if needed
  74         if (fCurrentEntity.position == fCurrentEntity.count) {
  75             load(0, true, true);
  76         }
  77 
  78         // peek at character
  79         int c = fCurrentEntity.ch[fCurrentEntity.position];
  80 
  81         // return peeked character
  82         if (fCurrentEntity.isExternal()) {
  83             return (c != '\r' && c != 0x85 && c != 0x2028) ? c : '\n';
  84         }
  85         else {
  86             return c;
  87         }
  88 
  89     } // peekChar():int
  90 
  91     /**
  92      * Returns the next character on the input.
  93      * <p>
  94      * <strong>Note:</strong> The character is consumed.
  95      *
  96      * @throws IOException  Thrown if i/o error occurs.
  97      * @throws EOFException Thrown on end of file.
  98      */
  99     protected int scanChar(NameType nt) throws IOException {
 100 
 101         // load more characters, if needed
 102         if (fCurrentEntity.position == fCurrentEntity.count) {
 103             load(0, true, true);
 104         }
 105 
 106         // scan character
 107         int offset = fCurrentEntity.position;
 108         int c = fCurrentEntity.ch[fCurrentEntity.position++];
 109         boolean external = false;
 110         if (c == '\n' ||
 111             ((c == '\r' || c == 0x85 || c == 0x2028) && (external = fCurrentEntity.isExternal()))) {
 112             fCurrentEntity.lineNumber++;
 113             fCurrentEntity.columnNumber = 1;
 114             if (fCurrentEntity.position == fCurrentEntity.count) {
 115                 invokeListeners(1);
 116                 fCurrentEntity.ch[0] = (char)c;
 117                 load(1, false, false);
 118                 offset = 0;
 119             }
 120             if (c == '\r' && external) {
 121                 int cc = fCurrentEntity.ch[fCurrentEntity.position++];
 122                 if (cc != '\n' && cc != 0x85) {
 123                     fCurrentEntity.position--;
 124                 }
 125             }
 126             c = '\n';
 127         }
 128 
 129         // return character that was scanned
 130         fCurrentEntity.columnNumber++;
 131         if (!detectingVersion) {
 132             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
 133         }
 134         return c;
 135 
 136     } // scanChar():int
 137 
 138     /**
 139      * Returns a string matching the NMTOKEN production appearing immediately
 140      * on the input as a symbol, or null if NMTOKEN Name string is present.
 141      * <p>
 142      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 143      * <p>
 144      * <strong>Note:</strong> The string returned must be a symbol. The
 145      * SymbolTable can be used for this purpose.
 146      *
 147      * @throws IOException  Thrown if i/o error occurs.
 148      * @throws EOFException Thrown on end of file.
 149      *
 150      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 151      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 152      */
 153     protected String scanNmtoken() throws IOException {
 154         // load more characters, if needed
 155         if (fCurrentEntity.position == fCurrentEntity.count) {
 156             load(0, true, true);
 157         }
 158 
 159         // scan nmtoken
 160         int offset = fCurrentEntity.position;
 161 
 162         do {
 163             char ch = fCurrentEntity.ch[fCurrentEntity.position];
 164             if (XML11Char.isXML11Name(ch)) {
 165                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 166                     int length = fCurrentEntity.position - offset;
 167                     invokeListeners(length);
 168                     if (length == fCurrentEntity.ch.length) {
 169                         // bad luck we have to resize our buffer
 170                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 171                         System.arraycopy(fCurrentEntity.ch, offset,
 172                                          tmp, 0, length);
 173                         fCurrentEntity.ch = tmp;
 174                     }
 175                     else {
 176                         System.arraycopy(fCurrentEntity.ch, offset,
 177                                          fCurrentEntity.ch, 0, length);
 178                     }
 179                     offset = 0;
 180                     if (load(length, false, false)) {
 181                         break;
 182                     }
 183                 }
 184             }
 185             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 186                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 187                     int length = fCurrentEntity.position - offset;
 188                     invokeListeners(length);
 189                     if (length == fCurrentEntity.ch.length) {
 190                         // bad luck we have to resize our buffer
 191                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 192                         System.arraycopy(fCurrentEntity.ch, offset,
 193                                          tmp, 0, length);
 194                         fCurrentEntity.ch = tmp;
 195                     }
 196                     else {
 197                         System.arraycopy(fCurrentEntity.ch, offset,
 198                                          fCurrentEntity.ch, 0, length);
 199                     }
 200                     offset = 0;
 201                     if (load(length, false, false)) {
 202                         --fCurrentEntity.startPosition;
 203                         --fCurrentEntity.position;
 204                         break;
 205                     }
 206                 }
 207                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 208                 if ( !XMLChar.isLowSurrogate(ch2) ||
 209                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 210                     --fCurrentEntity.position;
 211                     break;
 212                 }
 213                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 214                     int length = fCurrentEntity.position - offset;
 215                     invokeListeners(length);
 216                     if (length == fCurrentEntity.ch.length) {
 217                         // bad luck we have to resize our buffer
 218                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 219                         System.arraycopy(fCurrentEntity.ch, offset,
 220                                          tmp, 0, length);
 221                         fCurrentEntity.ch = tmp;
 222                     }
 223                     else {
 224                         System.arraycopy(fCurrentEntity.ch, offset,
 225                                          fCurrentEntity.ch, 0, length);
 226                     }
 227                     offset = 0;
 228                     if (load(length, false, false)) {
 229                         break;
 230                     }
 231                 }
 232             }
 233             else {
 234                 break;
 235             }
 236         }
 237         while (true);
 238 
 239         int length = fCurrentEntity.position - offset;
 240         fCurrentEntity.columnNumber += length;
 241 
 242         // return nmtoken
 243         String symbol = null;
 244         if (length > 0) {
 245             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 246         }
 247         return symbol;
 248 
 249     } // scanNmtoken():String
 250 
 251     /**
 252      * Returns a string matching the Name production appearing immediately
 253      * on the input as a symbol, or null if no Name string is present.
 254      * <p>
 255      * <strong>Note:</strong> The Name characters are consumed.
 256      * <p>
 257      * <strong>Note:</strong> The string returned must be a symbol. The
 258      * SymbolTable can be used for this purpose.
 259      *
 260      * @param nt The type of the name (element or attribute)
 261      *
 262      * @throws IOException  Thrown if i/o error occurs.
 263      * @throws EOFException Thrown on end of file.
 264      *
 265      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 266      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 267      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NameStart
 268      */
 269     protected String scanName(NameType nt) throws IOException {
 270         // load more characters, if needed
 271         if (fCurrentEntity.position == fCurrentEntity.count) {
 272             load(0, true, true);
 273         }
 274 
 275         // scan name
 276         int offset = fCurrentEntity.position;
 277         char ch = fCurrentEntity.ch[offset];
 278 
 279         if (XML11Char.isXML11NameStart(ch)) {
 280             if (++fCurrentEntity.position == fCurrentEntity.count) {
 281                 invokeListeners(1);
 282                 fCurrentEntity.ch[0] = ch;
 283                 offset = 0;
 284                 if (load(1, false, false)) {
 285                     fCurrentEntity.columnNumber++;
 286                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 287                     return symbol;
 288                 }
 289             }
 290         }
 291         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 292             if (++fCurrentEntity.position == fCurrentEntity.count) {
 293                 invokeListeners(1);
 294                 fCurrentEntity.ch[0] = ch;
 295                 offset = 0;
 296                 if (load(1, false, false)) {
 297                     --fCurrentEntity.position;
 298                     --fCurrentEntity.startPosition;
 299                     return null;
 300                 }
 301             }
 302             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 303             if ( !XMLChar.isLowSurrogate(ch2) ||
 304                  !XML11Char.isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) {
 305                 --fCurrentEntity.position;
 306                 return null;
 307             }
 308             if (++fCurrentEntity.position == fCurrentEntity.count) {
 309                 invokeListeners(2);
 310                 fCurrentEntity.ch[0] = ch;
 311                 fCurrentEntity.ch[1] = ch2;
 312                 offset = 0;
 313                 if (load(2, false, false)) {
 314                     fCurrentEntity.columnNumber += 2;
 315                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 316                     return symbol;
 317                 }
 318             }
 319         }
 320         else {
 321             return null;
 322         }
 323 
 324         int length = 0;
 325         do {
 326             ch = fCurrentEntity.ch[fCurrentEntity.position];
 327             if (XML11Char.isXML11Name(ch)) {
 328                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 329                     offset = 0;
 330                     if (load(length, false, false)) {
 331                         break;
 332                     }
 333                 }
 334             }
 335             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 336                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 337                     offset = 0;
 338                     if (load(length, false, false)) {
 339                         --fCurrentEntity.position;
 340                         --fCurrentEntity.startPosition;
 341                         break;
 342                     }
 343                 }
 344                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 345                 if ( !XMLChar.isLowSurrogate(ch2) ||
 346                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 347                     --fCurrentEntity.position;
 348                     break;
 349                 }
 350                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 351                     offset = 0;
 352                     if (load(length, false, false)) {
 353                         break;
 354                     }
 355                 }
 356             }
 357             else {
 358                 break;
 359             }
 360         }
 361         while (true);
 362 
 363         length = fCurrentEntity.position - offset;
 364         fCurrentEntity.columnNumber += length;
 365 
 366         // return name
 367         String symbol = null;
 368         if (length > 0) {
 369             checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 370             checkEntityLimit(nt, fCurrentEntity, offset, length);
 371             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 372         }
 373         return symbol;
 374 
 375     } // scanName():String
 376 
 377     /**
 378      * Returns a string matching the NCName production appearing immediately
 379      * on the input as a symbol, or null if no NCName string is present.
 380      * <p>
 381      * <strong>Note:</strong> The NCName characters are consumed.
 382      * <p>
 383      * <strong>Note:</strong> The string returned must be a symbol. The
 384      * SymbolTable can be used for this purpose.
 385      *
 386      * @throws IOException  Thrown if i/o error occurs.
 387      * @throws EOFException Thrown on end of file.
 388      *
 389      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 390      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NCName
 391      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NCNameStart
 392      */
 393     protected String scanNCName() throws IOException {
 394 
 395         // load more characters, if needed
 396         if (fCurrentEntity.position == fCurrentEntity.count) {
 397             load(0, true, true);
 398         }
 399 
 400         // scan name
 401         int offset = fCurrentEntity.position;
 402         char ch = fCurrentEntity.ch[offset];
 403 
 404         if (XML11Char.isXML11NCNameStart(ch)) {
 405             if (++fCurrentEntity.position == fCurrentEntity.count) {
 406                 invokeListeners(1);
 407                 fCurrentEntity.ch[0] = ch;
 408                 offset = 0;
 409                 if (load(1, false, false)) {
 410                     fCurrentEntity.columnNumber++;
 411                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 412                     return symbol;
 413                 }
 414             }
 415         }
 416         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 417             if (++fCurrentEntity.position == fCurrentEntity.count) {
 418                 invokeListeners(1);
 419                 fCurrentEntity.ch[0] = ch;
 420                 offset = 0;
 421                 if (load(1, false, false)) {
 422                     --fCurrentEntity.position;
 423                     --fCurrentEntity.startPosition;
 424                     return null;
 425                 }
 426             }
 427             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 428             if ( !XMLChar.isLowSurrogate(ch2) ||
 429                  !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
 430                 --fCurrentEntity.position;
 431                 return null;
 432             }
 433             if (++fCurrentEntity.position == fCurrentEntity.count) {
 434                 invokeListeners(2);
 435                 fCurrentEntity.ch[0] = ch;
 436                 fCurrentEntity.ch[1] = ch2;
 437                 offset = 0;
 438                 if (load(2, false, false)) {
 439                     fCurrentEntity.columnNumber += 2;
 440                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 441                     return symbol;
 442                 }
 443             }
 444         }
 445         else {
 446             return null;
 447         }
 448 
 449         do {
 450             ch = fCurrentEntity.ch[fCurrentEntity.position];
 451             if (XML11Char.isXML11NCName(ch)) {
 452                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 453                     int length = fCurrentEntity.position - offset;
 454                     invokeListeners(length);
 455                     if (length == fCurrentEntity.ch.length) {
 456                         // bad luck we have to resize our buffer
 457                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 458                         System.arraycopy(fCurrentEntity.ch, offset,
 459                                          tmp, 0, length);
 460                         fCurrentEntity.ch = tmp;
 461                     }
 462                     else {
 463                         System.arraycopy(fCurrentEntity.ch, offset,
 464                                          fCurrentEntity.ch, 0, length);
 465                     }
 466                     offset = 0;
 467                     if (load(length, false, false)) {
 468                         break;
 469                     }
 470                 }
 471             }
 472             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 473                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 474                     int length = fCurrentEntity.position - offset;
 475                     invokeListeners(length);
 476                     if (length == fCurrentEntity.ch.length) {
 477                         // bad luck we have to resize our buffer
 478                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 479                         System.arraycopy(fCurrentEntity.ch, offset,
 480                                          tmp, 0, length);
 481                         fCurrentEntity.ch = tmp;
 482                     }
 483                     else {
 484                         System.arraycopy(fCurrentEntity.ch, offset,
 485                                          fCurrentEntity.ch, 0, length);
 486                     }
 487                     offset = 0;
 488                     if (load(length, false, false)) {
 489                         --fCurrentEntity.startPosition;
 490                         --fCurrentEntity.position;
 491                         break;
 492                     }
 493                 }
 494                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 495                 if ( !XMLChar.isLowSurrogate(ch2) ||
 496                      !XML11Char.isXML11NCName(XMLChar.supplemental(ch, ch2)) ) {
 497                     --fCurrentEntity.position;
 498                     break;
 499                 }
 500                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 501                     int length = fCurrentEntity.position - offset;
 502                     invokeListeners(length);
 503                     if (length == fCurrentEntity.ch.length) {
 504                         // bad luck we have to resize our buffer
 505                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 506                         System.arraycopy(fCurrentEntity.ch, offset,
 507                                          tmp, 0, length);
 508                         fCurrentEntity.ch = tmp;
 509                     }
 510                     else {
 511                         System.arraycopy(fCurrentEntity.ch, offset,
 512                                          fCurrentEntity.ch, 0, length);
 513                     }
 514                     offset = 0;
 515                     if (load(length, false, false)) {
 516                         break;
 517                     }
 518                 }
 519             }
 520             else {
 521                 break;
 522             }
 523         }
 524         while (true);
 525 
 526         int length = fCurrentEntity.position - offset;
 527         fCurrentEntity.columnNumber += length;
 528 
 529         // return name
 530         String symbol = null;
 531         if (length > 0) {
 532             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 533         }
 534         return symbol;
 535 
 536     } // scanNCName():String
 537 
 538     /**
 539      * Scans a qualified name from the input, setting the fields of the
 540      * QName structure appropriately.
 541      * <p>
 542      * <strong>Note:</strong> The qualified name characters are consumed.
 543      * <p>
 544      * <strong>Note:</strong> The strings used to set the values of the
 545      * QName structure must be symbols. The SymbolTable can be used for
 546      * this purpose.
 547      *
 548      * @param qname The qualified name structure to fill.
 549      * @param nt The type of the name (element or attribute)
 550      *
 551      * @return Returns true if a qualified name appeared immediately on
 552      *         the input and was scanned, false otherwise.
 553      *
 554      * @throws IOException  Thrown if i/o error occurs.
 555      * @throws EOFException Thrown on end of file.
 556      *
 557      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 558      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 559      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NameStart
 560      */
 561     protected boolean scanQName(QName qname, XMLScanner.NameType nt) throws IOException {
 562 
 563         // load more characters, if needed
 564         if (fCurrentEntity.position == fCurrentEntity.count) {
 565             load(0, true, true);
 566         }
 567 
 568         // scan qualified name
 569         int offset = fCurrentEntity.position;
 570         char ch = fCurrentEntity.ch[offset];
 571 
 572         if (XML11Char.isXML11NCNameStart(ch)) {
 573             if (++fCurrentEntity.position == fCurrentEntity.count) {
 574                 invokeListeners(1);
 575                 fCurrentEntity.ch[0] = ch;
 576                 offset = 0;
 577                 if (load(1, false, false)) {
 578                     fCurrentEntity.columnNumber++;
 579                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 580                     qname.setValues(null, name, name, null);
 581                     checkEntityLimit(nt, fCurrentEntity, 0, 1);
 582                     return true;
 583                 }
 584             }
 585         }
 586         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 587             if (++fCurrentEntity.position == fCurrentEntity.count) {
 588                 invokeListeners(1);
 589                 fCurrentEntity.ch[0] = ch;
 590                 offset = 0;
 591                 if (load(1, false, false)) {
 592                     --fCurrentEntity.startPosition;
 593                     --fCurrentEntity.position;
 594                     return false;
 595                 }
 596             }
 597             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 598             if ( !XMLChar.isLowSurrogate(ch2) ||
 599                  !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
 600                 --fCurrentEntity.position;
 601                 return false;
 602             }
 603             if (++fCurrentEntity.position == fCurrentEntity.count) {
 604                 invokeListeners(2);
 605                 fCurrentEntity.ch[0] = ch;
 606                 fCurrentEntity.ch[1] = ch2;
 607                 offset = 0;
 608                 if (load(2, false, false)) {
 609                     fCurrentEntity.columnNumber += 2;
 610                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 611                     qname.setValues(null, name, name, null);
 612                     checkEntityLimit(nt, fCurrentEntity, 0, 2);
 613                     return true;
 614                 }
 615             }
 616         }
 617         else {
 618             return false;
 619         }
 620 
 621         int index = -1;
 622         int length = 0;
 623         boolean sawIncompleteSurrogatePair = false;
 624         do {
 625             ch = fCurrentEntity.ch[fCurrentEntity.position];
 626             if (XML11Char.isXML11Name(ch)) {
 627                 if (ch == ':') {
 628                     if (index != -1) {
 629                         break;
 630                     }
 631                     index = fCurrentEntity.position;
 632                     //check prefix before further read
 633                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
 634                 }
 635                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 636                     if (index != -1) {
 637                         index = index - offset;
 638                     }
 639                     offset = 0;
 640                     if (load(length, false, false)) {
 641                         break;
 642                     }
 643                 }
 644             }
 645             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 646                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 647                     if (index != -1) {
 648                         index = index - offset;
 649                     }
 650                     offset = 0;
 651                     if (load(length, false, false)) {
 652                         sawIncompleteSurrogatePair = true;
 653                         --fCurrentEntity.startPosition;
 654                         --fCurrentEntity.position;
 655                         break;
 656                     }
 657                 }
 658                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 659                 if ( !XMLChar.isLowSurrogate(ch2) ||
 660                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 661                     sawIncompleteSurrogatePair = true;
 662                     --fCurrentEntity.position;
 663                     break;
 664                 }
 665                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 666                     if (index != -1) {
 667                         index = index - offset;
 668                     }
 669                     offset = 0;
 670                     if (load(length, false, false)) {
 671                         break;
 672                     }
 673                 }
 674             }
 675             else {
 676                 break;
 677             }
 678         }
 679         while (true);
 680 
 681         length = fCurrentEntity.position - offset;
 682         fCurrentEntity.columnNumber += length;
 683 
 684         if (length > 0) {
 685             String prefix = null;
 686             String localpart = null;
 687             String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 688                                                     offset, length);
 689             if (index != -1) {
 690                 int prefixLength = index - offset;
 691                 //check the result: prefix
 692                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
 693                 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 694                                                     offset, prefixLength);
 695                 int len = length - prefixLength - 1;
 696                 int startLocal = index +1;
 697                 if (!XML11Char.isXML11NCNameStart(fCurrentEntity.ch[startLocal]) &&
 698                     (!XML11Char.isXML11NameHighSurrogate(fCurrentEntity.ch[startLocal]) ||
 699                     sawIncompleteSurrogatePair)){
 700                     fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 701                                                "IllegalQName",
 702                                                new Object[]{rawname},
 703                                                XMLErrorReporter.SEVERITY_FATAL_ERROR);
 704                 }
 705                 //check the result: localpart
 706                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
 707                 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 708                                                    index + 1, len);
 709 
 710             }
 711             else {
 712                 localpart = rawname;
 713                 //check the result: localpart
 714                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 715             }
 716             qname.setValues(prefix, localpart, rawname, null);
 717             checkEntityLimit(nt, fCurrentEntity, offset, length);
 718             return true;
 719         }
 720         return false;
 721 
 722     } // scanQName(QName):boolean
 723 
  724     /**
  725      * Scans a range of parsed character data, setting the fields of the
  726      * XMLString structure, appropriately.
  727      * <p>
  728      * <strong>Note:</strong> The characters are consumed.
  729      * <p>
  730      * <strong>Note:</strong> This method does not guarantee to return
  731      * the longest run of parsed character data. This method may return
  732      * before markup due to reaching the end of the input buffer or any
  733      * other reason.
  734      * <p>
  735      * <strong>Note:</strong> The fields contained in the XMLString
  736      * structure are not guaranteed to remain valid upon subsequent calls
  737      * to the entity scanner. Therefore, the caller is responsible for
  738      * immediately using the returned character data or making a copy of
  739      * the character data.
          * <p>
          * <strong>Note:</strong> A leading run of line terminators is
          * rewritten in place in the entity buffer as '\n' characters. In
          * external entities '\r', 0x85 and 0x2028 are treated as line
          * terminators as well, and a '\r' followed by '\n' or 0x85 yields a
          * single '\n' in the returned range.
  740      *
  741      * @param content The content structure to fill.
  742      *
  743      * @return Returns the next character on the input, if known. This
  744      *         value may be -1 but this does <em>not</em> designate
  745      *         end of file.
  746      *
  747      * @throws IOException  Thrown if i/o error occurs.
  748      * @throws EOFException Thrown on end of file.
  749      */
  750     protected int scanContent(XMLString content) throws IOException {
  751 
  752         // load more characters, if needed
  753         if (fCurrentEntity.position == fCurrentEntity.count) {
  754             load(0, true, true);
  755         }
  756         else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one unread character is left: move it to the front
                 // of the buffer, load behind it (from index 1) and restart
                 // the cursor at index 0
  757             invokeListeners(1);
  758             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  759             load(1, false, false);
  760             fCurrentEntity.position = 0;
  761             fCurrentEntity.startPosition = 0;
  762         }
  763 
  764         // normalize newlines
             // offset marks the start of the range handed back in content;
             // newlines is subtracted from the length when the column number
             // is updated below
  765         int offset = fCurrentEntity.position;
  766         int c = fCurrentEntity.ch[offset];
  767         int newlines = 0;
             // set when the loop below stops because load(...) returned true;
             // the final checkEntityLimit call is skipped in that case
  768         boolean counted = false;
  769         boolean external = fCurrentEntity.isExternal();
  770         if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
  771             do {
  772                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  773                 if ((c == '\r' ) && external) {
  774                     newlines++;
  775                     fCurrentEntity.lineNumber++;
  776                     fCurrentEntity.columnNumber = 1;
  777                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted inside the newline run: account
                             // for what was consumed, then restart the run at
                             // the front of the buffer, keeping `newlines`
                             // slots ahead of the freshly loaded data
  778                         checkEntityLimit(null, fCurrentEntity, offset, newlines);
  779                         offset = 0;
  780                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  781                         fCurrentEntity.position = newlines;
  782                         fCurrentEntity.startPosition = newlines;
                             // presumably load(...) returns true when the
                             // current entity changed (other callers in this
                             // file store the result as entityChanged /
                             // bNextEntity) -- confirm against load()
  783                         if (load(newlines, false, true)) {
  784                             counted = true;
  785                             break;
  786                         }
  787                     }
  788                     int cc = fCurrentEntity.ch[fCurrentEntity.position];
  789                     if (cc == '\n' || cc == 0x85) {
                             // two-character terminator: consume the second
                             // character and advance offset so the pair
                             // contributes only one '\n' to the result
  790                         fCurrentEntity.position++;
  791                         offset++;
  792                     }
  793                     /*** NEWLINE NORMALIZATION ***/
                         // NOTE(review): a lone '\r' increments newlines a
                         // second time here, which feeds into the column
                         // computation further down -- confirm this is intended
  794                     else {
  795                         newlines++;
  796                     }
  797                 }
  798                 else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
  799                     newlines++;
  800                     fCurrentEntity.lineNumber++;
  801                     fCurrentEntity.columnNumber = 1;
  802                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // same buffer-exhausted handling as in the '\r'
                             // branch above
  803                         checkEntityLimit(null, fCurrentEntity, offset, newlines);
  804                         offset = 0;
  805                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  806                         fCurrentEntity.position = newlines;
  807                         fCurrentEntity.startPosition = newlines;
  808                         if (load(newlines, false, true)) {
  809                             counted = true;
  810                             break;
  811                         }
  812                     }
  813                 }
  814                 else {
                         // not a line terminator: un-read it and end the run
  815                     fCurrentEntity.position--;
  816                     break;
  817                 }
  818             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite everything consumed so far with '\n'
  819             for (int i = offset; i < fCurrentEntity.position; i++) {
  820                 fCurrentEntity.ch[i] = '\n';
  821             }
  822             int length = fCurrentEntity.position - offset;
                 // only one unread character is left in the buffer: hand back
                 // the newline run alone and report the next character as
                 // unknown (-1)
  823             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  824                 checkEntityLimit(null, fCurrentEntity, offset, length);
  825                 content.setValues(fCurrentEntity.ch, offset, length);
  826                 return -1;
  827             }
  828         }
  829 
  830         // inner loop, scanning for content
  831         if (external) {
                 // stop (without consuming) at the first character that is not
                 // XML 1.1 content, or at 0x85 / 0x2028, which are line
                 // terminators in external entities
  832             while (fCurrentEntity.position < fCurrentEntity.count) {
  833                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  834                 if (!XML11Char.isXML11Content(c) || c == 0x85 || c == 0x2028) {
  835                     fCurrentEntity.position--;
  836                     break;
  837                 }
  838             }
  839         }
  840         else {
  841             while (fCurrentEntity.position < fCurrentEntity.count) {
  842                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  843                 // In internal entities control characters are allowed to appear unescaped.
  844                 if (!XML11Char.isXML11InternalEntityContent(c)) {
  845                     fCurrentEntity.position--;
  846                     break;
  847                 }
  848             }
  849         }
             // the returned range covers the normalized newline run plus the
             // content scanned by the inner loop
  850         int length = fCurrentEntity.position - offset;
  851         fCurrentEntity.columnNumber += length - newlines;
  852         if (!counted) {
  853             checkEntityLimit(null, fCurrentEntity, offset, length);
  854         }
  855         content.setValues(fCurrentEntity.ch, offset, length);
  856 
  857         // return next character
             // peek only: the cursor is not advanced here
  858         if (fCurrentEntity.position != fCurrentEntity.count) {
  859             c = fCurrentEntity.ch[fCurrentEntity.position];
  860             // REVISIT: Does this need to be updated to fix the
  861             //          #x0D ^#x0A newline normalization problem? -Ac
                 // in external entities a pending line terminator is reported
                 // to the caller as '\n'
  862             if ((c == '\r' || c == 0x85 || c == 0x2028) && external) {
  863                 c = '\n';
  864             }
  865         }
  866         else {
                 // buffer fully consumed: next character not known
  867             c = -1;
  868         }
  869         return c;
  870 
  871     } // scanContent(XMLString):int
 872 
  873     /**
  874      * Scans a range of attribute value data, setting the fields of the
  875      * XMLString structure, appropriately.
  876      * <p>
  877      * <strong>Note:</strong> The characters are consumed.
  878      * <p>
  879      * <strong>Note:</strong> This method does not guarantee to return
  880      * the longest run of attribute value data. This method may return
  881      * before the quote character due to reaching the end of the input
  882      * buffer or any other reason.
  883      * <p>
  884      * <strong>Note:</strong> The fields contained in the XMLString
  885      * structure are not guaranteed to remain valid upon subsequent calls
  886      * to the entity scanner. Therefore, the caller is responsible for
  887      * immediately using the returned character data or making a copy of
  888      * the character data.
          * <p>
          * <strong>Note:</strong> A leading run of line terminators is
          * rewritten in place in the entity buffer as '\n' characters. In
          * external entities '\r', 0x85 and 0x2028 are treated as line
          * terminators as well. Scanning also stops at a '%' character.
  889      *
  890      * @param quote   The quote character that signifies the end of the
  891      *                attribute value data.
  892      * @param content The content structure to fill.
  893      * @param isNSURI a flag indicating whether the content is a Namespace URI;
          *                when true the scanned range is additionally checked
          *                against Limit.MAX_NAME_LIMIT
  894      *
  895      * @return Returns the next character on the input, if known. This
  896      *         value may be -1 but this does <em>not</em> designate
  897      *         end of file.
  898      *
  899      * @throws IOException  Thrown if i/o error occurs.
  900      * @throws EOFException Thrown on end of file.
  901      */
  902     protected int scanLiteral(int quote, XMLString content, boolean isNSURI)
  903         throws IOException {
  904         // load more characters, if needed
  905         if (fCurrentEntity.position == fCurrentEntity.count) {
  906             load(0, true, true);
  907         }
  908         else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one unread character is left: move it to the front
                 // of the buffer, load behind it (from index 1) and restart
                 // the cursor at index 0
  909             invokeListeners(1);
  910             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  911             load(1, false, false);
  912             fCurrentEntity.startPosition = 0;
  913             fCurrentEntity.position = 0;
  914         }
  915 
  916         // normalize newlines
             // offset marks the start of the range handed back in content;
             // newlines is subtracted from the length when the column number
             // is updated below
  917         int offset = fCurrentEntity.position;
  918         int c = fCurrentEntity.ch[offset];
  919         int newlines = 0;
  920         boolean external = fCurrentEntity.isExternal();
  921         if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
  922             do {
  923                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  924                 if ((c == '\r' ) && external) {
  925                     newlines++;
  926                     fCurrentEntity.lineNumber++;
  927                     fCurrentEntity.columnNumber = 1;
  928                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // buffer exhausted inside the newline run: restart
                             // the run at the front of the buffer, keeping
                             // `newlines` slots ahead of the freshly loaded data
  929                         offset = 0;
  930                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  931                         fCurrentEntity.position = newlines;
  932                         fCurrentEntity.startPosition = newlines;
                             // presumably load(...) returns true when the
                             // current entity changed -- confirm against load()
  933                         if (load(newlines, false, true)) {
  934                             break;
  935                         }
  936                     }
  937                     int cc = fCurrentEntity.ch[fCurrentEntity.position];
  938                     if (cc == '\n' || cc == 0x85) {
                             // two-character terminator: consume the second
                             // character and advance offset so the pair
                             // contributes only one '\n' to the result
  939                         fCurrentEntity.position++;
  940                         offset++;
  941                     }
  942                     /*** NEWLINE NORMALIZATION ***/
                         // NOTE(review): a lone '\r' increments newlines a
                         // second time here, which feeds into the column
                         // computation further down -- confirm this is intended
  943                     else {
  944                         newlines++;
  945                     }
  946                 }
  947                 else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
  948                     newlines++;
  949                     fCurrentEntity.lineNumber++;
  950                     fCurrentEntity.columnNumber = 1;
  951                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // same buffer-exhausted handling as in the '\r'
                             // branch above
  952                         offset = 0;
  953                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  954                         fCurrentEntity.position = newlines;
  955                         fCurrentEntity.startPosition = newlines;
  956                         if (load(newlines, false, true)) {
  957                             break;
  958                         }
  959                     }
  960                 }
  961                 else {
                         // not a line terminator: un-read it and end the run
  962                     fCurrentEntity.position--;
  963                     break;
  964                 }
  965             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite everything consumed so far with '\n'
  966             for (int i = offset; i < fCurrentEntity.position; i++) {
  967                 fCurrentEntity.ch[i] = '\n';
  968             }
  969             int length = fCurrentEntity.position - offset;
                 // only one unread character is left in the buffer: hand back
                 // the newline run alone and report the next character as
                 // unknown (-1)
  970             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  971                 content.setValues(fCurrentEntity.ch, offset, length);
  972                 return -1;
  973             }
  974         }
  975 
  976         // scan literal value
  977         if (external) {
                 // stop (without consuming) at the quote, at '%', at anything
                 // that is not XML 1.1 content, or at 0x85 / 0x2028
  978             while (fCurrentEntity.position < fCurrentEntity.count) {
  979                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  980                 if (c == quote || c == '%' || !XML11Char.isXML11Content(c)
  981                     || c == 0x85 || c == 0x2028) {
  982                     fCurrentEntity.position--;
  983                     break;
  984                 }
  985             }
  986         }
  987         else {
                 // here the quote character only terminates the scan when the
                 // current entity is not flagged as literal
  988             while (fCurrentEntity.position < fCurrentEntity.count) {
  989                 c = fCurrentEntity.ch[fCurrentEntity.position++];
  990                 // In internal entities control characters are allowed to appear unescaped.
  991                 if ((c == quote && !fCurrentEntity.literal)
  992                     || c == '%' || !XML11Char.isXML11InternalEntityContent(c)) {
  993                     fCurrentEntity.position--;
  994                     break;
  995                 }
  996             }
  997         }
             // the returned range covers the normalized newline run plus the
             // literal characters scanned above
  998         int length = fCurrentEntity.position - offset;
  999         fCurrentEntity.columnNumber += length - newlines;
 1000 
 1001         checkEntityLimit(null, fCurrentEntity, offset, length);
 1002         if (isNSURI) {
 1003             checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 1004         }
 1005         content.setValues(fCurrentEntity.ch, offset, length);
 1006 
 1007         // return next character
             // peek only: the cursor is not advanced here
 1008         if (fCurrentEntity.position != fCurrentEntity.count) {
 1009             c = fCurrentEntity.ch[fCurrentEntity.position];
 1010             // NOTE: We don't want to accidentally signal the
 1011             //       end of the literal if we're expanding an
 1012             //       entity appearing in the literal. -Ac
 1013             if (c == quote && fCurrentEntity.literal) {
 1014                 c = -1;
 1015             }
 1016         }
 1017         else {
                 // buffer fully consumed: next character not known
 1018             c = -1;
 1019         }
 1020         return c;
 1021 
 1022     } // scanLiteral(int,XMLString,boolean):int
1023 
 1024     /**
 1025      * Scans a range of character data up to the specified delimiter,
 1026      * appending the scanned characters to the given buffer.
 1027      * <p>
 1028      * <strong>Note:</strong> The characters are consumed.
 1029      * <p>
 1030      * <strong>Note:</strong> This assumes that the internal buffer is
 1031      * at least the same size, or bigger, than the length of the delimiter
 1032      * and that the delimiter contains at least one character.
 1033      * <p>
 1034      * <strong>Note:</strong> This method does not guarantee to return
 1035      * the longest run of character data. This method may return before
 1036      * the delimiter due to reaching the end of the input buffer or any
 1037      * other reason.
 1038      * <p>
 1039      * <strong>Note:</strong> The fields contained in the XMLString
 1040      * structure are not guaranteed to remain valid upon subsequent calls
 1041      * to the entity scanner. Therefore, the caller is responsible for
 1042      * immediately using the returned character data or making a copy of
 1043      * the character data.
          * <p>
          * <strong>Note:</strong> Line terminators are rewritten in place in
          * the entity buffer as '\n' characters before being appended. In
          * external entities '\r', 0x85 and 0x2028 are treated as line
          * terminators as well. The delimiter itself is consumed but not
          * appended to the buffer.
 1044      *
 1045      * @param delimiter The string that signifies the end of the character
 1046      *                  data to be scanned.
 1047      * @param buffer    The buffer the scanned character data is appended to.
 1048      *
 1049      * @return Returns true if the method returned before reaching the
          *         delimiter (after a newline run that ends one character
          *         short of the buffer end, or at an invalid character);
          *         false once the delimiter has been consumed or when the
          *         remaining input is too short to contain it.
 1050      *
 1051      * @throws IOException  Thrown if i/o error occurs.
 1052      * @throws EOFException Thrown on end of file.
 1053      */
 1054     protected boolean scanData(String delimiter, XMLStringBuffer buffer)
 1055         throws IOException {
 1056 
 1057         boolean done = false;
 1058         int delimLen = delimiter.length();
 1059         char charAt0 = delimiter.charAt(0);
 1060         boolean external = fCurrentEntity.isExternal();
 1061         do {
 1062             // load more characters, if needed
 1063             if (fCurrentEntity.position == fCurrentEntity.count) {
 1064                 load(0, true, false);
 1065             }
 1066 
 1067             boolean bNextEntity = false;
 1068 
                 // while no more than delimLen unread characters remain, shift
                 // them to the front of the buffer and load more behind them;
                 // stop once load(...) reports true (stored as bNextEntity)
 1069             while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen)
 1070                 && (!bNextEntity))
 1071             {
 1072               System.arraycopy(fCurrentEntity.ch,
 1073                                fCurrentEntity.position,
 1074                                fCurrentEntity.ch,
 1075                                0,
 1076                                fCurrentEntity.count - fCurrentEntity.position);
 1077 
 1078               bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
 1079               fCurrentEntity.position = 0;
 1080               fCurrentEntity.startPosition = 0;
 1081             }
 1082 
 1083             if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
 1084                 // something must be wrong with the input:  e.g., file ends in an unterminated comment
                     // append whatever is left, mark the buffer as fully
                     // consumed and give up
                     // NOTE(review): NameType.COMMENT is passed to
                     // checkEntityLimit throughout this method regardless of
                     // the delimiter -- confirm this is intended
 1085                 int length = fCurrentEntity.count - fCurrentEntity.position;
 1086                 checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length);
 1087                 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
 1088                 fCurrentEntity.columnNumber += fCurrentEntity.count;
 1089                 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1090                 fCurrentEntity.position = fCurrentEntity.count;
 1091                 fCurrentEntity.startPosition = fCurrentEntity.count;
 1092                 load(0,true, false);
 1093                 return false;
 1094             }
 1095 
 1096             // normalize newlines
                 // offset marks the start of the range appended in this
                 // iteration; newlines is subtracted from the length when the
                 // column number is updated below
 1097             int offset = fCurrentEntity.position;
 1098             int c = fCurrentEntity.ch[offset];
 1099             int newlines = 0;
 1100             if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
 1101                 do {
 1102                     c = fCurrentEntity.ch[fCurrentEntity.position++];
 1103                     if ((c == '\r' ) && external) {
 1104                         newlines++;
 1105                         fCurrentEntity.lineNumber++;
 1106                         fCurrentEntity.columnNumber = 1;
 1107                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // buffer exhausted inside the newline run:
                                 // restart the run at the front of the buffer,
                                 // keeping `newlines` slots ahead of the freshly
                                 // loaded data
 1108                             offset = 0;
 1109                             fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1110                             fCurrentEntity.position = newlines;
 1111                             fCurrentEntity.startPosition = newlines;
 1112                             if (load(newlines, false, true)) {
 1113                                 break;
 1114                             }
 1115                         }
 1116                         int cc = fCurrentEntity.ch[fCurrentEntity.position];
 1117                         if (cc == '\n' || cc == 0x85) {
                                 // two-character terminator: consume the second
                                 // character and advance offset so the pair
                                 // contributes only one '\n' to the result
 1118                             fCurrentEntity.position++;
 1119                             offset++;
 1120                         }
 1121                         /*** NEWLINE NORMALIZATION ***/
                             // NOTE(review): a lone '\r' increments newlines a
                             // second time here, which feeds into the column
                             // computation further down -- confirm this is
                             // intended
 1122                         else {
 1123                             newlines++;
 1124                         }
 1125                     }
 1126                     else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
 1127                         newlines++;
 1128                         fCurrentEntity.lineNumber++;
 1129                         fCurrentEntity.columnNumber = 1;
 1130                         if (fCurrentEntity.position == fCurrentEntity.count) {
 1131                             offset = 0;
 1132                             fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1133                             fCurrentEntity.position = newlines;
 1134                             fCurrentEntity.startPosition = newlines;
                                 // NOTE(review): count is reset only in this
                                 // branch, not in the '\r' branch above --
                                 // confirm the asymmetry is intended
 1135                             fCurrentEntity.count = newlines;
 1136                             if (load(newlines, false, true)) {
 1137                                 break;
 1138                             }
 1139                         }
 1140                     }
 1141                     else {
                             // not a line terminator: un-read it and end the run
 1142                         fCurrentEntity.position--;
 1143                         break;
 1144                     }
 1145                 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                     // overwrite everything consumed so far with '\n'
 1146                 for (int i = offset; i < fCurrentEntity.position; i++) {
 1147                     fCurrentEntity.ch[i] = '\n';
 1148                 }
 1149                 int length = fCurrentEntity.position - offset;
                     // only one unread character is left in the buffer: append
                     // the newline run and return so the caller can call again
 1150                 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1151                     checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
 1152                     buffer.append(fCurrentEntity.ch, offset, length);
 1153                     return true;
 1154                 }
 1155             }
 1156 
 1157             // iterate over buffer looking for delimiter
 1158             if (external) {
 1159                 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
 1160                     c = fCurrentEntity.ch[fCurrentEntity.position++];
 1161                     if (c == charAt0) {
 1162                         // looks like we just hit the delimiter
 1163                         int delimOffset = fCurrentEntity.position - 1;
 1164                         for (int i = 1; i < delimLen; i++) {
 1165                             if (fCurrentEntity.position == fCurrentEntity.count) {
                                     // buffer ends inside a possible delimiter:
                                     // rewind to its first character and leave
                                     // the scan loop
 1166                                 fCurrentEntity.position -= i;
 1167                                 break OUTER;
 1168                             }
 1169                             c = fCurrentEntity.ch[fCurrentEntity.position++];
 1170                             if (delimiter.charAt(i) != c) {
                                     // mismatch: un-read this character so the
                                     // outer loop examines it again
 1171                                 fCurrentEntity.position--;
 1172                                 break;
 1173                             }
 1174                          }
                              // the cursor sits right behind a complete
                              // delimiter only if every character matched
 1175                          if (fCurrentEntity.position == delimOffset + delimLen) {
 1176                             done = true;
 1177                             break;
 1178                          }
 1179                     }
 1180                     else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                             // line terminator: un-read it; it is normalized
                             // at the top of the next iteration of the outer
                             // do-loop
 1181                         fCurrentEntity.position--;
 1182                         break;
 1183                     }
 1184                     // In external entities control characters cannot appear
 1185                     // as literals so do not skip over them.
 1186                     else if (!XML11Char.isXML11ValidLiteral(c)) {
                             // append what was scanned and return with the
                             // cursor on the offending character
 1187                         fCurrentEntity.position--;
 1188                         int length = fCurrentEntity.position - offset;
 1189                         fCurrentEntity.columnNumber += length - newlines;
 1190                         checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
 1191                         buffer.append(fCurrentEntity.ch, offset, length);
 1192                         return true;
 1193                     }
 1194                 }
 1195             }
 1196             else {
                     // same delimiter search as the external case above; only
                     // '\n' counts as a line terminator here and the validity
                     // test is isXML11Valid
 1197                 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
 1198                     c = fCurrentEntity.ch[fCurrentEntity.position++];
 1199                     if (c == charAt0) {
 1200                         // looks like we just hit the delimiter
 1201                         int delimOffset = fCurrentEntity.position - 1;
 1202                         for (int i = 1; i < delimLen; i++) {
 1203                             if (fCurrentEntity.position == fCurrentEntity.count) {
 1204                                 fCurrentEntity.position -= i;
 1205                                 break OUTER;
 1206                             }
 1207                             c = fCurrentEntity.ch[fCurrentEntity.position++];
 1208                             if (delimiter.charAt(i) != c) {
 1209                                 fCurrentEntity.position--;
 1210                                 break;
 1211                             }
 1212                         }
 1213                         if (fCurrentEntity.position == delimOffset + delimLen) {
 1214                             done = true;
 1215                             break;
 1216                         }
 1217                     }
 1218                     else if (c == '\n') {
 1219                         fCurrentEntity.position--;
 1220                         break;
 1221                     }
 1222                     // Control characters are allowed to appear as literals
 1223                     // in internal entities.
 1224                     else if (!XML11Char.isXML11Valid(c)) {
 1225                         fCurrentEntity.position--;
 1226                         int length = fCurrentEntity.position - offset;
 1227                         fCurrentEntity.columnNumber += length - newlines;
 1228                         checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
 1229                         buffer.append(fCurrentEntity.ch, offset, length);
 1230                         return true;
 1231                     }
 1232                 }
 1233             }
                 // when the delimiter was found, length still includes it here:
                 // it is counted by the column update and the entity limit check
                 // but excluded from what is appended to the buffer
 1234             int length = fCurrentEntity.position - offset;
 1235             fCurrentEntity.columnNumber += length - newlines;
 1236             checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
 1237             if (done) {
 1238                 length -= delimLen;
 1239             }
 1240             buffer.append(fCurrentEntity.ch, offset, length);
 1241 
 1242             // keep scanning until the delimiter has been consumed
 1243         } while (!done);
             // the loop above only exits with done == true, so this is false
 1244         return !done;
 1245 
 1246     } // scanData(String,XMLStringBuffer):boolean
1247 
 1248     /**
 1249      * Skips a character appearing immediately on the input.
 1250      * <p>
 1251      * <strong>Note:</strong> The character is consumed only if it matches
 1252      * the specified character.
          * <p>
          * <strong>Note:</strong> When the specified character is '\n' and the
          * current entity is external, 0x85, 0x2028 and '\r' (optionally
          * followed by '\n' or 0x85) are accepted as a match as well.
 1253      *
 1254      * @param c The character to skip.
          * @param nt The name type passed on to checkEntityLimit for the
          *           consumed characters.
 1255      *
 1256      * @return Returns true if the character was skipped.
 1257      *
 1258      * @throws IOException  Thrown if i/o error occurs.
 1259      * @throws EOFException Thrown on end of file.
 1260      */
 1261     protected boolean skipChar(int c, NameType nt) throws IOException {
 1262 
 1263         // load more characters, if needed
 1264         if (fCurrentEntity.position == fCurrentEntity.count) {
 1265             load(0, true, true);
 1266         }
 1267 
 1268         // skip character
             // offset remembers where the skip started so the number of
             // consumed characters can be reported to checkEntityLimit
 1269         int offset = fCurrentEntity.position;
 1270         int cc = fCurrentEntity.ch[fCurrentEntity.position];
 1271         if (cc == c) {
                 // exact match
 1272             fCurrentEntity.position++;
 1273             if (c == '\n') {
 1274                 fCurrentEntity.lineNumber++;
 1275                 fCurrentEntity.columnNumber = 1;
 1276             }
 1277             else {
 1278                 fCurrentEntity.columnNumber++;
 1279             }
 1280             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
 1281             return true;
 1282         }
 1283         else if (c == '\n' && ((cc == 0x2028 || cc == 0x85) && fCurrentEntity.isExternal())) {
                 // single-character line terminators that stand in for '\n'
                 // in external entities
 1284             fCurrentEntity.position++;
 1285             fCurrentEntity.lineNumber++;
 1286             fCurrentEntity.columnNumber = 1;
 1287             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
 1288             return true;
 1289         }
 1290         else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) {
 1291             // handle newlines
                 // NOTE(review): position was already compared with count at
                 // the top of this method and has not been changed since, so
                 // this branch is only taken if load(...) left the cursor at
                 // count -- confirm it is reachable
 1292             if (fCurrentEntity.position == fCurrentEntity.count) {
 1293                 invokeListeners(1);
 1294                 fCurrentEntity.ch[0] = (char)cc;
 1295                 load(1, false, false);
 1296             }
                 // step over the '\r' and look at the character behind it; a
                 // following '\n' or 0x85 belongs to the same line break and
                 // is consumed too
                 // NOTE(review): the index is not checked against count before
                 // this read -- confirm a following character is always
                 // present in the buffer
 1297             int ccc = fCurrentEntity.ch[++fCurrentEntity.position];
 1298             if (ccc == '\n' || ccc == 0x85) {
 1299                 fCurrentEntity.position++;
 1300             }
 1301             fCurrentEntity.lineNumber++;
 1302             fCurrentEntity.columnNumber = 1;
 1303             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
 1304             return true;
 1305         }
 1306 
 1307         // character was not skipped
 1308         return false;
 1309 
 1310     } // skipChar(int,NameType):boolean
1311 
1312     /**
1313      * Skips space characters appearing immediately on the input.
1314      * <p>
1315      * <strong>Note:</strong> The characters are consumed only if they are
1316      * space characters.
1317      *
1318      * @return Returns true if at least one space character was skipped.
1319      *
1320      * @throws IOException  Thrown if i/o error occurs.
1321      * @throws EOFException Thrown on end of file.
1322      *
1323      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1324      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Space
1325      */
1326     protected boolean skipSpaces() throws IOException {
1327 
1328         // load more characters, if needed
1329         if (fCurrentEntity.position == fCurrentEntity.count) {
1330             load(0, true, true);
1331         }
1332 
1333 
1334         //we are doing this check only in skipSpace() because it is called by
1335         //fMiscDispatcher and we want the parser to exit gracefully when document
1336         //is well-formed.
1337         //it is possible that end of document is reached and
1338         //fCurrentEntity becomes null
1339         //nothing was read so entity changed  'false' should be returned.
1340         if(fCurrentEntity == null){
1341             return false ;
1342         }
1343 
1344         // skip spaces
1345         int c = fCurrentEntity.ch[fCurrentEntity.position];
1346         int offset = fCurrentEntity.position - 1;
1347         // External --  Match: S + 0x85 + 0x2028, and perform end of line normalization
1348         if (fCurrentEntity.isExternal()) {
1349             if (XML11Char.isXML11Space(c)) {
1350                 do {
1351                     boolean entityChanged = false;
1352                     // handle newlines
1353                     if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
1354                         fCurrentEntity.lineNumber++;
1355                         fCurrentEntity.columnNumber = 1;
1356                         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1357                             invokeListeners(1);
1358                             fCurrentEntity.ch[0] = (char)c;
1359                             entityChanged = load(1, true, false);
1360                             if (!entityChanged) {
1361                                 // the load change the position to be 1,
1362                                 // need to restore it when entity not changed
1363                                 fCurrentEntity.startPosition = 0;
1364                                 fCurrentEntity.position = 0;
1365                             } else if(fCurrentEntity == null){
1366                                 return true ;
1367                             }
1368 
1369                         }
1370                         if (c == '\r') {
1371                             // REVISIT: Does this need to be updated to fix the
1372                             //          #x0D ^#x0A newline normalization problem? -Ac
1373                             int cc = fCurrentEntity.ch[++fCurrentEntity.position];
1374                             if (cc != '\n' && cc != 0x85 ) {
1375                                 fCurrentEntity.position--;
1376                             }
1377                         }
1378                     }
1379                     else {
1380                         fCurrentEntity.columnNumber++;
1381                     }
1382 
1383                     //If this is a general entity, spaces within a start element should be counted
1384                     checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
1385                     offset = fCurrentEntity.position;
1386 
1387                     // load more characters, if needed
1388                     if (!entityChanged)
1389                         fCurrentEntity.position++;
1390                     if (fCurrentEntity.position == fCurrentEntity.count) {
1391                         load(0, true, true);
1392 
1393                         if(fCurrentEntity == null){
1394                         return true ;
1395                         }
1396 
1397                     }
1398                 } while (XML11Char.isXML11Space(c = fCurrentEntity.ch[fCurrentEntity.position]));
1399                 return true;
1400             }
1401         }
1402         // Internal -- Match: S (only)
1403         else if (XMLChar.isSpace(c)) {
1404             do {
1405                 boolean entityChanged = false;
1406                 // handle newlines
1407                 if (c == '\n') {
1408                     fCurrentEntity.lineNumber++;
1409                     fCurrentEntity.columnNumber = 1;
1410                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1411                         invokeListeners(1);
1412                         fCurrentEntity.ch[0] = (char)c;
1413                         entityChanged = load(1, true, false);
1414                         if (!entityChanged) {
1415                             // the load change the position to be 1,
1416                             // need to restore it when entity not changed
1417                             fCurrentEntity.startPosition = 0;
1418                             fCurrentEntity.position = 0;
1419                         } else if(fCurrentEntity == null){
1420                         return true ;
1421                         }
1422                     }
1423                 }
1424                 else {
1425                     fCurrentEntity.columnNumber++;
1426                 }
1427 
1428                 //If this is a general entity, spaces within a start element should be counted
1429                 checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
1430                 offset = fCurrentEntity.position;
1431 
1432                 // load more characters, if needed
1433                 if (!entityChanged)
1434                     fCurrentEntity.position++;
1435                 if (fCurrentEntity.position == fCurrentEntity.count) {
1436                     load(0, true, true);
1437 
1438                     if(fCurrentEntity == null){
1439                         return true ;
1440                     }
1441 
1442                 }
1443             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1444             return true;
1445         }
1446 
1447         // no spaces were found
1448         return false;
1449 
1450     } // skipSpaces():boolean
1451 
1452     /**
1453      * Skips the specified string appearing immediately on the input.
1454      * <p>
     * <strong>Note:</strong> The characters are consumed only if they
     * match the specified string; on a mismatch nothing is consumed.
1457      *
1458      * @param s The string to skip.
1459      *
1460      * @return Returns true if the string was skipped.
1461      *
1462      * @throws IOException  Thrown if i/o error occurs.
1463      * @throws EOFException Thrown on end of file.
1464      */
1465     protected boolean skipString(String s) throws IOException {
1466 
1467         // load more characters, if needed
1468         if (fCurrentEntity.position == fCurrentEntity.count) {
1469             load(0, true, true);
1470         }
1471 
1472         // skip string
1473         final int length = s.length();
1474         final int beforeSkip = fCurrentEntity.position ;
1475         for (int i = 0; i < length; i++) {
1476             char c = fCurrentEntity.ch[fCurrentEntity.position++];
1477             if (c != s.charAt(i)) {
1478                 fCurrentEntity.position -= i + 1;
1479                 return false;
1480             }
1481             if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
1482                 invokeListeners(0);
1483                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
1484                 // REVISIT: Can a string to be skipped cross an
1485                 //          entity boundary? -Ac
1486                 if (load(i + 1, false, false)) {
1487                     fCurrentEntity.startPosition -= i + 1;
1488                     fCurrentEntity.position -= i + 1;
1489                     return false;
1490                 }
1491             }
1492         }
1493         fCurrentEntity.columnNumber += length;
1494         if (!detectingVersion) {
1495             checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
1496         }
1497         return true;
1498 
1499     } // skipString(String):boolean
1500 
1501 } // class XML11EntityScanner