1 /*
   2  * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl;
  22 
  23 import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
  24 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  25 import com.sun.org.apache.xerces.internal.util.XML11Char;
  26 import com.sun.org.apache.xerces.internal.util.XMLChar;
  27 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
  28 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
  29 import com.sun.org.apache.xerces.internal.xni.QName;
  30 import com.sun.org.apache.xerces.internal.xni.XMLString;
  31 import com.sun.xml.internal.stream.Entity;
  32 import java.io.IOException;
  33 
  34 /**
  35  * Implements the entity scanner methods in
  36  * the context of XML 1.1.
  37  *
  38  * @xerces.internal
  39  *
  40  * @author Michael Glavassevich, IBM
  41  * @author Neil Graham, IBM
  42  *
  43  * @LastModified: Apr 2021
  44  */
  45 
  46 public class XML11EntityScanner
  47     extends XMLEntityScanner {
  48 
  49     //
  50     // Constructors
  51     //
  52 
  53     /** Default constructor. */
  54     public XML11EntityScanner() {
  55         super();
  56     } // <init>()
  57 
  58     //
  59     // XMLEntityScanner methods
  60     //
  61 
  62     /**
  63      * Returns the next character on the input.
  64      * <p>
  65      * <strong>Note:</strong> The character is <em>not</em> consumed.
  66      *
  67      * @throws IOException  Thrown if i/o error occurs.
  68      * @throws EOFException Thrown on end of file.
  69      */
  70     public int peekChar() throws IOException {
  71 
  72         // load more characters, if needed
  73         if (fCurrentEntity.position == fCurrentEntity.count) {
  74             load(0, true, true);
  75         }
  76 
  77         // peek at character
  78         int c = fCurrentEntity.ch[fCurrentEntity.position];
  79 
  80         // return peeked character
  81         if (fCurrentEntity.isExternal()) {
  82             return (c != '\r' && c != 0x85 && c != 0x2028) ? c : '\n';
  83         }
  84         else {
  85             return c;
  86         }
  87 
  88     } // peekChar():int
  89 
  90     /**
  91      * Returns the next character on the input.
  92      * <p>
  93      * <strong>Note:</strong> The character is consumed.
  94      *
  95      * @throws IOException  Thrown if i/o error occurs.
  96      * @throws EOFException Thrown on end of file.
  97      */
  98     protected int scanChar(NameType nt) throws IOException {
  99 
 100         // load more characters, if needed
 101         if (fCurrentEntity.position == fCurrentEntity.count) {
 102             load(0, true, true);
 103         }
 104 
 105         // scan character
 106         int offset = fCurrentEntity.position;
 107         int c = fCurrentEntity.ch[fCurrentEntity.position++];
 108         boolean external = false;
 109         if (c == '\n' ||
 110             ((c == '\r' || c == 0x85 || c == 0x2028) && (external = fCurrentEntity.isExternal()))) {
 111             fCurrentEntity.lineNumber++;
 112             fCurrentEntity.columnNumber = 1;
 113             if (fCurrentEntity.position == fCurrentEntity.count) {
 114                 invokeListeners(1);
 115                 fCurrentEntity.ch[0] = (char)c;
 116                 load(1, false, false);
 117                 offset = 0;
 118             }
 119             if (c == '\r' && external) {
 120                 int cc = fCurrentEntity.ch[fCurrentEntity.position++];
 121                 if (cc != '\n' && cc != 0x85) {
 122                     fCurrentEntity.position--;
 123                 }
 124             }
 125             c = '\n';
 126         }
 127 
 128         // return character that was scanned
 129         fCurrentEntity.columnNumber++;
 130         if (!detectingVersion) {
 131             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
 132         }
 133         return c;
 134 
 135     } // scanChar():int
 136 
 137     /**
 138      * Returns a string matching the NMTOKEN production appearing immediately
 139      * on the input as a symbol, or null if NMTOKEN Name string is present.
 140      * <p>
 141      * <strong>Note:</strong> The NMTOKEN characters are consumed.
 142      * <p>
 143      * <strong>Note:</strong> The string returned must be a symbol. The
 144      * SymbolTable can be used for this purpose.
 145      *
 146      * @throws IOException  Thrown if i/o error occurs.
 147      * @throws EOFException Thrown on end of file.
 148      *
 149      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 150      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 151      */
 152     protected String scanNmtoken() throws IOException {
 153         // load more characters, if needed
 154         if (fCurrentEntity.position == fCurrentEntity.count) {
 155             load(0, true, true);
 156         }
 157 
 158         // scan nmtoken
 159         int offset = fCurrentEntity.position;
 160 
 161         do {
 162             char ch = fCurrentEntity.ch[fCurrentEntity.position];
 163             if (XML11Char.isXML11Name(ch)) {
 164                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 165                     int length = fCurrentEntity.position - offset;
 166                     invokeListeners(length);
 167                     if (length == fCurrentEntity.ch.length) {
 168                         // bad luck we have to resize our buffer
 169                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 170                         System.arraycopy(fCurrentEntity.ch, offset,
 171                                          tmp, 0, length);
 172                         fCurrentEntity.ch = tmp;
 173                     }
 174                     else {
 175                         System.arraycopy(fCurrentEntity.ch, offset,
 176                                          fCurrentEntity.ch, 0, length);
 177                     }
 178                     offset = 0;
 179                     if (load(length, false, false)) {
 180                         break;
 181                     }
 182                 }
 183             }
 184             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 185                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 186                     int length = fCurrentEntity.position - offset;
 187                     invokeListeners(length);
 188                     if (length == fCurrentEntity.ch.length) {
 189                         // bad luck we have to resize our buffer
 190                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 191                         System.arraycopy(fCurrentEntity.ch, offset,
 192                                          tmp, 0, length);
 193                         fCurrentEntity.ch = tmp;
 194                     }
 195                     else {
 196                         System.arraycopy(fCurrentEntity.ch, offset,
 197                                          fCurrentEntity.ch, 0, length);
 198                     }
 199                     offset = 0;
 200                     if (load(length, false, false)) {
 201                         --fCurrentEntity.startPosition;
 202                         --fCurrentEntity.position;
 203                         break;
 204                     }
 205                 }
 206                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 207                 if ( !XMLChar.isLowSurrogate(ch2) ||
 208                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 209                     --fCurrentEntity.position;
 210                     break;
 211                 }
 212                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 213                     int length = fCurrentEntity.position - offset;
 214                     invokeListeners(length);
 215                     if (length == fCurrentEntity.ch.length) {
 216                         // bad luck we have to resize our buffer
 217                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 218                         System.arraycopy(fCurrentEntity.ch, offset,
 219                                          tmp, 0, length);
 220                         fCurrentEntity.ch = tmp;
 221                     }
 222                     else {
 223                         System.arraycopy(fCurrentEntity.ch, offset,
 224                                          fCurrentEntity.ch, 0, length);
 225                     }
 226                     offset = 0;
 227                     if (load(length, false, false)) {
 228                         break;
 229                     }
 230                 }
 231             }
 232             else {
 233                 break;
 234             }
 235         }
 236         while (true);
 237 
 238         int length = fCurrentEntity.position - offset;
 239         fCurrentEntity.columnNumber += length;
 240 
 241         // return nmtoken
 242         String symbol = null;
 243         if (length > 0) {
 244             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 245         }
 246         return symbol;
 247 
 248     } // scanNmtoken():String
 249 
 250     /**
 251      * Returns a string matching the Name production appearing immediately
 252      * on the input as a symbol, or null if no Name string is present.
 253      * <p>
 254      * <strong>Note:</strong> The Name characters are consumed.
 255      * <p>
 256      * <strong>Note:</strong> The string returned must be a symbol. The
 257      * SymbolTable can be used for this purpose.
 258      *
 259      * @param nt The type of the name (element or attribute)
 260      *
 261      * @throws IOException  Thrown if i/o error occurs.
 262      * @throws EOFException Thrown on end of file.
 263      *
 264      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 265      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 266      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NameStart
 267      */
 268     protected String scanName(NameType nt) throws IOException {
 269         // load more characters, if needed
 270         if (fCurrentEntity.position == fCurrentEntity.count) {
 271             load(0, true, true);
 272         }
 273 
 274         // scan name
 275         int offset = fCurrentEntity.position;
 276         char ch = fCurrentEntity.ch[offset];
 277 
 278         if (XML11Char.isXML11NameStart(ch)) {
 279             if (++fCurrentEntity.position == fCurrentEntity.count) {
 280                 invokeListeners(1);
 281                 fCurrentEntity.ch[0] = ch;
 282                 offset = 0;
 283                 if (load(1, false, false)) {
 284                     fCurrentEntity.columnNumber++;
 285                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 286                     return symbol;
 287                 }
 288             }
 289         }
 290         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 291             if (++fCurrentEntity.position == fCurrentEntity.count) {
 292                 invokeListeners(1);
 293                 fCurrentEntity.ch[0] = ch;
 294                 offset = 0;
 295                 if (load(1, false, false)) {
 296                     --fCurrentEntity.position;
 297                     --fCurrentEntity.startPosition;
 298                     return null;
 299                 }
 300             }
 301             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 302             if ( !XMLChar.isLowSurrogate(ch2) ||
 303                  !XML11Char.isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) {
 304                 --fCurrentEntity.position;
 305                 return null;
 306             }
 307             if (++fCurrentEntity.position == fCurrentEntity.count) {
 308                 invokeListeners(2);
 309                 fCurrentEntity.ch[0] = ch;
 310                 fCurrentEntity.ch[1] = ch2;
 311                 offset = 0;
 312                 if (load(2, false, false)) {
 313                     fCurrentEntity.columnNumber += 2;
 314                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 315                     return symbol;
 316                 }
 317             }
 318         }
 319         else {
 320             return null;
 321         }
 322 
 323         int length = 0;
 324         do {
 325             ch = fCurrentEntity.ch[fCurrentEntity.position];
 326             if (XML11Char.isXML11Name(ch)) {
 327                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 328                     offset = 0;
 329                     if (load(length, false, false)) {
 330                         break;
 331                     }
 332                 }
 333             }
 334             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 335                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 336                     offset = 0;
 337                     if (load(length, false, false)) {
 338                         --fCurrentEntity.position;
 339                         --fCurrentEntity.startPosition;
 340                         break;
 341                     }
 342                 }
 343                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 344                 if ( !XMLChar.isLowSurrogate(ch2) ||
 345                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 346                     --fCurrentEntity.position;
 347                     break;
 348                 }
 349                 if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
 350                     offset = 0;
 351                     if (load(length, false, false)) {
 352                         break;
 353                     }
 354                 }
 355             }
 356             else {
 357                 break;
 358             }
 359         }
 360         while (true);
 361 
 362         length = fCurrentEntity.position - offset;
 363         fCurrentEntity.columnNumber += length;
 364 
 365         // return name
 366         String symbol = null;
 367         if (length > 0) {
 368             checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 369             checkEntityLimit(nt, fCurrentEntity, offset, length);
 370             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 371         }
 372         return symbol;
 373 
 374     } // scanName():String
 375 
 376     /**
 377      * Returns a string matching the NCName production appearing immediately
 378      * on the input as a symbol, or null if no NCName string is present.
 379      * <p>
 380      * <strong>Note:</strong> The NCName characters are consumed.
 381      * <p>
 382      * <strong>Note:</strong> The string returned must be a symbol. The
 383      * SymbolTable can be used for this purpose.
 384      *
 385      * @throws IOException  Thrown if i/o error occurs.
 386      * @throws EOFException Thrown on end of file.
 387      *
 388      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 389      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NCName
 390      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NCNameStart
 391      */
 392     protected String scanNCName() throws IOException {
 393 
 394         // load more characters, if needed
 395         if (fCurrentEntity.position == fCurrentEntity.count) {
 396             load(0, true, true);
 397         }
 398 
 399         // scan name
 400         int offset = fCurrentEntity.position;
 401         char ch = fCurrentEntity.ch[offset];
 402 
 403         if (XML11Char.isXML11NCNameStart(ch)) {
 404             if (++fCurrentEntity.position == fCurrentEntity.count) {
 405                 invokeListeners(1);
 406                 fCurrentEntity.ch[0] = ch;
 407                 offset = 0;
 408                 if (load(1, false, false)) {
 409                     fCurrentEntity.columnNumber++;
 410                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 411                     return symbol;
 412                 }
 413             }
 414         }
 415         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 416             if (++fCurrentEntity.position == fCurrentEntity.count) {
 417                 invokeListeners(1);
 418                 fCurrentEntity.ch[0] = ch;
 419                 offset = 0;
 420                 if (load(1, false, false)) {
 421                     --fCurrentEntity.position;
 422                     --fCurrentEntity.startPosition;
 423                     return null;
 424                 }
 425             }
 426             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 427             if ( !XMLChar.isLowSurrogate(ch2) ||
 428                  !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
 429                 --fCurrentEntity.position;
 430                 return null;
 431             }
 432             if (++fCurrentEntity.position == fCurrentEntity.count) {
 433                 invokeListeners(2);
 434                 fCurrentEntity.ch[0] = ch;
 435                 fCurrentEntity.ch[1] = ch2;
 436                 offset = 0;
 437                 if (load(2, false, false)) {
 438                     fCurrentEntity.columnNumber += 2;
 439                     String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 440                     return symbol;
 441                 }
 442             }
 443         }
 444         else {
 445             return null;
 446         }
 447 
 448         do {
 449             ch = fCurrentEntity.ch[fCurrentEntity.position];
 450             if (XML11Char.isXML11NCName(ch)) {
 451                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 452                     int length = fCurrentEntity.position - offset;
 453                     invokeListeners(length);
 454                     if (length == fCurrentEntity.ch.length) {
 455                         // bad luck we have to resize our buffer
 456                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 457                         System.arraycopy(fCurrentEntity.ch, offset,
 458                                          tmp, 0, length);
 459                         fCurrentEntity.ch = tmp;
 460                     }
 461                     else {
 462                         System.arraycopy(fCurrentEntity.ch, offset,
 463                                          fCurrentEntity.ch, 0, length);
 464                     }
 465                     offset = 0;
 466                     if (load(length, false, false)) {
 467                         break;
 468                     }
 469                 }
 470             }
 471             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 472                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 473                     int length = fCurrentEntity.position - offset;
 474                     invokeListeners(length);
 475                     if (length == fCurrentEntity.ch.length) {
 476                         // bad luck we have to resize our buffer
 477                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 478                         System.arraycopy(fCurrentEntity.ch, offset,
 479                                          tmp, 0, length);
 480                         fCurrentEntity.ch = tmp;
 481                     }
 482                     else {
 483                         System.arraycopy(fCurrentEntity.ch, offset,
 484                                          fCurrentEntity.ch, 0, length);
 485                     }
 486                     offset = 0;
 487                     if (load(length, false, false)) {
 488                         --fCurrentEntity.startPosition;
 489                         --fCurrentEntity.position;
 490                         break;
 491                     }
 492                 }
 493                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 494                 if ( !XMLChar.isLowSurrogate(ch2) ||
 495                      !XML11Char.isXML11NCName(XMLChar.supplemental(ch, ch2)) ) {
 496                     --fCurrentEntity.position;
 497                     break;
 498                 }
 499                 if (++fCurrentEntity.position == fCurrentEntity.count) {
 500                     int length = fCurrentEntity.position - offset;
 501                     invokeListeners(length);
 502                     if (length == fCurrentEntity.ch.length) {
 503                         // bad luck we have to resize our buffer
 504                         char[] tmp = new char[fCurrentEntity.ch.length << 1];
 505                         System.arraycopy(fCurrentEntity.ch, offset,
 506                                          tmp, 0, length);
 507                         fCurrentEntity.ch = tmp;
 508                     }
 509                     else {
 510                         System.arraycopy(fCurrentEntity.ch, offset,
 511                                          fCurrentEntity.ch, 0, length);
 512                     }
 513                     offset = 0;
 514                     if (load(length, false, false)) {
 515                         break;
 516                     }
 517                 }
 518             }
 519             else {
 520                 break;
 521             }
 522         }
 523         while (true);
 524 
 525         int length = fCurrentEntity.position - offset;
 526         fCurrentEntity.columnNumber += length;
 527 
 528         // return name
 529         String symbol = null;
 530         if (length > 0) {
 531             symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
 532         }
 533         return symbol;
 534 
 535     } // scanNCName():String
 536 
 537     /**
 538      * Scans a qualified name from the input, setting the fields of the
 539      * QName structure appropriately.
 540      * <p>
 541      * <strong>Note:</strong> The qualified name characters are consumed.
 542      * <p>
 543      * <strong>Note:</strong> The strings used to set the values of the
 544      * QName structure must be symbols. The SymbolTable can be used for
 545      * this purpose.
 546      *
 547      * @param qname The qualified name structure to fill.
 548      * @param nt The type of the name (element or attribute)
 549      *
 550      * @return Returns true if a qualified name appeared immediately on
 551      *         the input and was scanned, false otherwise.
 552      *
 553      * @throws IOException  Thrown if i/o error occurs.
 554      * @throws EOFException Thrown on end of file.
 555      *
 556      * @see com.sun.org.apache.xerces.internal.util.SymbolTable
 557      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Name
 558      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11NameStart
 559      */
 560     protected boolean scanQName(QName qname, XMLScanner.NameType nt) throws IOException {
 561 
 562         // load more characters, if needed
 563         if (fCurrentEntity.position == fCurrentEntity.count) {
 564             load(0, true, true);
 565         }
 566 
 567         // scan qualified name
 568         int offset = fCurrentEntity.position;
 569         char ch = fCurrentEntity.ch[offset];
 570 
 571         if (XML11Char.isXML11NCNameStart(ch)) {
 572             if (++fCurrentEntity.position == fCurrentEntity.count) {
 573                 invokeListeners(1);
 574                 fCurrentEntity.ch[0] = ch;
 575                 offset = 0;
 576                 if (load(1, false, false)) {
 577                     fCurrentEntity.columnNumber++;
 578                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
 579                     qname.setValues(null, name, name, null);
 580                     checkEntityLimit(nt, fCurrentEntity, 0, 1);
 581                     return true;
 582                 }
 583             }
 584         }
 585         else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 586             if (++fCurrentEntity.position == fCurrentEntity.count) {
 587                 invokeListeners(1);
 588                 fCurrentEntity.ch[0] = ch;
 589                 offset = 0;
 590                 if (load(1, false, false)) {
 591                     --fCurrentEntity.startPosition;
 592                     --fCurrentEntity.position;
 593                     return false;
 594                 }
 595             }
 596             char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 597             if ( !XMLChar.isLowSurrogate(ch2) ||
 598                  !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
 599                 --fCurrentEntity.position;
 600                 return false;
 601             }
 602             if (++fCurrentEntity.position == fCurrentEntity.count) {
 603                 invokeListeners(2);
 604                 fCurrentEntity.ch[0] = ch;
 605                 fCurrentEntity.ch[1] = ch2;
 606                 offset = 0;
 607                 if (load(2, false, false)) {
 608                     fCurrentEntity.columnNumber += 2;
 609                     String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
 610                     qname.setValues(null, name, name, null);
 611                     checkEntityLimit(nt, fCurrentEntity, 0, 2);
 612                     return true;
 613                 }
 614             }
 615         }
 616         else {
 617             return false;
 618         }
 619 
 620         int index = -1;
 621         int length = 0;
 622         boolean sawIncompleteSurrogatePair = false;
 623         do {
 624             ch = fCurrentEntity.ch[fCurrentEntity.position];
 625             if (XML11Char.isXML11Name(ch)) {
 626                 if (ch == ':') {
 627                     if (index != -1) {
 628                         break;
 629                     }
 630                     index = fCurrentEntity.position;
 631                     //check prefix before further read
 632                     checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
 633                 }
 634                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 635                     if (index != -1) {
 636                         index = index - offset;
 637                     }
 638                     offset = 0;
 639                     if (load(length, false, false)) {
 640                         break;
 641                     }
 642                 }
 643             }
 644             else if (XML11Char.isXML11NameHighSurrogate(ch)) {
 645                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 646                     if (index != -1) {
 647                         index = index - offset;
 648                     }
 649                     offset = 0;
 650                     if (load(length, false, false)) {
 651                         sawIncompleteSurrogatePair = true;
 652                         --fCurrentEntity.startPosition;
 653                         --fCurrentEntity.position;
 654                         break;
 655                     }
 656                 }
 657                 char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
 658                 if ( !XMLChar.isLowSurrogate(ch2) ||
 659                      !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
 660                     sawIncompleteSurrogatePair = true;
 661                     --fCurrentEntity.position;
 662                     break;
 663                 }
 664                 if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
 665                     if (index != -1) {
 666                         index = index - offset;
 667                     }
 668                     offset = 0;
 669                     if (load(length, false, false)) {
 670                         break;
 671                     }
 672                 }
 673             }
 674             else {
 675                 break;
 676             }
 677         }
 678         while (true);
 679 
 680         length = fCurrentEntity.position - offset;
 681         fCurrentEntity.columnNumber += length;
 682 
 683         if (length > 0) {
 684             String prefix = null;
 685             String localpart = null;
 686             String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
 687                                                     offset, length);
 688             if (index != -1) {
 689                 int prefixLength = index - offset;
 690                 //check the result: prefix
 691                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
 692                 prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
 693                                                     offset, prefixLength);
 694                 int len = length - prefixLength - 1;
 695                 int startLocal = index +1;
 696                 if (!XML11Char.isXML11NCNameStart(fCurrentEntity.ch[startLocal]) &&
 697                     (!XML11Char.isXML11NameHighSurrogate(fCurrentEntity.ch[startLocal]) ||
 698                     sawIncompleteSurrogatePair)){
 699                     fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 700                                                "IllegalQName",
 701                                                new Object[]{rawname},
 702                                                XMLErrorReporter.SEVERITY_FATAL_ERROR);
 703                 }
 704                 //check the result: localpart
 705                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
 706                 localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
 707                                                    index + 1, len);
 708 
 709             }
 710             else {
 711                 localpart = rawname;
 712                 //check the result: localpart
 713                 checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
 714             }
 715             qname.setValues(prefix, localpart, rawname, null);
 716             checkEntityLimit(nt, fCurrentEntity, offset, length);
 717             return true;
 718         }
 719         return false;
 720 
 721     } // scanQName(QName):boolean
 722 
 723     /**
 724      * Scans a range of parsed character data, setting the fields of the
 725      * XMLString structure, appropriately.
 726      * <p>
          * A leading run of line ends is rewritten in the buffer as
          * <code>'\n'</code> characters before the content scan starts. In an
          * external entity <code>'\r'</code>, 0x85 and 0x2028 are recognized as
          * line ends in addition to <code>'\n'</code>, and a <code>'\r'</code>
          * directly followed by <code>'\n'</code> or 0x85 is consumed as one
          * line end.
          * <p>
 727      * <strong>Note:</strong> The characters are consumed.
 728      * <p>
 729      * <strong>Note:</strong> This method does not guarantee to return
 730      * the longest run of parsed character data. This method may return
 731      * before markup due to reaching the end of the input buffer or any
 732      * other reason.
 733      * <p>
 734      * <strong>Note:</strong> The fields contained in the XMLString
 735      * structure are not guaranteed to remain valid upon subsequent calls
 736      * to the entity scanner. Therefore, the caller is responsible for
 737      * immediately using the returned character data or making a copy of
 738      * the character data.
 739      *
 740      * @param content The content structure to fill.
 741      *
 742      * @return Returns the next character on the input, if known. This
 743      *         value may be -1 but this does <em>not</em> designate
 744      *         end of file. In an external entity a pending
          *         <code>'\r'</code>, 0x85 or 0x2028 is reported as
          *         <code>'\n'</code>.
 745      *
 746      * @throws IOException  Thrown if i/o error occurs.
 747      * @throws EOFException Thrown on end of file.
 748      */
 749     protected int scanContent(XMLString content) throws IOException {
 750 
 751         // load more characters, if needed
 752         if (fCurrentEntity.position == fCurrentEntity.count) {
 753             load(0, true, true);
 754         }
 755         else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one unread character is left: copy it to the front of
                 // the buffer, load more input behind it and restart at index 0
 756             invokeListeners(1);
 757             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 758             load(1, false, false);
 759             fCurrentEntity.position = 0;
 760             fCurrentEntity.startPosition = 0;
 761         }
 762 
 763         // normalize newlines
 764         int offset = fCurrentEntity.position;
 765         int c = fCurrentEntity.ch[offset];
 766         int newlines = 0;
             // becomes true when checkEntityLimit() was already called inside the
             // newline loop and load() then returned true; the call after the
             // content scan is skipped in that case
 767         boolean counted = false;
 768         boolean external = fCurrentEntity.isExternal();
             // '\r', 0x85 and 0x2028 only count as line ends in external entities
 769         if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
 770             do {
 771                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 772                 if ((c == '\r' ) && external) {
 773                     newlines++;
 774                     fCurrentEntity.lineNumber++;
 775                     fCurrentEntity.columnNumber = 1;
 776                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // the buffer ended right after this '\r': report what
                             // was consumed so far, then continue from the front
                             // of the buffer. position starts at `newlines` so the
                             // first `newlines` slots can receive the '\n'
                             // characters written after the loop (assumes
                             // load(offset, ...) fills the buffer from `offset`
                             // on -- TODO confirm against load())
 777                         checkEntityLimit(null, fCurrentEntity, offset, newlines);
 778                         offset = 0;
 779                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 780                         fCurrentEntity.position = newlines;
 781                         fCurrentEntity.startPosition = newlines;
 782                         if (load(newlines, false, true)) {
 783                             counted = true;
 784                             break;
 785                         }
 786                     }
                         // a '\n' or 0x85 directly after the '\r' belongs to the
                         // same line end: consume it and advance offset so the
                         // pair yields a single '\n' in the returned range
 787                     int cc = fCurrentEntity.ch[fCurrentEntity.position];
 788                     if (cc == '\n' || cc == 0x85) {
 789                         fCurrentEntity.position++;
 790                         offset++;
 791                     }
 792                     /*** NEWLINE NORMALIZATION ***/
                         // NOTE(review): a '\r' without a following '\n'/0x85
                         // increments `newlines` a second time here; confirm this
                         // is intended before relying on the counter
 793                     else {
 794                         newlines++;
 795                     }
 796                 }
 797                 else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
 798                     newlines++;
 799                     fCurrentEntity.lineNumber++;
 800                     fCurrentEntity.columnNumber = 1;
 801                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // same buffer-end handling as in the '\r' branch above
 802                         checkEntityLimit(null, fCurrentEntity, offset, newlines);
 803                         offset = 0;
 804                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 805                         fCurrentEntity.position = newlines;
 806                         fCurrentEntity.startPosition = newlines;
 807                         if (load(newlines, false, true)) {
 808                             counted = true;
 809                             break;
 810                         }
 811                     }
 812                 }
 813                 else {
                         // not a line end: un-read it and leave the newline loop
 814                     fCurrentEntity.position--;
 815                     break;
 816                 }
 817             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite the consumed run so the caller only sees '\n'
 818             for (int i = offset; i < fCurrentEntity.position; i++) {
 819                 fCurrentEntity.ch[i] = '\n';
 820             }
 821             int length = fCurrentEntity.position - offset;
 822             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // only one unread character remains: return just the
                     // normalized line ends; -1 means the next character is
                     // not known yet
 823                 checkEntityLimit(null, fCurrentEntity, offset, length);
 824                 content.setValues(fCurrentEntity.ch, offset, length);
 825                 return -1;
 826             }
 827         }
 828 
 829         // inner loop, scanning for content
 830         if (external) {
 831             while (fCurrentEntity.position < fCurrentEntity.count) {
 832                 c = fCurrentEntity.ch[fCurrentEntity.position++];
                     // 0x85 and 0x2028 also end the run so that they are handled
                     // by the newline normalization of a later call
 833                 if (!XML11Char.isXML11Content(c) || c == 0x85 || c == 0x2028) {
 834                     fCurrentEntity.position--;
 835                     break;
 836                 }
 837             }
 838         }
 839         else {
 840             while (fCurrentEntity.position < fCurrentEntity.count) {
 841                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 842                 // In internal entities control characters are allowed to appear unescaped.
 843                 if (!XML11Char.isXML11InternalEntityContent(c)) {
 844                     fCurrentEntity.position--;
 845                     break;
 846                 }
 847             }
 848         }
             // the returned range covers the normalized line ends plus the content
             // run; the column only advances by the part not counted in `newlines`
 849         int length = fCurrentEntity.position - offset;
 850         fCurrentEntity.columnNumber += length - newlines;
 851         if (!counted) {
 852             checkEntityLimit(null, fCurrentEntity, offset, length);
 853         }
 854         content.setValues(fCurrentEntity.ch, offset, length);
 855 
 856         // return next character
 857         if (fCurrentEntity.position != fCurrentEntity.count) {
 858             c = fCurrentEntity.ch[fCurrentEntity.position];
 859             // REVISIT: Does this need to be updated to fix the
 860             //          #x0D ^#x0A newline normalization problem? -Ac
 861             if ((c == '\r' || c == 0x85 || c == 0x2028) && external) {
 862                 c = '\n';
 863             }
 864         }
 865         else {
                 // buffer exhausted: the next character is unknown
 866             c = -1;
 867         }
 868         return c;
 869 
 870     } // scanContent(XMLString):int
 871 
 872     /**
 873      * Scans a range of attribute value data, setting the fields of the
 874      * XMLString structure, appropriately.
 875      * <p>
          * A leading run of line ends is rewritten in the buffer as
          * <code>'\n'</code> characters before the literal scan starts. In an
          * external entity <code>'\r'</code>, 0x85 and 0x2028 are recognized as
          * line ends in addition to <code>'\n'</code>. The literal scan stops at
          * the quote character, at <code>'%'</code>, or at a character rejected
          * by the XML 1.1 content check for the entity kind.
          * <p>
 876      * <strong>Note:</strong> The characters are consumed.
 877      * <p>
 878      * <strong>Note:</strong> This method does not guarantee to return
 879      * the longest run of attribute value data. This method may return
 880      * before the quote character due to reaching the end of the input
 881      * buffer or any other reason.
 882      * <p>
 883      * <strong>Note:</strong> The fields contained in the XMLString
 884      * structure are not guaranteed to remain valid upon subsequent calls
 885      * to the entity scanner. Therefore, the caller is responsible for
 886      * immediately using the returned character data or making a copy of
 887      * the character data.
 888      *
 889      * @param quote   The quote character that signifies the end of the
 890      *                attribute value data.
 891      * @param content The content structure to fill.
 892      * @param isNSURI a flag indicating whether the content is a Namespace URI;
          *                when true the scanned range is additionally checked
          *                against <code>Limit.MAX_NAME_LIMIT</code>.
 893      *
 894      * @return Returns the next character on the input, if known. This
 895      *         value may be -1 but this does <em>not</em> designate
 896      *         end of file.
 897      *
 898      * @throws IOException  Thrown if i/o error occurs.
 899      * @throws EOFException Thrown on end of file.
 900      */
 901     protected int scanLiteral(int quote, XMLString content, boolean isNSURI)
 902         throws IOException {
 903         // load more characters, if needed
 904         if (fCurrentEntity.position == fCurrentEntity.count) {
 905             load(0, true, true);
 906         }
 907         else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                 // exactly one unread character is left: copy it to the front of
                 // the buffer, load more input behind it and restart at index 0
 908             invokeListeners(1);
 909             fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 910             load(1, false, false);
 911             fCurrentEntity.startPosition = 0;
 912             fCurrentEntity.position = 0;
 913         }
 914 
 915         // normalize newlines
 916         int offset = fCurrentEntity.position;
 917         int c = fCurrentEntity.ch[offset];
 918         int newlines = 0;
 919         boolean external = fCurrentEntity.isExternal();
             // '\r', 0x85 and 0x2028 only count as line ends in external entities
 920         if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
 921             do {
 922                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 923                 if ((c == '\r' ) && external) {
 924                     newlines++;
 925                     fCurrentEntity.lineNumber++;
 926                     fCurrentEntity.columnNumber = 1;
 927                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // the buffer ended right after this '\r': continue
                             // from the front of the buffer. position starts at
                             // `newlines` so the first `newlines` slots can
                             // receive the '\n' characters written after the loop
                             // (assumes load(offset, ...) fills the buffer from
                             // `offset` on -- TODO confirm against load())
 928                         offset = 0;
 929                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 930                         fCurrentEntity.position = newlines;
 931                         fCurrentEntity.startPosition = newlines;
 932                         if (load(newlines, false, true)) {
 933                             break;
 934                         }
 935                     }
                         // a '\n' or 0x85 directly after the '\r' belongs to the
                         // same line end: consume it and advance offset so the
                         // pair yields a single '\n' in the returned range
 936                     int cc = fCurrentEntity.ch[fCurrentEntity.position];
 937                     if (cc == '\n' || cc == 0x85) {
 938                         fCurrentEntity.position++;
 939                         offset++;
 940                     }
 941                     /*** NEWLINE NORMALIZATION ***/
                         // NOTE(review): a '\r' without a following '\n'/0x85
                         // increments `newlines` a second time here; confirm this
                         // is intended before relying on the counter
 942                     else {
 943                         newlines++;
 944                     }
 945                 }
 946                 else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
 947                     newlines++;
 948                     fCurrentEntity.lineNumber++;
 949                     fCurrentEntity.columnNumber = 1;
 950                     if (fCurrentEntity.position == fCurrentEntity.count) {
                             // same buffer-end handling as in the '\r' branch above
 951                         offset = 0;
 952                         fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 953                         fCurrentEntity.position = newlines;
 954                         fCurrentEntity.startPosition = newlines;
 955                         if (load(newlines, false, true)) {
 956                             break;
 957                         }
 958                     }
 959                 }
 960                 else {
                         // not a line end: un-read it and leave the newline loop
 961                     fCurrentEntity.position--;
 962                     break;
 963                 }
 964             } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                 // overwrite the consumed run so the caller only sees '\n'
 965             for (int i = offset; i < fCurrentEntity.position; i++) {
 966                 fCurrentEntity.ch[i] = '\n';
 967             }
 968             int length = fCurrentEntity.position - offset;
 969             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                     // only one unread character remains: return just the
                     // normalized line ends; -1 means the next character is
                     // not known yet.
                     // NOTE(review): unlike the main exit path below, this early
                     // return calls neither checkEntityLimit() nor checkLimit();
                     // confirm that skipping the limit checks here is intended
 970                 content.setValues(fCurrentEntity.ch, offset, length);
 971                 return -1;
 972             }
 973         }
 974 
 975         // scan literal value
 976         if (external) {
 977             while (fCurrentEntity.position < fCurrentEntity.count) {
 978                 c = fCurrentEntity.ch[fCurrentEntity.position++];
                     // 0x85 and 0x2028 also end the run so that they are handled
                     // by the newline normalization of a later call
 979                 if (c == quote || c == '%' || !XML11Char.isXML11Content(c)
 980                     || c == 0x85 || c == 0x2028) {
 981                     fCurrentEntity.position--;
 982                     break;
 983                 }
 984             }
 985         }
 986         else {
 987             while (fCurrentEntity.position < fCurrentEntity.count) {
 988                 c = fCurrentEntity.ch[fCurrentEntity.position++];
 989                 // In internal entities control characters are allowed to appear unescaped.
                     // The quote only ends the run when the entity's `literal`
                     // flag is not set.
 990                 if ((c == quote && !fCurrentEntity.literal)
 991                     || c == '%' || !XML11Char.isXML11InternalEntityContent(c)) {
 992                     fCurrentEntity.position--;
 993                     break;
 994                 }
 995             }
 996         }
             // the returned range covers the normalized line ends plus the literal
             // run; the column only advances by the part not counted in `newlines`
 997         int length = fCurrentEntity.position - offset;
 998         fCurrentEntity.columnNumber += length - newlines;
 999 
1000         checkEntityLimit(null, fCurrentEntity, offset, length);
1001         if (isNSURI) {
1002             checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
1003         }
1004         content.setValues(fCurrentEntity.ch, offset, length);
1005 
1006         // return next character
1007         if (fCurrentEntity.position != fCurrentEntity.count) {
1008             c = fCurrentEntity.ch[fCurrentEntity.position];
1009             // NOTE: We don't want to accidentally signal the
1010             //       end of the literal if we're expanding an
1011             //       entity appearing in the literal. -Ac
1012             if (c == quote && fCurrentEntity.literal) {
1013                 c = -1;
1014             }
1015         }
1016         else {
                 // buffer exhausted: the next character is unknown
1017             c = -1;
1018         }
1019         return c;
1020 
1021     } // scanLiteral(int,XMLString,boolean):int
1022 
1023     /**
1024      * Scans a range of character data up to the specified delimiter,
1025      * appending the scanned characters to the given buffer.
1026      * <p>
1027      * <strong>Note:</strong> The characters are consumed.
1028      * <p>
1029      * <strong>Note:</strong> This assumes that the internal buffer is
1030      * at least the same size, or bigger, than the length of the delimiter
1031      * and that the delimiter contains at least one character.
1032      * <p>
1033      * <strong>Note:</strong> This method does not guarantee to return
1034      * the longest run of character data. This method may return before
1035      * the delimiter due to reaching the end of the input buffer or any
1036      * other reason.
1037      * <p>
1038      * <strong>Note:</strong> The fields contained in the XMLString
1039      * structure are not guaranteed to remain valid upon subsequent calls
1040      * to the entity scanner. Therefore, the caller is responsible for
1041      * immediately using the returned character data or making a copy of
1042      * the character data.
          * <p>
          * Line ends at the start of each scanned run are rewritten as
          * <code>'\n'</code>; in an external entity <code>'\r'</code>, 0x85 and
          * 0x2028 are recognized as line ends in addition to <code>'\n'</code>.
          * When the delimiter is found it is consumed but not appended to the
          * buffer.
1043      *
1044      * @param delimiter The string that signifies the end of the character
1045      *                  data to be scanned.
1046      * @param buffer    The buffer the scanned character data is appended to.
1047      *
1048      * @return Returns true if there is more data to scan, false otherwise.
          *         False is returned once the delimiter has been consumed, and
          *         also when too little input remains to hold the delimiter.
1049      *
1050      * @throws IOException  Thrown if i/o error occurs.
1051      * @throws EOFException Thrown on end of file.
1052      */
1053     protected boolean scanData(String delimiter, XMLStringBuffer buffer)
1054         throws IOException {
1055 
1056         boolean done = false;
1057         int delimLen = delimiter.length();
1058         char charAt0 = delimiter.charAt(0);
1059         boolean external = fCurrentEntity.isExternal();
1060         do {
1061             // load more characters, if needed
1062             if (fCurrentEntity.position == fCurrentEntity.count) {
1063                 load(0, true, false);
1064             }
1065 
1066             boolean bNextEntity = false;
1067 
                 // while no more than delimLen unread characters remain, move the
                 // unread tail to the front of the buffer and load more behind
                 // it; stop once load() returns true
1068             while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen)
1069                 && (!bNextEntity))
1070             {
1071               System.arraycopy(fCurrentEntity.ch,
1072                                fCurrentEntity.position,
1073                                fCurrentEntity.ch,
1074                                0,
1075                                fCurrentEntity.count - fCurrentEntity.position);
1076 
1077               bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
1078               fCurrentEntity.position = 0;
1079               fCurrentEntity.startPosition = 0;
1080             }
1081 
1082             if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
1083                 // something must be wrong with the input:  e.g., file ends in an unterminated comment
                     // hand back whatever is left and report that scanning is over
1084                 int length = fCurrentEntity.count - fCurrentEntity.position;
1085                 checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length);
1086                 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                     // NOTE(review): the column advances by `count`, not by
                     // `length`; the two agree when position is 0 as set by the
                     // loop above -- confirm for the case where load() switched
                     // the entity
1087                 fCurrentEntity.columnNumber += fCurrentEntity.count;
1088                 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1089                 fCurrentEntity.position = fCurrentEntity.count;
1090                 fCurrentEntity.startPosition = fCurrentEntity.count;
1091                 load(0,true, false);
1092                 return false;
1093             }
1094 
1095             // normalize newlines
1096             int offset = fCurrentEntity.position;
1097             int c = fCurrentEntity.ch[offset];
1098             int newlines = 0;
                 // '\r', 0x85 and 0x2028 only count as line ends in external entities
1099             if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
1100                 do {
1101                     c = fCurrentEntity.ch[fCurrentEntity.position++];
1102                     if ((c == '\r' ) && external) {
1103                         newlines++;
1104                         fCurrentEntity.lineNumber++;
1105                         fCurrentEntity.columnNumber = 1;
1106                         if (fCurrentEntity.position == fCurrentEntity.count) {
                                 // the buffer ended right after this '\r':
                                 // continue from the front of the buffer, with
                                 // position starting at `newlines` so those slots
                                 // can receive the '\n' written after the loop
1107                             offset = 0;
1108                             fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1109                             fCurrentEntity.position = newlines;
1110                             fCurrentEntity.startPosition = newlines;
1111                             if (load(newlines, false, true)) {
1112                                 break;
1113                             }
1114                         }
                             // a '\n' or 0x85 directly after the '\r' belongs to
                             // the same line end: consume it and advance offset so
                             // the pair yields a single '\n' in the appended data
1115                         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1116                         if (cc == '\n' || cc == 0x85) {
1117                             fCurrentEntity.position++;
1118                             offset++;
1119                         }
1120                         /*** NEWLINE NORMALIZATION ***/
                             // NOTE(review): a '\r' without a following '\n'/0x85
                             // increments `newlines` a second time here; confirm
                             // this is intended
1121                         else {
1122                             newlines++;
1123                         }
1124                     }
1125                     else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
1126                         newlines++;
1127                         fCurrentEntity.lineNumber++;
1128                         fCurrentEntity.columnNumber = 1;
1129                         if (fCurrentEntity.position == fCurrentEntity.count) {
1130                             offset = 0;
1131                             fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1132                             fCurrentEntity.position = newlines;
1133                             fCurrentEntity.startPosition = newlines;
                                 // NOTE(review): count is reset here but not in
                                 // the '\r' branch above; confirm whether the
                                 // difference is intended
1134                             fCurrentEntity.count = newlines;
1135                             if (load(newlines, false, true)) {
1136                                 break;
1137                             }
1138                         }
1139                     }
1140                     else {
                             // not a line end: un-read it and leave the newline loop
1141                         fCurrentEntity.position--;
1142                         break;
1143                     }
1144                 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                     // overwrite the consumed run so only '\n' is appended
1145                 for (int i = offset; i < fCurrentEntity.position; i++) {
1146                     fCurrentEntity.ch[i] = '\n';
1147                 }
1148                 int length = fCurrentEntity.position - offset;
1149                 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                         // only one unread character remains: append just the
                         // normalized line ends and let the caller call again
1150                     checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1151                     buffer.append(fCurrentEntity.ch, offset, length);
1152                     return true;
1153                 }
1154             }
1155 
1156             // iterate over buffer looking for delimiter
1157             if (external) {
1158                 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1159                     c = fCurrentEntity.ch[fCurrentEntity.position++];
1160                     if (c == charAt0) {
1161                         // looks like we just hit the delimiter
1162                         int delimOffset = fCurrentEntity.position - 1;
1163                         for (int i = 1; i < delimLen; i++) {
1164                             if (fCurrentEntity.position == fCurrentEntity.count) {
                                     // buffer ended inside a possible delimiter:
                                     // rewind to its first character so it is
                                     // re-examined after the next load
1165                                 fCurrentEntity.position -= i;
1166                                 break OUTER;
1167                             }
1168                             c = fCurrentEntity.ch[fCurrentEntity.position++];
1169                             if (delimiter.charAt(i) != c) {
                                     // mismatch: un-read this character and keep
                                     // scanning from it
1170                                 fCurrentEntity.position--;
1171                                 break;
1172                             }
1173                          }
                              // position only reaches this value when every
                              // delimiter character matched
1174                          if (fCurrentEntity.position == delimOffset + delimLen) {
1175                             done = true;
1176                             break;
1177                          }
1178                     }
1179                     else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                             // line end: stop here so the next pass of the outer
                             // do-loop normalizes it
1180                         fCurrentEntity.position--;
1181                         break;
1182                     }
1183                     // In external entities control characters cannot appear
1184                     // as literals so do not skip over them.
1185                     else if (!XML11Char.isXML11ValidLiteral(c)) {
                             // append what was scanned so far and return with the
                             // rejected character still unread
1186                         fCurrentEntity.position--;
1187                         int length = fCurrentEntity.position - offset;
1188                         fCurrentEntity.columnNumber += length - newlines;
1189                         checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1190                         buffer.append(fCurrentEntity.ch, offset, length);
1191                         return true;
1192                     }
1193                 }
1194             }
1195             else {
1196                 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1197                     c = fCurrentEntity.ch[fCurrentEntity.position++];
1198                     if (c == charAt0) {
1199                         // looks like we just hit the delimiter
1200                         int delimOffset = fCurrentEntity.position - 1;
1201                         for (int i = 1; i < delimLen; i++) {
1202                             if (fCurrentEntity.position == fCurrentEntity.count) {
                                     // buffer ended inside a possible delimiter:
                                     // rewind to its first character so it is
                                     // re-examined after the next load
1203                                 fCurrentEntity.position -= i;
1204                                 break OUTER;
1205                             }
1206                             c = fCurrentEntity.ch[fCurrentEntity.position++];
1207                             if (delimiter.charAt(i) != c) {
                                     // mismatch: un-read this character and keep
                                     // scanning from it
1208                                 fCurrentEntity.position--;
1209                                 break;
1210                             }
1211                         }
                             // position only reaches this value when every
                             // delimiter character matched
1212                         if (fCurrentEntity.position == delimOffset + delimLen) {
1213                             done = true;
1214                             break;
1215                         }
1216                     }
1217                     else if (c == '\n') {
                             // line end: stop here so the next pass of the outer
                             // do-loop normalizes it
1218                         fCurrentEntity.position--;
1219                         break;
1220                     }
1221                     // Control characters are allowed to appear as literals
1222                     // in internal entities.
1223                     else if (!XML11Char.isXML11Valid(c)) {
                             // append what was scanned so far and return with the
                             // rejected character still unread
1224                         fCurrentEntity.position--;
1225                         int length = fCurrentEntity.position - offset;
1226                         fCurrentEntity.columnNumber += length - newlines;
1227                         checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1228                         buffer.append(fCurrentEntity.ch, offset, length);
1229                         return true;
1230                     }
1231                 }
1232             }
1233             int length = fCurrentEntity.position - offset;
1234             fCurrentEntity.columnNumber += length - newlines;
1235             checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1236             if (done) {
                     // the delimiter was consumed but must not be appended
1237                 length -= delimLen;
1238             }
1239             buffer.append(fCurrentEntity.ch, offset, length);
1240 
1241             // keep scanning until the delimiter has been found
1242         } while (!done);
             // done is always true here, so this returns false
1243         return !done;
1244 
1245     } // scanData(String,XMLStringBuffer):boolean
1246 
1247     /**
1248      * Skips a character appearing immediately on the input.
1249      * <p>
1250      * <strong>Note:</strong> The character is consumed only if it matches
1251      * the specified character.
          * <p>
          * When <code>c</code> is <code>'\n'</code> and the current entity is
          * external, 0x85, 0x2028, and <code>'\r'</code> (optionally followed by
          * <code>'\n'</code> or 0x85) are also accepted and consumed as a line
          * end.
1252      *
1253      * @param c The character to skip.
          * @param nt The name type handed to <code>checkEntityLimit</code>
          *           together with the skipped range.
1254      *
1255      * @return Returns true if the character was skipped.
1256      *
1257      * @throws IOException  Thrown if i/o error occurs.
1258      * @throws EOFException Thrown on end of file.
1259      */
1260     protected boolean skipChar(int c, NameType nt) throws IOException {
1261 
1262         // load more characters, if needed
1263         if (fCurrentEntity.position == fCurrentEntity.count) {
1264             load(0, true, true);
1265         }
1266 
1267         // skip character
1268         int offset = fCurrentEntity.position;
1269         int cc = fCurrentEntity.ch[fCurrentEntity.position];
1270         if (cc == c) {
                 // exact match: consume it and update the line/column position
1271             fCurrentEntity.position++;
1272             if (c == '\n') {
1273                 fCurrentEntity.lineNumber++;
1274                 fCurrentEntity.columnNumber = 1;
1275             }
1276             else {
1277                 fCurrentEntity.columnNumber++;
1278             }
1279             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
1280             return true;
1281         }
             // a requested '\n' also matches 0x2028 or 0x85 in an external entity
1282         else if (c == '\n' && ((cc == 0x2028 || cc == 0x85) && fCurrentEntity.isExternal())) {
1283             fCurrentEntity.position++;
1284             fCurrentEntity.lineNumber++;
1285             fCurrentEntity.columnNumber = 1;
1286             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
1287             return true;
1288         }
             // a requested '\n' also matches '\r', "\r\n" or '\r' + 0x85 in an
             // external entity
1289         else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) {
1290             // handle newlines
                 // NOTE(review): cc was just read from ch[position], so this
                 // condition looks unreachable here; a '\r' in the last buffer
                 // slot would make the lookahead below read ch[count]. The
                 // intended test may be position == count - 1 -- confirm
                 // against load() before changing.
1291             if (fCurrentEntity.position == fCurrentEntity.count) {
1292                 invokeListeners(1);
1293                 fCurrentEntity.ch[0] = (char)cc;
1294                 load(1, false, false);
1295             }
                 // step past the '\r' and look at the character after it; a
                 // following '\n' or 0x85 is consumed as part of the same line end
1296             int ccc = fCurrentEntity.ch[++fCurrentEntity.position];
1297             if (ccc == '\n' || ccc == 0x85) {
1298                 fCurrentEntity.position++;
1299             }
1300             fCurrentEntity.lineNumber++;
1301             fCurrentEntity.columnNumber = 1;
1302             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
1303             return true;
1304         }
1305 
1306         // character was not skipped
1307         return false;
1308 
1309     } // skipChar(int,NameType):boolean
1310 
1311     /**
1312      * Skips space characters appearing immediately on the input.
1313      * <p>
1314      * <strong>Note:</strong> The characters are consumed only if they are
1315      * space characters.
1316      *
1317      * @return Returns true if at least one space character was skipped.
1318      *
1319      * @throws IOException  Thrown if i/o error occurs.
1320      * @throws EOFException Thrown on end of file.
1321      *
1322      * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1323      * @see com.sun.org.apache.xerces.internal.util.XML11Char#isXML11Space
1324      */
1325     protected boolean skipSpaces() throws IOException {
1326 
1327         // load more characters, if needed
1328         if (fCurrentEntity.position == fCurrentEntity.count) {
1329             load(0, true, true);
1330         }
1331 
1332 
1333         //we are doing this check only in skipSpace() because it is called by
1334         //fMiscDispatcher and we want the parser to exit gracefully when document
1335         //is well-formed.
1336         //it is possible that end of document is reached and
1337         //fCurrentEntity becomes null
1338         //nothing was read so entity changed  'false' should be returned.
1339         if(fCurrentEntity == null){
1340             return false ;
1341         }
1342 
1343         // skip spaces
1344         int c = fCurrentEntity.ch[fCurrentEntity.position];
1345         int offset = fCurrentEntity.position - 1;
1346         // External --  Match: S + 0x85 + 0x2028, and perform end of line normalization
1347         if (fCurrentEntity.isExternal()) {
1348             if (XML11Char.isXML11Space(c)) {
1349                 do {
1350                     boolean entityChanged = false;
1351                     // handle newlines
1352                     if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
1353                         fCurrentEntity.lineNumber++;
1354                         fCurrentEntity.columnNumber = 1;
1355                         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1356                             invokeListeners(1);
1357                             fCurrentEntity.ch[0] = (char)c;
1358                             entityChanged = load(1, true, false);
1359                             if (!entityChanged) {
1360                                 // the load change the position to be 1,
1361                                 // need to restore it when entity not changed
1362                                 fCurrentEntity.startPosition = 0;
1363                                 fCurrentEntity.position = 0;
1364                             } else if(fCurrentEntity == null){
1365                                 return true ;
1366                             }
1367 
1368                         }
1369                         if (c == '\r') {
1370                             // REVISIT: Does this need to be updated to fix the
1371                             //          #x0D ^#x0A newline normalization problem? -Ac
1372                             int cc = fCurrentEntity.ch[++fCurrentEntity.position];
1373                             if (cc != '\n' && cc != 0x85 ) {
1374                                 fCurrentEntity.position--;
1375                             }
1376                         }
1377                     }
1378                     else {
1379                         fCurrentEntity.columnNumber++;
1380                     }
1381 
1382                     //If this is a general entity, spaces within a start element should be counted
1383                     checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
1384                     offset = fCurrentEntity.position;
1385 
1386                     // load more characters, if needed
1387                     if (!entityChanged)
1388                         fCurrentEntity.position++;
1389                     if (fCurrentEntity.position == fCurrentEntity.count) {
1390                         load(0, true, true);
1391 
1392                         if(fCurrentEntity == null){
1393                         return true ;
1394                         }
1395 
1396                     }
1397                 } while (XML11Char.isXML11Space(c = fCurrentEntity.ch[fCurrentEntity.position]));
1398                 return true;
1399             }
1400         }
1401         // Internal -- Match: S (only)
1402         else if (XMLChar.isSpace(c)) {
1403             do {
1404                 boolean entityChanged = false;
1405                 // handle newlines
1406                 if (c == '\n') {
1407                     fCurrentEntity.lineNumber++;
1408                     fCurrentEntity.columnNumber = 1;
1409                     if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1410                         invokeListeners(1);
1411                         fCurrentEntity.ch[0] = (char)c;
1412                         entityChanged = load(1, true, false);
1413                         if (!entityChanged) {
1414                             // the load change the position to be 1,
1415                             // need to restore it when entity not changed
1416                             fCurrentEntity.startPosition = 0;
1417                             fCurrentEntity.position = 0;
1418                         } else if(fCurrentEntity == null){
1419                         return true ;
1420                         }
1421                     }
1422                 }
1423                 else {
1424                     fCurrentEntity.columnNumber++;
1425                 }
1426 
1427                 //If this is a general entity, spaces within a start element should be counted
1428                 checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
1429                 offset = fCurrentEntity.position;
1430 
1431                 // load more characters, if needed
1432                 if (!entityChanged)
1433                     fCurrentEntity.position++;
1434                 if (fCurrentEntity.position == fCurrentEntity.count) {
1435                     load(0, true, true);
1436 
1437                     if(fCurrentEntity == null){
1438                         return true ;
1439                     }
1440 
1441                 }
1442             } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1443             return true;
1444         }
1445 
1446         // no spaces were found
1447         return false;
1448 
1449     } // skipSpaces():boolean
1450 
1451     /**
1452      * Skips the specified string appearing immediately on the input.
1453      * <p>
     * <strong>Note:</strong> The characters are consumed only if the
     * entire string is matched.
1456      *
1457      * @param s The string to skip.
1458      *
1459      * @return Returns true if the string was skipped.
1460      *
1461      * @throws IOException  Thrown if i/o error occurs.
1462      * @throws EOFException Thrown on end of file.
1463      */
1464     protected boolean skipString(String s) throws IOException {
1465 
1466         // load more characters, if needed
1467         if (fCurrentEntity.position == fCurrentEntity.count) {
1468             load(0, true, true);
1469         }
1470 
1471         // skip string
1472         final int length = s.length();
1473         final int beforeSkip = fCurrentEntity.position ;
1474         for (int i = 0; i < length; i++) {
1475             char c = fCurrentEntity.ch[fCurrentEntity.position++];
1476             if (c != s.charAt(i)) {
1477                 fCurrentEntity.position -= i + 1;
1478                 return false;
1479             }
1480             if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
1481                 invokeListeners(0);
1482                 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
1483                 // REVISIT: Can a string to be skipped cross an
1484                 //          entity boundary? -Ac
1485                 if (load(i + 1, false, false)) {
1486                     fCurrentEntity.startPosition -= i + 1;
1487                     fCurrentEntity.position -= i + 1;
1488                     return false;
1489                 }
1490             }
1491         }
1492         fCurrentEntity.columnNumber += length;
1493         if (!detectingVersion) {
1494             checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
1495         }
1496         return true;
1497 
1498     } // skipString(String):boolean
1499 
1500 } // class XML11EntityScanner