New src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java

   1 /*
   2  * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.internal.util.xml.impl;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.InputStreamReader;
  31 import java.io.Reader;
  32 import java.io.UnsupportedEncodingException;
  33 import java.util.HashMap;
  34 import java.util.Map;
  35 import jdk.internal.org.xml.sax.InputSource;
  36 import jdk.internal.org.xml.sax.SAXException;
  37 
  38 /**
  39  * XML non-validating parser engine.
  40  */
  41 public abstract class Parser {
  42 
  43     public static final String FAULT = "";  // message passed to panic() by the parsing methods
         // Buffer size passed to new Input(...) when the external DTD subset is pushed.
  44     protected static final int BUFFSIZE_READER = 512;
         // Initial length of the parser buffer (mBuff) allocated by the constructor.
  45     protected static final int BUFFSIZE_PARSER = 128;
  46     /**
  47      * The end of stream character.
  48      */
  49     public static final char EOS = 0xffff;
  50     private Pair mNoNS; // the "no namespace" prefix entry (empty name and value)
  51     private Pair mXml;  // the predefined "xml" namespace prefix entry
  52     private Map<String, Input> mEnt;  // the general entities lookup table
  53     private Map<String, Input> mPEnt; // the parameter entities lookup table
  54     protected boolean mIsSAlone;     // xml decl standalone flag
  55     protected boolean mIsSAloneSet;  // standalone is explicitly set
  56     protected boolean mIsNSAware;    // if true - namespace aware mode
  57     protected int mPh;  // current phase of document processing (one of PH_*)
  58     protected static final int PH_BEFORE_DOC = -1;  // before parsing
  59     protected static final int PH_DOC_START = 0;   // document start
  60     protected static final int PH_MISC_DTD = 1;   // misc before DTD
  61     protected static final int PH_DTD = 2;   // DTD
  62     protected static final int PH_DTD_MISC = 3;   // misc after DTD
  63     protected static final int PH_DOCELM = 4;   // document's element
  64     protected static final int PH_DOCELM_MISC = 5;   // misc after element
  65     protected static final int PH_AFTER_DOC = 6;   // after parsing
  66     protected int mEvt;  // current event type (one of EV_*)
  67     protected static final int EV_NULL = 0;   // unknown
  68     protected static final int EV_ELM = 1;   // empty element
  69     protected static final int EV_ELMS = 2;   // start element
  70     protected static final int EV_ELME = 3;   // end element
  71     protected static final int EV_TEXT = 4;   // textual content
  72     protected static final int EV_WSPC = 5;   // white space content
  73     protected static final int EV_PI = 6;   // processing instruction
  74     protected static final int EV_CDAT = 7;   // character data
  75     protected static final int EV_COMM = 8;   // comment
  76     protected static final int EV_DTD = 9;   // document type definition
  77     protected static final int EV_ENT = 10;  // skipped entity
  78     private char mESt; // built-in entity recognizer state
  79     // mESt values:
  80     //   0x100   : the initial state
  81     //   > 0x100 : unrecognized name
  82     //   < 0x100 : replacement character
  83     protected char[] mBuff;       // parser buffer
  84     protected int mBuffIdx;    // index of the last char in mBuff; -1 when the buffer is empty
  85     protected Pair mPref;       // stack of prefixes
  86     protected Pair mElm;        // stack of elements
         // Layout of the mAttL structure declared below:
  87     // mAttL.chars - element qname
  88     // mAttL.next  - next element
  89     // mAttL.list  - list of attributes defined on this element
  90     // mAttL.list.chars - attribute qname
  91     // mAttL.list.id    - a char representing attribute's type see below
  92     // mAttL.list.next  - next attribute defined on the element
  93     // mAttL.list.list  - default value structure or null
  94     // mAttL.list.list.chars - "name='value' " chars array for Input
  95     //
  96     // Attribute type character values:
  97     // 'i' - "ID"
  98     // 'r' - "IDREF"
  99     // 'R' - "IDREFS"
 100     // 'n' - "ENTITY"
 101     // 'N' - "ENTITIES"
 102     // 't' - "NMTOKEN"
 103     // 'T' - "NMTOKENS"
 104     // 'u' - enumeration type
 105     // 'o' - "NOTATION"
 106     // 'c' - "CDATA"
 107     // see also: bkeyword() and atype()
 108     //
 109     protected Pair mAttL;       // list of defined attrs by element name
 110     protected Input mDoc;        // document entity
 111     protected Input mInp;        // stack of entities
 112     private char[] mChars;      // reading buffer
 113     private int mChLen;      // current capacity
 114     private int mChIdx;      // index to the next char
 115     protected Attrs mAttrs;      // attributes of the curr. element
 116     private String[] mItems;      // attributes array of the curr. element
 117     private char mAttrIdx;    // attributes counter/index
 118     private String mUnent;  // unresolved entity name
 119     private Pair mDltd;   // deleted objects for reuse
 120     /**
 121      * Default prefixes
 122      */
 123     private static final char NONS[];
 124     private static final char XML[];
 125     private static final char XMLNS[];
 126 
 127     static {
             // Builds the three predefined prefix arrays. In each array element 0
             // holds a small numeric value (0, 4, 6) rather than a character, and
             // the remaining elements spell the prefix text.
             // NOTE(review): element 0 looks like a length/offset marker consumed
             // by the name helpers - confirm against qname()/bname().

             // No prefix: only the numeric element, value 0.
 128         NONS = new char[1];
 129         NONS[0] = (char) 0;
 130 
             // "xml" prefix: numeric element 4 followed by 'x', 'm', 'l'.
 131         XML = new char[4];
 132         XML[0] = (char) 4;
 133         XML[1] = 'x';
 134         XML[2] = 'm';
 135         XML[3] = 'l';
 136 
             // "xmlns" prefix: numeric element 6 followed by 'x', 'm', 'l', 'n', 's'.
 137         XMLNS = new char[6];
 138         XMLNS[0] = (char) 6;
 139         XMLNS[1] = 'x';
 140         XMLNS[2] = 'm';
 141         XMLNS[3] = 'l';
 142         XMLNS[4] = 'n';
 143         XMLNS[5] = 's';
 144     }
 145     /**
 146      * ASCII character type array.
 147      *
 148      * This array maps an ASCII (7 bit) character to the character type.<br>
 149      * Possible character type values are:<br> - ' ' for any kind of white
 150      * space character;<br> - 'a' for any lower case alphabetical character
 151      * value;<br> - 'A' for any upper case alphabetical character value;<br>
 152      * - 'd' for any decimal digit character value;<br> - 'z' for any
 153      * character less than ' ' except '\t', '\n', '\r';<br> An ASCII (7 bit)
 154      * character which does not fall in any category listed above is mapped to
 155      * itself.
 156      */
 157     private static final byte asctyp[];
 158     /**
 159      * NMTOKEN character type array.
 160      *
 161      * This array maps an ASCII (7 bit) character to the character type.<br>
 162      * Possible character type values are:<br> - 0 for underscore ('_') or any
 163      * lower and upper case alphabetical character value;<br> - 1 for colon
 164      * (':') character;<br> - 2 for dash ('-') and dot ('.') or any decimal
 165      * digit character value;<br> - 3 for any kind of white space character<br>
 166      * An ASCII (7 bit) character which does not fall in any category listed
 167      * above is mapped to 0xff (stored in the byte array as the value -1).
 168      */
 169     private static final byte nmttyp[];
 170 
 171     /**
 172      * Static constructor.
 173      *
 174      * Sets up the ASCII character type array which is used by
 175      * {@link #asctyp asctyp} method and NMTOKEN character type array.
 176      */
 177     static {
             // Fills both 128-entry lookup tables. A single index i walks the
             // ASCII range upwards, so the order of the loops below matters.
 178         short i = 0;
 179 
 180         asctyp = new byte[0x80];
             // 0x00..0x1f: control characters are typed 'z' ...
 181         while (i < ' ') {
 182             asctyp[i++] = (byte) 'z';
 183         }
             // ... except tab, CR and LF, which are typed as white space.
 184         asctyp['\t'] = (byte) ' ';
 185         asctyp['\r'] = (byte) ' ';
 186         asctyp['\n'] = (byte) ' ';
             // ' '..'/': each character maps to itself (so space is typed ' ').
             // The array index is evaluated before i++ takes effect, so the
             // slot written is the one for the value being stored.
 187         while (i < '0') {
 188             asctyp[i] = (byte) i++;
 189         }
             // '0'..'9': decimal digits.
 190         while (i <= '9') {
 191             asctyp[i++] = (byte) 'd';
 192         }
             // ':'..'@': each character maps to itself.
 193         while (i < 'A') {
 194             asctyp[i] = (byte) i++;
 195         }
             // 'A'..'Z': upper case letters.
 196         while (i <= 'Z') {
 197             asctyp[i++] = (byte) 'A';
 198         }
             // '['..'`': each character maps to itself.
 199         while (i < 'a') {
 200             asctyp[i] = (byte) i++;
 201         }
             // 'a'..'z': lower case letters.
 202         while (i <= 'z') {
 203             asctyp[i++] = (byte) 'a';
 204         }
             // '{'..0x7f: each character maps to itself.
 205         while (i < 0x80) {
 206             asctyp[i] = (byte) i++;
 207         }
 208 
 209         nmttyp = new byte[0x80];
             // 0x00..'/': not a name character (individual exceptions are set at the end).
 210         for (i = 0; i < '0'; i++) {
 211             nmttyp[i] = (byte) 0xff;
 212         }
 213         while (i <= '9') {
 214             nmttyp[i++] = (byte) 2;  // digits
 215         }
             // ':'..'@': not a name character (':' is overridden at the end).
 216         while (i < 'A') {
 217             nmttyp[i++] = (byte) 0xff;
 218         }
 219         // skipped upper case alphabetical characters are already 0
 220         for (i = '['; i < 'a'; i++) {
 221             nmttyp[i] = (byte) 0xff;
 222         }
 223         // skipped lower case alphabetical characters are already 0
 224         for (i = '{'; i < 0x80; i++) {
 225             nmttyp[i] = (byte) 0xff;
 226         }
             // Individual overrides of entries set to 0xff by the loops above.
 227         nmttyp['_'] = 0;
 228         nmttyp[':'] = 1;
 229         nmttyp['.'] = 2;
 230         nmttyp['-'] = 2;
 231         nmttyp[' '] = 3;
 232         nmttyp['\t'] = 3;
 233         nmttyp['\r'] = 3;
 234         nmttyp['\n'] = 3;
 235     }
 236 
 237     /**
 238      * Constructor.
 239      */
 240     protected Parser() {
             // Puts the parser in the "before document" phase, allocates the parser
             // buffer and attribute holder, and seeds the prefix stack with two
             // permanent entries: the empty (no namespace) prefix and "xml".
 241         mPh = PH_BEFORE_DOC;  // before parsing
 242 
 243         //              Initialize the parser
 244         mBuff = new char[BUFFSIZE_PARSER];
 245         mAttrs = new Attrs();
 246 
 247         //              Default namespace
             // mPref has not been assigned yet, so pair() receives null here.
 248         mPref = pair(mPref);
 249         mPref.name = "";
 250         mPref.value = "";
 251         mPref.chars = NONS;
 252         mNoNS = mPref;  // no namespace
 253         //              XML namespace
             // NOTE(review): pair(x) presumably returns an entry whose next is x,
             // making the "xml" entry sit on top of the no-namespace entry - confirm
             // against pair().
 254         mPref = pair(mPref);
 255         mPref.name = "xml";
 256         mPref.value = "http://www.w3.org/XML/1998/namespace";
 257         mPref.chars = XML;
 258         mXml = mPref;  // XML namespace
 259     }
 260 
 261     /**
 262      * Initializes parser's internals. Note, current input has to be set before
 263      * this method is called.
 264      */
 265     protected void init() {
             // Resets per-document state. mInp is dereferenced without a null
             // check, so the current input must already be set.
 266         mUnent = null;
 267         mElm = null;
 268         mPref = mXml;         // prefix stack back to the predefined "xml" entry
 269         mAttL = null;
 270         mPEnt = new HashMap<>();
 271         mEnt = new HashMap<>();
 272         mDoc = mInp;          // current input is document entity
 273         mChars = mInp.chars;    // use document entity buffer
 274         mPh = PH_DOC_START;  // the beginning of the document
 275     }
 276 
 277     /**
 278      * Cleans up parser internal resources.
 279      */
 280     protected void cleanup() {
             // Releases every per-document structure: declared attribute lists,
             // the element stack, namespace prefixes pushed above the "xml" entry,
             // the input stack and the document reader. Leaves the parser in
             // PH_AFTER_DOC.
             // NOTE(review): del(p) is used as "release p and return the next
             // entry" in the loops below - confirm against del().
 281         //              Default attributes
 282         while (mAttL != null) {
 283             while (mAttL.list != null) {
                     // Release the default-value structure first, if there is one.
 284                 if (mAttL.list.list != null) {
 285                     del(mAttL.list.list);
 286                 }
 287                 mAttL.list = del(mAttL.list);
 288             }
 289             mAttL = del(mAttL);
 290         }
 291         //              Element stack
 292         while (mElm != null) {
 293             mElm = del(mElm);
 294         }
 295         //              Namespace prefixes
             // Stops at mXml, so the predefined entries created by the constructor
             // are kept.
 296         while (mPref != mXml) {
 297             mPref = del(mPref);
 298         }
 299         //              Inputs
 300         while (mInp != null) {
 301             pop();
 302         }
 303         //              Document reader
 304         if ((mDoc != null) && (mDoc.src != null)) {
 305             try {
 306                 mDoc.src.close();
 307             } catch (IOException ioe) {
                     // A failure to close the reader is ignored.
 308             }
 309         }
 310         mPEnt = null;
 311         mEnt = null;
 312         mDoc = null;
 313         mPh = PH_AFTER_DOC;  // after document processing
 314     }
 315 
 316     /**
 317      * Processes a portion of the document. This method returns one of the EV_*
 318      * constants identifying the portion of the document that has been read.
 319      *
 320      * @return Identifier of the processed document portion.
 321      * @exception Exception is a parser specific exception from the panic method.
 322      * @exception IOException
 323      */
 324     @SuppressWarnings("fallthrough")
 325     protected int step() throws Exception {
             // Small state machine driven one character at a time:
             //   st 0 - dispatch on markup ('<...') or switch to content reading
             //   st 1 - accumulate leading white space
             //   st 2 - accumulate text content
             // The loop ends as soon as a branch assigns mEvt.
 326         mEvt = EV_NULL;
 327         int st = 0;
 328         while (mEvt == EV_NULL) {
                 // Take the next char directly from the read buffer when one is
                 // available; otherwise go through getch().
 329             char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
 330             switch (st) {
 331                 case 0:     // all sorts of markup (dispatcher)
 332                     if (ch != '<') {
                             // Not markup: push the char back and read it as content.
 333                         bkch();
 334                         mBuffIdx = -1;  // clean parser buffer
 335                         st = 1;
 336                         break;
 337                     }
 338                     switch (getch()) {
 339                         case '/':  // the end of the element content
 340                             mEvt = EV_ELME;
                                 // A closing tag with no open element is an error.
 341                             if (mElm == null) {
 342                                 panic(FAULT);
 343                             }
 344                             //          Check element's open/close tags balance
 345                             mBuffIdx = -1;  // clean parser buffer
 346                             bname(mIsNSAware);
 347                             char[] chars = mElm.chars;
                                 // Lengths must match, then chars are compared from
                                 // index 1; index 0 of both arrays is not compared.
 348                             if (chars.length == (mBuffIdx + 1)) {
 349                                 for (char i = 1; i <= mBuffIdx; i += 1) {
 350                                     if (chars[i] != mBuff[i]) {
 351                                         panic(FAULT);
 352                                     }
 353                                 }
 354                             } else {
 355                                 panic(FAULT);
 356                             }
 357                             //          Skip white spaces before '>'
 358                             if (wsskip() != '>') {
 359                                 panic(FAULT);
 360                             }
 361                             getch();  // read '>'
 362                             break;
 363 
 364                         case '!':  // a comment or a CDATA
                                 // Peek at the next char and push it back so the
                                 // called routine sees it again.
 365                             ch = getch();
 366                             bkch();
 367                             switch (ch) {
 368                                 case '-':  // must be a comment
 369                                     mEvt = EV_COMM;
 370                                     comm();
 371                                     break;
 372 
 373                                 case '[':  // must be a CDATA section
 374                                     mEvt = EV_CDAT;
 375                                     cdat();
 376                                     break;
 377 
 378                                 default:   // must be 'DOCTYPE'
 379                                     mEvt = EV_DTD;
 380                                     dtd();
 381                                     break;
 382                             }
 383                             break;
 384 
 385                         case '?':  // processing instruction
 386                             mEvt = EV_PI;
 387                             pi();
 388                             break;
 389 
 390                         default:  // must be the first char of an xml name
 391                             bkch();
 392                             //          Read an element name and put it on top of the
 393                             //          element stack
 394                             mElm = pair(mElm);  // add new element to the stack
 395                             mElm.chars = qname(mIsNSAware);
 396                             mElm.name = mElm.local();
                                 // Flags are copied from the enclosing element, or 0
                                 // for the outermost element.
 397                             mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
 398                             mElm.num = 0;     // namespace counter
 399                             //          Find the list of defined attributes of the current
 400                             //          element
 401                             Pair elm = find(mAttL, mElm.chars);
 402                             mElm.list = (elm != null) ? elm.list : null;
 403                             //          Read attributes till the end of the element tag
 404                             mAttrIdx = 0;
                                 // Temporary pair handed to attr() and released right after.
 405                             Pair att = pair(null);
 406                             att.num = 0;  // clear attribute's flags
 407                             attr(att);     // get all attributes inc. defaults
 408                             del(att);
                                 // The element's namespace is resolved only in
                                 // namespace-aware mode, after the attributes are read.
 409                             mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
 410                             //          Skip white spaces before '>'
 411                             switch (wsskip()) {
 412                                 case '>':
 413                                     getch();  // read '>'
 414                                     mEvt = EV_ELMS;
 415                                     break;
 416 
 417                                 case '/':
 418                                     getch();  // read '/'
 419                                     if (getch() != '>') // read '>'
 420                                     {
 421                                         panic(FAULT);
 422                                     }
 423                                     mEvt = EV_ELM;
 424                                     break;
 425 
 426                                 default:
 427                                     panic(FAULT);
 428                             }
 429                             break;
 430                     }
 431                     break;
 432 
 433                 case 1:     // read white space
 434                     switch (ch) {
 435                         case ' ':
 436                         case '\t':
 437                         case '\n':
 438                             bappend(ch);
 439                             break;
 440 
 441                         case '\r':              // EOL processing [#2.11]
                                 // "\r\n" and a lone "\r" are both appended as '\n'.
 442                             if (getch() != '\n') {
 443                                 bkch();
 444                             }
 445                             bappend('\n');
 446                             break;
 447 
 448                         case '<':
                                 // Only white space was seen before the markup.
 449                             mEvt = EV_WSPC;
 450                             bkch();
 451                             bflash_ws();
 452                             break;
 453 
 454                         default:
                                 // Non-white-space char: continue as text. The buffer is
                                 // not reset, so the white space read so far stays in it.
 455                             bkch();
 456                             st = 2;
 457                             break;
 458                     }
 459                     break;
 460 
 461                 case 2:     // read the text content of the element
 462                     switch (ch) {
 463                         case '&':
 464                             if (mUnent == null) {
 465                                 //              There was no unresolved entity on previous step.
                                     // A non-null result from ent() is the name of an
                                     // unresolved entity: flush the text read so far as
                                     // EV_TEXT and plant '&' again so the next call takes
                                     // the else branch and reports EV_ENT.
 466                                 if ((mUnent = ent('x')) != null) {
 467                                     mEvt = EV_TEXT;
 468                                     bkch();      // move back to ';' after entity name
 469                                     setch('&');  // parser must be back on next step
 470                                     bflash();
 471                                 }
 472                             } else {
 473                                 //              There was unresolved entity on previous step.
 474                                 mEvt = EV_ENT;
 475                                 skippedEnt(mUnent);
 476                                 mUnent = null;
 477                             }
 478                             break;
 479 
 480                         case '<':
 481                             mEvt = EV_TEXT;
 482                             bkch();
 483                             bflash();
 484                             break;
 485 
 486                         case '\r':  // EOL processing [#2.11]
 487                             if (getch() != '\n') {
 488                                 bkch();
 489                             }
 490                             bappend('\n');
 491                             break;
 492 
 493                         case EOS:
                                 // End of stream inside text content is an error. There is
                                 // no break here: if panic() returned, control would fall
                                 // into the default branch (hence the fallthrough
                                 // suppression on this method).
                                 // NOTE(review): panic() presumably always throws - confirm.
 494                             panic(FAULT);
 495 
 496                         default:
 497                             bappend(ch);
 498                             break;
 499                     }
 500                     break;
 501 
 502                 default:
 503                     panic(FAULT);
 504             }
 505         }
 506 
 507         return mEvt;
 508     }
 509 
 510     /**
 511      * Parses the document type declaration.
 512      *
 513      * @exception Exception is a parser specific exception from the panic method.
 514      * @exception IOException
 515      */
 516     private void dtd() throws Exception {
             // State machine over the DOCTYPE declaration:
             //   st 0 - read the document type name
             //   st 1 - read the external id ('PUBLIC'/'SYSTEM'), '[' or '>'
             //   st 2 - skip spaces before the internal subset
             //   st 3 - skip spaces after the internal subset, up to '>'
             // The loop ends when st is set to -1.
 517         char ch;
 518         String str = null;  // NOTE(review): not referenced again in this method
 519         String name = null;
 520         Pair psid = null;
 521         // read 'DOCTYPE'
 522         if ("DOCTYPE".equals(name(false)) != true) {
 523             panic(FAULT);
 524         }
 525         mPh = PH_DTD;  // DTD
 526         for (short st = 0; st >= 0;) {
 527             ch = getch();
 528             switch (st) {
 529                 case 0:     // read the document type name
                         // White space before the name is skipped; any other char
                         // is pushed back and read as the start of the name.
 530                     if (chtyp(ch) != ' ') {
 531                         bkch();
 532                         name = name(mIsNSAware);
 533                         wsskip();
 534                         st = 1;  // read 'PUBLIC' or 'SYSTEM'
 535                     }
 536                     break;
 537 
 538                 case 1:     // read 'PUBLIC' or 'SYSTEM'
 539                     switch (chtyp(ch)) {
 540                         case 'A':
                                 // Character type 'A': an external id keyword follows.
                                 // psid.name / psid.value are used below as the public
                                 // and system identifiers respectively.
 541                             bkch();
 542                             psid = pubsys(' ');
 543                             st = 2;  // skip spaces before internal subset
 544                             docType(name, psid.name, psid.value);
 545                             break;
 546 
 547                         case '[':
                                 // No external id; an internal subset follows.
 548                             bkch();
 549                             st = 2;    // skip spaces before internal subset
 550                             docType(name, null, null);
 551                             break;
 552 
 553                         case '>':
                                 // Neither an external id nor an internal subset.
 554                             bkch();
 555                             st = 3;    // skip spaces after internal subset
 556                             docType(name, null, null);
 557                             break;
 558 
 559                         default:
 560                             panic(FAULT);
 561                     }
 562                     break;
 563 
 564                 case 2:     // skip spaces before internal subset
 565                     switch (chtyp(ch)) {
 566                         case '[':
 567                             //          Process internal subset
 568                             dtdsub();
 569                             st = 3;  // skip spaces after internal subset
 570                             break;
 571 
 572                         case '>':
 573                             //          There is no internal subset
 574                             bkch();
 575                             st = 3;  // skip spaces after internal subset
 576                             break;
 577 
 578                         case ' ':
 579                             // skip white spaces
 580                             break;
 581 
 582                         default:
 583                             panic(FAULT);
 584                     }
 585                     break;
 586 
 587                 case 3:     // skip spaces after internal subset
 588                     switch (chtyp(ch)) {
 589                         case '>':
                                 // End of the declaration. If an external id was given,
                                 // try to resolve and process the external subset now.
 590                             if (psid != null) {
 591                                 //              Report the DTD external subset
 592                                 InputSource is = resolveEnt(name, psid.name, psid.value);
 593                                 if (is != null) {
 594                                     if (mIsSAlone == false) {
 595                                         //              Set the end of DTD external subset char
                                             // ']' is planted in the current input; the subset
                                             // parser treats ']' as its end marker.
 596                                         bkch();
 597                                         setch(']');
 598                                         //              Set the DTD external subset InputSource
 599                                         push(new Input(BUFFSIZE_READER));
 600                                         setinp(is);
 601                                         mInp.pubid = psid.name;
 602                                         mInp.sysid = psid.value;
 603                                         //              Parse the DTD external subset
 604                                         dtdsub();
 605                                     } else {
 606                                         //              Standalone document: the resolved external
                                             //              subset is not read; report it as skipped
 607                                         skippedEnt("[dtd]");
 608                                         //              Release reader and stream
 609                                         if (is.getCharacterStream() != null) {
 610                                             try {
 611                                                 is.getCharacterStream().close();
 612                                             } catch (IOException ioe) {
                                                     // A failure to close is ignored.
 613                                             }
 614                                         }
 615                                         if (is.getByteStream() != null) {
 616                                             try {
 617                                                 is.getByteStream().close();
 618                                             } catch (IOException ioe) {
                                                     // A failure to close is ignored.
 619                                             }
 620                                         }
 621                                     }
 622                                 } else {
 623                                     //          Unresolved DTD external subset
 624                                     skippedEnt("[dtd]");
 625                                 }
 626                                 del(psid);
 627                             }
 628                             st = -1;  // end of DTD
 629                             break;
 630 
 631                         case ' ':
 632                             // skip white spaces
 633                             break;
 634 
 635                         default:
 636                             panic(FAULT);
 637                     }
 638                     break;
 639 
 640                 default:
 641                     panic(FAULT);
 642             }
 643         }
 644     }
 645 
 646     /**
 647      * Parses the document type declaration subset.
 648      *
 649      * @exception Exception is a parser specific exception from the panic method.
 650      * @exception IOException
 651      */
 652     private void dtdsub() throws Exception {
             // Two-state machine over the declarations of a DTD subset:
             //   st 0 - between declarations: dispatch on the next construct
             //   st 1 - after a markup declaration: expect its closing '>'
             // The loop ends when st is set to -1 (']' reached).
 653         startInternalSub(); // reports the event before parsing the subset
 654 
 655         char ch;
 656         for (short st = 0; st >= 0;) {
 657             ch = getch();
 658             switch (st) {
 659                 case 0:     // skip white spaces before a declaration
 660                     switch (chtyp(ch)) {
 661                         case '<':
 662                             ch = getch();
 663                             switch (ch) {
 664                                 case '?':
                                         // Processing instruction; st stays 0.
 665                                     pi();
 666                                     break;
 667 
 668                                 case '!':
                                         // Peek at the next char and push it back.
 669                                     ch = getch();
 670                                     bkch();
 671                                     if (ch == '-') {
                                             // Comment; st stays 0.
 672                                         comm();
 673                                         break;
 674                                     }
 675                                     //          A markup or an entity declaration
                                         // Read the declaration keyword into the parser
                                         // buffer and dispatch on the code bkeyword() returns.
 676                                     bntok();
 677                                     switch (bkeyword()) {
 678                                         case 'n':
 679                                             dtdent();     // parse entity declaration
 680                                             break;
 681 
 682                                         case 'a':
 683                                             dtdattl();    // parse attributes declaration
 684                                             break;
 685 
 686                                         case 'e':
 687                                             dtdelm();     // parse element declaration
 688                                             break;
 689 
 690                                         case 'o':
 691                                             dtdnot();     // parse notation declaration
 692                                             break;
 693 
 694                                         default:
 695                                             panic(FAULT); // unsupported markup declaration
 696                                             break;
 697                                     }
 698                                     st = 1;  // read the end of declaration
 699                                     break;
 700 
 701                                 default:
 702                                     panic(FAULT);
 703                                     break;
 704                             }
 705                             break;
 706 
 707                         case '%':
 708                             //          A parameter entity reference
 709                             pent(' ');
 710                             break;
 711 
 712                         case ']':
 713                             //          End of DTD subset
 714                             st = -1;
 715                             break;
 716 
 717                         case ' ':
 718                             //          Skip white spaces
 719                             break;
 720 
 721                         case 'Z':
 722                             //          End of stream
                                 // The char after the end of stream must be ']'.
                                 // NOTE(review): presumably the ']' planted by dtd()
                                 // before pushing the external subset - confirm.
 723                             if (getch() != ']') {
 724                                 panic(FAULT);
 725                             }
 726                             st = -1;
 727                             break;
 728 
 729                         default:
 730                             panic(FAULT);
 731                     }
 732                     break;
 733 
 734                 case 1:     // read the end of declaration
                         // Only white space may precede the closing '>'.
 735                     switch (ch) {
 736                         case '>':   // there is no notation
 737                             st = 0; // skip white spaces before a declaration
 738                             break;
 739 
 740                         case ' ':
 741                         case '\n':
 742                         case '\r':
 743                         case '\t':
 744                             //          Skip white spaces
 745                             break;
 746 
 747                         default:
 748                             panic(FAULT);
 749                             break;
 750                     }
 751                     break;
 752 
 753                 default:
 754                     panic(FAULT);
 755             }
 756         }
 757     }
 758 
 759     /**
 760      * Parses an entity declaration. Parsed entities are stored in the general
 761      * (<code>mEnt</code>) or parameter (<code>mPEnt</code>) entity look up
 762      * table unless the name is already there, so the first declaration wins
 763      * [#4.2]. Unparsed (NDATA) entities are passed to unparsedEntDecl instead.
 764      *
 765      * @exception Exception is parser specific exception from panic method.
 766      * @exception IOException
 767      */
 768     @SuppressWarnings("fallthrough")
 769     private void dtdent() throws Exception {
 770         String str = null;  // entity name
 771         char[] val = null;  // entity value copied out of mBuff
 772         Input inp = null;   // look up table entry created for the entity
 773         Pair ids = null;    // external identifier: name - public id, value - system id
 774         char ch;
 775         for (short st = 0; st >= 0;) {  // a negative state ends the loop
 776             ch = getch();
 777             switch (st) {
 778                 case 0:     // skip white spaces before entity name
 779                     switch (chtyp(ch)) {
 780                         case ' ':
 781                             //          Skip white spaces
 782                             break;
 783
 784                         case '%':
 785                             //          Parameter entity reference or parameter entity declaration.
 786                             ch = getch();  // look at the character which follows '%' ...
 787                             bkch();        // ... and give it back to the input
 788                             if (chtyp(ch) == ' ') {
 789                                 //              Parameter entity declaration: '%' is followed by a white space.
 790                                 wsskip();
 791                                 str = name(false);
 792                                 switch (chtyp(wsskip())) {
 793                                     case 'A':  // presumably the first letter of PUBLIC or SYSTEM - confirm in chtyp
 794                                         //              Read the external identifier
 795                                         ids = pubsys(' ');
 796                                         if (wsskip() == '>') {
 797                                             //          External parsed entity
 798                                             if (mPEnt.containsKey(str) == false) {      // [#4.2]
 799                                                 inp = new Input();
 800                                                 inp.pubid = ids.name;
 801                                                 inp.sysid = ids.value;
 802                                                 mPEnt.put(str, inp);
 803                                             }
 804                                         } else {
 805                                             panic(FAULT);  // only '>' may follow the external identifier here
 806                                         }
 807                                         del(ids);
 808                                         st = -1;  // the end of declaration
 809                                         break;
 810
 811                                     case '\"':
 812                                     case '\'':
 813                                         //              Read the parameter entity value
 814                                         bqstr('d');
 815                                         //              Create the parameter entity value: slot 0 plus mBuffIdx characters of mBuff
 816                                         val = new char[mBuffIdx + 1];
 817                                         System.arraycopy(mBuff, 1, val, 1, val.length - 1);
 818                                         //              Add the leading space [#4.4.8]; NOTE(review): no trailing space is stored here
 819                                         val[0] = ' ';
 820                                         //              Add the entity to the entity look up table
 821                                         if (mPEnt.containsKey(str) == false) {  // [#4.2]
 822                                             inp = new Input(val);
 823                                             inp.pubid = mInp.pubid;  // identifiers, encoding and version are
 824                                             inp.sysid = mInp.sysid;  // taken over from the current input
 825                                             inp.xmlenc = mInp.xmlenc;
 826                                             inp.xmlver = mInp.xmlver;
 827                                             mPEnt.put(str, inp);
 828                                         }
 829                                         st = -1;  // the end of declaration
 830                                         break;
 831
 832                                     default:
 833                                         panic(FAULT);
 834                                         break;
 835                                 }
 836                             } else {
 837                                 //              Parameter entity reference.
 838                                 pent(' ');
 839                             }
 840                             break;
 841
 842                         default:
 843                             //          General entity declaration: the character starts the name
 844                             bkch();
 845                             str = name(false);
 846                             st = 1;  // read entity declaration value
 847                             break;
 848                     }
 849                     break;
 850
 851                 case 1:     // read entity declaration value
 852                     switch (chtyp(ch)) {
 853                         case '\"':  // internal entity
 854                         case '\'':
 855                             bkch();
 856                             bqstr('d');  // read a string into the buffer
 857                             if (mEnt.get(str) == null) {  // nothing is stored under this name yet
 858                                 //              Create general entity value from mBuffIdx characters of mBuff
 859                                 val = new char[mBuffIdx];
 860                                 System.arraycopy(mBuff, 1, val, 0, val.length);
 861                                 //              Add the entity to the entity look up table
 862                                 if (mEnt.containsKey(str) == false) {   // [#4.2]
 863                                     inp = new Input(val);
 864                                     inp.pubid = mInp.pubid;  // identifiers, encoding and version are
 865                                     inp.sysid = mInp.sysid;  // taken over from the current input
 866                                     inp.xmlenc = mInp.xmlenc;
 867                                     inp.xmlver = mInp.xmlver;
 868                                     mEnt.put(str, inp);
 869                                 }
 870                             }
 871                             st = -1;  // the end of declaration
 872                             break;
 873
 874                         case 'A':  // external entity
 875                             bkch();
 876                             ids = pubsys(' ');
 877                             switch (wsskip()) {
 878                                 case '>':  // external parsed entity
 879                                     if (mEnt.containsKey(str) == false) {  // [#4.2]
 880                                         inp = new Input();
 881                                         inp.pubid = ids.name;
 882                                         inp.sysid = ids.value;
 883                                         mEnt.put(str, inp);
 884                                     }
 885                                     break;
 886
 887                                 case 'N':  // external general unparsed entity
 888                                     if ("NDATA".equals(name(false)) == true) {
 889                                         wsskip();
 890                                         unparsedEntDecl(str, ids.name, ids.value, name(false));  // the last name is the notation
 891                                         break;
 892                                     }
 893                                 default:  // also reached by fall through when the keyword is not NDATA
 894                                     panic(FAULT);
 895                                     break;
 896                             }
 897                             del(ids);
 898                             st = -1;  // the end of declaration
 899                             break;
 900
 901                         case ' ':
 902                             //          Skip white spaces
 903                             break;
 904
 905                         default:
 906                             panic(FAULT);
 907                             break;
 908                     }
 909                     break;
 910
 911                 default:
 912                     panic(FAULT);
 913             }
 914         }
 915     }
 915 
 916     /**
 917      * Skips over an element declaration.
 918      *
 919      * This is a stub: the element name is read and dropped and the rest of
 920      * the declaration is consumed, without being interpreted, up to the
 921      * closing angle bracket.
 922      *
 923      * @exception Exception is parser specific exception from panic method.
 924      * @exception IOException
 925      */
 926     @SuppressWarnings("fallthrough")
 927     private void dtdelm() throws Exception {
 928         //              Element name; the value is not used.
 929         wsskip();
 930         name(mIsNSAware);
 931
 932         //              Consume the rest of the declaration.
 933         char next;
 934         do {
 935             next = getch();
 936             if (next == EOS) {
 937                 //      The input ended inside of the declaration.
 938                 panic(FAULT);
 939             }
 940         } while (next != '>');
 941
 942         //              Return the closing angle bracket to the input: the
 943         //              declaration is parsed up to, but not including,
 944         //              that character.
 945         bkch();
 946     }
 947 
 948     /**
 949      * Parses an attribute list declaration up to the closing angle bracket.
 950      *
 951      * The element is looked up in <code>mAttL</code>, or added to it, and its
 952      * attribute declarations, if there are any, are read with dtdatt.
 953      * @exception Exception is parser specific exception from panic method.
 954      * @exception IOException
 955      */
 956     private void dtdattl() throws Exception {
 957         Pair elm = null;  // entry of the declared element in mAttL
 958         for (short st = 0; st >= 0;) {  // the loop is left with return
 959             char ch = getch();
 960             switch (st) {
 961                 case 0:     // read the element name
 962                     switch (chtyp(ch)) {
 963                         case 'a':
 964                         case 'A':
 965                         case '_':
 966                         case 'X':
 967                         case ':':
 968                             bkch();
 969                             //          Get the element from the list or add a new one.
 970                             char[] elmqn = qname(mIsNSAware);
 971                             elm = find(mAttL, elmqn);
 972                             if (elm == null) {
 973                                 elm = pair(mAttL);
 974                                 elm.chars = elmqn;
 975                                 mAttL = elm;
 976                             }
 977                             st = 1;  // read an attribute declaration
 978                             break;
 979
 980                         case ' ':
 981                             break;
 982
 983                         case '%':
 984                             pent(' ');
 985                             break;
 986
 987                         default:
 988                             panic(FAULT);
 989                     }
 990                     break;
 991
 992                 case 1:     // read an attribute declaration
 993                     switch (chtyp(ch)) {
 994                         case 'a':
 995                         case 'A':
 996                         case '_':
 997                         case 'X':
 998                         case ':':
 999                             bkch();
1000                             dtdatt(elm);
1001                             if (wsskip() == '>') {
1002                                 return;
1003                             }
1004                             break;
1005
1006                         case '>':
1007                             //          No attribute is declared, which is allowed [#3.3]; '>' stays in the input
1008                             bkch();
1009                             return;
1010
1011                         case ' ':
1012                             break;
1013
1014                         case '%':
1015                             pent(' ');
1016                             break;
1017
1018                         default:
1019                             panic(FAULT);
1020                     }
1021                     break;
1022
1023                 default:
1024                     panic(FAULT);
1025             }
1026         }
1027     }
1028 
1029     /**
1030      * Parses an attribute declaration.
1031      *
1032      * The attribute uses the following fields of Pair object: chars - characters
1033      * of qualified name; id - the type identifier of the attribute; list - a
1034      * pair which holds the default value (chars field).
1035      *
1036      * @param elm An object which represents all defined attributes on an
1037      * element.
1038      * @exception Exception is parser specific exception from panic method.
1039      * @exception IOException
1040      */
1041     @SuppressWarnings("fallthrough")
1042     private void dtdatt(Pair elm) throws Exception {
1043         char attqn[] = null;  // qualified name of the attribute
1044         Pair att = null;      // the attribute which is being declared
1045         char ch;
1046         for (short st = 0; st >= 0;) {  // a negative state ends the declaration
1047             ch = getch();
1048             switch (st) {
1049                 case 0:     // the attribute name
1050                     switch (chtyp(ch)) {
1051                         case 'a':
1052                         case 'A':
1053                         case '_':
1054                         case 'X':
1055                         case ':':
1056                             bkch();
1057                             //          Get the attribute from the list or add a new one.
1058                             attqn = qname(mIsNSAware);
1059                             att = find(elm.list, attqn);
1060                             if (att == null) {
1061                                 //              New attribute declaration
1062                                 att = pair(elm.list);
1063                                 att.chars = attqn;
1064                                 elm.list = att;
1065                             } else {
1066                                 //              Do not override the attribute declaration [#3.3]:
1067                                 att = pair(null);  // the rest is parsed into a pair which is not stored in elm.list
1068                                 att.chars = attqn;
1069                                 att.id = 'c';
1070                             }
1071                             wsskip();
1072                             st = 1;
1073                             break;
1074
1075                         case '%':
1076                             pent(' ');
1077                             break;
1078
1079                         case ' ':
1080                             break;
1081
1082                         default:
1083                             panic(FAULT);
1084                             break;
1085                     }
1086                     break;
1087
1088                 case 1:     // the attribute type
1089                     switch (chtyp(ch)) {
1090                         case '(':
1091                             att.id = 'u';  // enumeration type
1092                             st = 2;        // read the first element of the list
1093                             break;
1094
1095                         case '%':
1096                             pent(' ');
1097                             break;
1098
1099                         case ' ':
1100                             break;
1101
1102                         default:
1103                             bkch();
1104                             bntok();  // read type id
1105                             att.id = bkeyword();  // the type is kept as a one character keyword id
1106                             switch (att.id) {
1107                                 case 'o':   // NOTATION
1108                                     if (wsskip() != '(') {
1109                                         panic(FAULT);
1110                                     }
1111                                     ch = getch();  // presumably consumes the '(' which wsskip left in the input
1112                                     st = 2;  // read the first element of the list
1113                                     break;
1114
1115                                 case 'i':     // ID
1116                                 case 'r':     // IDREF
1117                                 case 'R':     // IDREFS
1118                                 case 'n':     // ENTITY
1119                                 case 'N':     // ENTITIES
1120                                 case 't':     // NMTOKEN
1121                                 case 'T':     // NMTOKENS
1122                                 case 'c':     // CDATA
1123                                     wsskip();
1124                                     st = 4;  // read default declaration
1125                                     break;
1126
1127                                 default:
1128                                     panic(FAULT);
1129                                     break;
1130                             }
1131                             break;
1132                     }
1133                     break;
1134
1135                 case 2:     // read the first element of the list
1136                     switch (chtyp(ch)) {
1137                         case 'a':
1138                         case 'A':
1139                         case 'd':
1140                         case '.':
1141                         case ':':
1142                         case '-':
1143                         case '_':
1144                         case 'X':
1145                             bkch();
1146                             switch (att.id) {
1147                                 case 'u':  // enumeration type
1148                                     bntok();
1149                                     break;
1150
1151                                 case 'o':  // NOTATION
1152                                     mBuffIdx = -1;  // presumably empties the buffer for bname - confirm
1153                                     bname(false);
1154                                     break;
1155
1156                                 default:
1157                                     panic(FAULT);
1158                                     break;
1159                             }
1160                             wsskip();
1161                             st = 3;  // read next element of the list
1162                             break;
1163
1164                         case '%':
1165                             pent(' ');
1166                             break;
1167
1168                         case ' ':
1169                             break;
1170
1171                         default:
1172                             panic(FAULT);
1173                             break;
1174                     }
1175                     break;
1176
1177                 case 3:     // read next element of the list
1178                     switch (ch) {  // the raw character; white spaces were skipped by wsskip
1179                         case ')':
1180                             wsskip();
1181                             st = 4;  // read default declaration
1182                             break;
1183
1184                         case '|':
1185                             wsskip();
1186                             switch (att.id) {
1187                                 case 'u':  // enumeration type
1188                                     bntok();
1189                                     break;
1190
1191                                 case 'o':  // NOTATION
1192                                     mBuffIdx = -1;  // presumably empties the buffer for bname - confirm
1193                                     bname(false);
1194                                     break;
1195
1196                                 default:
1197                                     panic(FAULT);
1198                                     break;
1199                             }
1200                             wsskip();
1201                             break;
1202
1203                         case '%':
1204                             pent(' ');
1205                             break;
1206
1207                         default:
1208                             panic(FAULT);
1209                             break;
1210                     }
1211                     break;
1212
1213                 case 4:     // read default declaration
1214                     switch (ch) {
1215                         case '#':
1216                             bntok();
1217                             switch (bkeyword()) {
1218                                 case 'F':  // FIXED
1219                                     switch (wsskip()) {
1220                                         case '\"':
1221                                         case '\'':
1222                                             st = 5;  // read the default value
1223                                             break;
1224
1225                                         case EOS:
1226                                             panic(FAULT);  // no break: the next label is reached only if panic returns
1227
1228                                         default:
1229                                             st = -1;
1230                                             break;
1231                                     }
1232                                     break;
1233
1234                                 case 'Q':  // REQUIRED
1235                                 case 'I':  // IMPLIED
1236                                     st = -1;
1237                                     break;
1238
1239                                 default:
1240                                     panic(FAULT);
1241                                     break;
1242                             }
1243                             break;
1244
1245                         case '\"':
1246                         case '\'':
1247                             bkch();
1248                             st = 5;  // read the default value
1249                             break;
1250
1251                         case ' ':
1252                         case '\n':
1253                         case '\r':
1254                         case '\t':
1255                             break;
1256
1257                         case '%':
1258                             pent(' ');
1259                             break;
1260
1261                         default:
1262                             bkch();   // any other character is given back and
1263                             st = -1;  // ends the attribute declaration
1264                             break;
1265                     }
1266                     break;
1267
1268                 case 5:     // read the default value
1269                     switch (ch) {
1270                         case '\"':
1271                         case '\'':
1272                             bkch();
1273                             bqstr('d');  // the value in the mBuff now
1274                             att.list = pair(null);
1275                             //          Create a string like "attqname='value' ": the name without
1276                             att.list.chars = new char[att.chars.length + mBuffIdx + 3];  // chars[0], '=', two quotes, a space
1277                             System.arraycopy(
1278                                     att.chars, 1, att.list.chars, 0, att.chars.length - 1);
1279                             att.list.chars[att.chars.length - 1] = '=';
1280                             att.list.chars[att.chars.length] = ch;  // opening quote, the same character as in the declaration
1281                             System.arraycopy(
1282                                     mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
1283                             att.list.chars[att.chars.length + mBuffIdx + 1] = ch;  // closing quote
1284                             att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1285                             st = -1;
1286                             break;
1287
1288                         default:
1289                             panic(FAULT);
1290                             break;
1291                     }
1292                     break;
1293
1294                 default:
1295                     panic(FAULT);
1296                     break;
1297             }
1298         }
1299     }
1300 
1301     /**
1302      * Parses a notation declaration.
1303      *
1304      * This method parses the declaration up to the closing angle bracket.
1305      *
1306      * @exception Exception is parser specific exception form panic method.
1307      * @exception IOException
1308      */
1309     private void dtdnot() throws Exception {
1310         wsskip();
1311         String name = name(false);
1312         wsskip();
1313         Pair ids = pubsys('N');
1314         notDecl(name, ids.name, ids.value);
1315         del(ids);
1316     }
1317 
1318     /**
1319      * Parses an attribute.
1320      *
1321      * This recursive method is responsible for prefix addition
1322      * (<code>mPref</code>) on the way down: each call reads one attribute and
1323      * calls itself for the next one. The end of the element's start tag
1324      * triggers the return process. On its way back the method resolves
1325      * prefixes and accumulates attributes in <code>mItems</code>.
1326      *
1327      * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
1328      * declared in DTD (attribute declaration had been read); 0x2 - attribute's
1329      * default value is used.</p>
1330      *
1331      * @param att An object which represents current attribute.
1332      * @exception Exception is parser specific exception from panic method.
1333      * @exception IOException
1334      */
1335     @SuppressWarnings("fallthrough")
1336     private void attr(Pair att) throws Exception {
1337         switch (wsskip()) {
1338             case '/':
1339             case '>':
1340                 if ((att.num & 0x2) == 0) {  // all attributes have been read
1341                     att.num |= 0x2;  // set default attribute flag
1342                     Input inp = mInp;  // remembered to find out whether anything gets pushed
1343                     //          Go through all attributes defined on current element.
1344                     for (Pair def = mElm.list; def != null; def = def.next) {
1345                         if (def.list == null) // no default value
1346                         {
1347                             continue;
1348                         }
1349                         //              Push the default of every declared attribute
1350                         //              which is not found among the ones read so far.
1351                         Pair act = find(att.next, def.chars);
1352                         if (act == null) {
1353                             push(new Input(def.list.chars));  // the default text becomes input
1354                         }
1355                     }
1356                     if (mInp != inp) {  // defaults have been added
1357                         attr(att);  // read the pushed defaults as attributes
1358                         return;
1359                     }
1360                 }
1361                 //              Ensure the attribute string array capacity
1362                 mAttrs.setLength(mAttrIdx);
1363                 mItems = mAttrs.mItems;
1364                 return;
1365
1366             case EOS:
1367                 panic(FAULT);  // no break: the next label is reached only if panic returns
1368
1369             default:
1370                 //              Read the attribute name and value
1371                 att.chars = qname(mIsNSAware);
1372                 att.name = att.local();
1373                 String type = atype(att);  // sets attribute's type on att.id
1374                 wsskip();
1375                 if (getch() != '=') {
1376                     panic(FAULT);
1377                 }
1378                 bqstr((char) att.id);   // read the value with normalization.
1379                 String val = new String(mBuff, 1, mBuffIdx);
1380                 Pair next = pair(att);  // the pair handed to the recursive call
1381                 next.num = (att.num & ~0x1);  // inherit attribute flags
1382                 //              Put a namespace declaration on top of the prefix stack
1383                 if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
1384                     //          An ordinary attribute
1385                     mAttrIdx++;
1386                     attr(next);     // recursive call to parse the next attribute
1387                     mAttrIdx--;
1388                     //          Add the attribute to the attributes string array
1389                     char idx = (char) (mAttrIdx << 3);  // eight array slots per attribute
1390                     mItems[idx + 1] = att.qname();  // attr qname
1391                     mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
1392                     mItems[idx + 3] = val;          // attr value
1393                     mItems[idx + 4] = type;         // attr type
1394                     switch (att.num & 0x3) {
1395                         case 0x0:
1396                             mItems[idx + 5] = null;
1397                             break;
1398
1399                         case 0x1:  // declared attribute
1400                             mItems[idx + 5] = "d";
1401                             break;
1402
1403                         default:  // 0x2, 0x3 - default attribute always declared
1404                             mItems[idx + 5] = "D";
1405                             break;
1406                     }
1407                     //          Resolve the prefix if any and report the attribute
1408                     //          NOTE: The attribute does not accept the default namespace.
1409                     mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
1410                 } else {
1411                     //          A namespace declaration. mPref.name contains prefix and
1412                     //          mPref.value contains namespace URI set by isdecl method.
1413                     //          Report a start of the new mapping
1414                     newPrefix();
1415                     //          Recursive call to parse the next attribute
1416                     attr(next);
1417                     //          NOTE: The namespace declaration is not reported.
1418                 }
1419                 del(next);
1420                 break;
1421         }
1422     }
1423 
1424     /**
1425      * Retrieves attribute type.
1426      *
1427      * This method sets the type of normalization in the attribute
1428      * <code>id</code> field and returns the name of attribute type.
1429      *
1430      * @param att An object which represents current attribute.
1431      * @return The name of the attribute type.
1432      * @exception Exception is parser specific exception form panic method.
1433      */
1434     private String atype(Pair att)
1435             throws Exception {
1436         Pair attr;
1437 
1438         // CDATA-type normalization by default [#3.3.3]
1439         att.id = 'c';
1440         if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) {
1441             return "CDATA";
1442         }
1443 
1444         att.num |= 0x1;  // attribute is declared
1445 
1446         // Non-CDATA normalization except when the attribute type is CDATA.
1447         att.id = 'i';
1448         switch (attr.id) {
1449             case 'i':
1450                 return "ID";
1451 
1452             case 'r':
1453                 return "IDREF";
1454 
1455             case 'R':
1456                 return "IDREFS";
1457 
1458             case 'n':
1459                 return "ENTITY";
1460 
1461             case 'N':
1462                 return "ENTITIES";
1463 
1464             case 't':
1465                 return "NMTOKEN";
1466 
1467             case 'T':
1468                 return "NMTOKENS";
1469 
1470             case 'u':
1471                 return "NMTOKEN";
1472 
1473             case 'o':
1474                 return "NOTATION";
1475 
1476             case 'c':
1477                 att.id = 'c';
1478                 return "CDATA";
1479 
1480             default:
1481                 panic(FAULT);
1482         }
1483         return null;
1484     }
1485 
1486     /**
1487      * Parses a comment.
1488      *
1489      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1490      * with first &apos;-&apos; after &apos;&lt;!&apos;.
1491      *
1492      * @exception Exception is parser specific exception form panic method.
1493      */
1494     @SuppressWarnings("fallthrough")
1495     private void comm() throws Exception {
1496         if (mPh == PH_DOC_START) {
1497             mPh = PH_MISC_DTD;  // misc before DTD
1498         }               // '<!' has been already read by dispetcher.
1499         char ch;
1500         mBuffIdx = -1;
1501         for (short st = 0; st >= 0;) {
1502             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
1503             if (ch == EOS) {
1504                 panic(FAULT);
1505             }
1506             switch (st) {
1507                 case 0:     // first '-' of the comment open
1508                     if (ch == '-') {
1509                         st = 1;
1510                     } else {
1511                         panic(FAULT);
1512                     }
1513                     break;
1514 
1515                 case 1:     // secind '-' of the comment open
1516                     if (ch == '-') {
1517                         st = 2;
1518                     } else {
1519                         panic(FAULT);
1520                     }
1521                     break;
1522 
1523                 case 2:     // skip the comment body
1524                     switch (ch) {
1525                         case '-':
1526                             st = 3;
1527                             break;
1528 
1529                         default:
1530                             bappend(ch);
1531                             break;
1532                     }
1533                     break;
1534 
1535                 case 3:     // second '-' of the comment close
1536                     switch (ch) {
1537                         case '-':
1538                             st = 4;
1539                             break;
1540 
1541                         default:
1542                             bappend('-');
1543                             bappend(ch);
1544                             st = 2;
1545                             break;
1546                     }
1547                     break;
1548 
1549                 case 4:     // '>' of the comment close
1550                     if (ch == '>') {
1551                         comm(mBuff, mBuffIdx + 1);
1552                         st = -1;
1553                         break;
1554                     }
1555                 // else - panic [#2.5 compatibility note]
1556 
1557                 default:
1558                     panic(FAULT);
1559             }
1560         }
1561     }
1562 
1563     /**
1564      * Parses a processing instruction.
1565      *
1566      * The &apos;&lt;?&apos; is read in dispatcher so the method starts with
1567      * first character of PI target name after &apos;&lt;?&apos;.
1568      *
1569      * @exception Exception is parser specific exception form panic method.
1570      * @exception IOException
1571      */
1572     private void pi() throws Exception {
1573         // '<?' has been already read by dispetcher.
1574         char ch;
1575         String str = null;
1576         mBuffIdx = -1;
1577         for (short st = 0; st >= 0;) {
1578             ch = getch();
1579             if (ch == EOS) {
1580                 panic(FAULT);
1581             }
1582             switch (st) {
1583                 case 0:     // read the PI target name
1584                     switch (chtyp(ch)) {
1585                         case 'a':
1586                         case 'A':
1587                         case '_':
1588                         case ':':
1589                         case 'X':
1590                             bkch();
1591                             str = name(false);
1592                             //          PI target name may not be empty string [#2.6]
1593                             //          PI target name 'XML' is reserved [#2.6]
1594                             if ((str.length() == 0)
1595                                     || (mXml.name.equals(str.toLowerCase()) == true)) {
1596                                 panic(FAULT);
1597                             }
1598                             //          This is processing instruction
1599                             if (mPh == PH_DOC_START) // the begining of the document
1600                             {
1601                                 mPh = PH_MISC_DTD;    // misc before DTD
1602                             }
1603                             wsskip();  // skip spaces after the PI target name
1604                             st = 1;    // accumulate the PI body
1605                             mBuffIdx = -1;
1606                             break;
1607 
1608                         default:
1609                             panic(FAULT);
1610                     }
1611                     break;
1612 
1613                 case 1:     // accumulate the PI body
1614                     switch (ch) {
1615                         case '?':
1616                             st = 2;  // end of the PI body
1617                             break;
1618 
1619                         default:
1620                             bappend(ch);
1621                             break;
1622                     }
1623                     break;
1624 
1625                 case 2:     // end of the PI body
1626                     switch (ch) {
1627                         case '>':
1628                             //          PI has been read.
1629                             pi(str, new String(mBuff, 0, mBuffIdx + 1));
1630                             st = -1;
1631                             break;
1632 
1633                         case '?':
1634                             bappend('?');
1635                             break;
1636 
1637                         default:
1638                             bappend('?');
1639                             bappend(ch);
1640                             st = 1;  // accumulate the PI body
1641                             break;
1642                     }
1643                     break;
1644 
1645                 default:
1646                     panic(FAULT);
1647             }
1648         }
1649     }
1650 
1651     /**
1652      * Parses a character data.
1653      *
1654      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1655      * with first &apos;[&apos; after &apos;&lt;!&apos;.
1656      *
1657      * @exception Exception is parser specific exception form panic method.
1658      * @exception IOException
1659      */
1660     private void cdat()
1661             throws Exception {
1662         // '<!' has been already read by dispetcher.
1663         char ch;
1664         mBuffIdx = -1;
1665         for (short st = 0; st >= 0;) {
1666             ch = getch();
1667             switch (st) {
1668                 case 0:     // the first '[' of the CDATA open
1669                     if (ch == '[') {
1670                         st = 1;
1671                     } else {
1672                         panic(FAULT);
1673                     }
1674                     break;
1675 
1676                 case 1:     // read "CDATA"
1677                     if (chtyp(ch) == 'A') {
1678                         bappend(ch);
1679                     } else {
1680                         if ("CDATA".equals(
1681                                 new String(mBuff, 0, mBuffIdx + 1)) != true) {
1682                             panic(FAULT);
1683                         }
1684                         bkch();
1685                         st = 2;
1686                     }
1687                     break;
1688 
1689                 case 2:     // the second '[' of the CDATA open
1690                     if (ch != '[') {
1691                         panic(FAULT);
1692                     }
1693                     mBuffIdx = -1;
1694                     st = 3;
1695                     break;
1696 
1697                 case 3:     // read data before the first ']'
1698                     if (ch != ']') {
1699                         bappend(ch);
1700                     } else {
1701                         st = 4;
1702                     }
1703                     break;
1704 
1705                 case 4:     // read the second ']' or continue to read the data
1706                     if (ch != ']') {
1707                         bappend(']');
1708                         bappend(ch);
1709                         st = 3;
1710                     } else {
1711                         st = 5;
1712                     }
1713                     break;
1714 
1715                 case 5:     // read '>' or continue to read the data
1716                     switch (ch) {
1717                         case ']':
1718                             bappend(']');
1719                             break;
1720 
1721                         case '>':
1722                             bflash();
1723                             st = -1;
1724                             break;
1725 
1726                         default:
1727                             bappend(']');
1728                             bappend(']');
1729                             bappend(ch);
1730                             st = 3;
1731                             break;
1732                     }
1733                     break;
1734 
1735                 default:
1736                     panic(FAULT);
1737             }
1738         }
1739     }
1740 
1741     /**
1742      * Reads a xml name.
1743      *
1744      * The xml name must conform "Namespaces in XML" specification. Therefore
1745      * the ':' character is not allowed in the name. This method should be used
1746      * for PI and entity names which may not have a namespace according to the
1747      * specification mentioned above.
1748      *
1749      * @param ns The true value turns namespace conformance on.
1750      * @return The name has been read.
1751      * @exception Exception When incorrect character appear in the name.
1752      * @exception IOException
1753      */
1754     protected String name(boolean ns)
1755             throws Exception {
1756         mBuffIdx = -1;
1757         bname(ns);
1758         return new String(mBuff, 1, mBuffIdx);
1759     }
1760 
1761     /**
1762      * Reads a qualified xml name.
1763      *
1764      * The characters of a qualified name is an array of characters. The first
1765      * (chars[0]) character is the index of the colon character which separates
1766      * the prefix from the local name. If the index is zero, the name does not
1767      * contain separator or the parser works in the namespace unaware mode. The
1768      * length of qualified name is the length of the array minus one.
1769      *
1770      * @param ns The true value turns namespace conformance on.
1771      * @return The characters of a qualified name.
1772      * @exception Exception When incorrect character appear in the name.
1773      * @exception IOException
1774      */
1775     protected char[] qname(boolean ns)
1776             throws Exception {
1777         mBuffIdx = -1;
1778         bname(ns);
1779         char chars[] = new char[mBuffIdx + 1];
1780         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1781         return chars;
1782     }
1783 
1784     /**
1785      * Reads the public or/and system identifiers.
1786      *
1787      * @param inp The input object.
1788      * @exception Exception is parser specific exception form panic method.
1789      * @exception IOException
1790      */
1791     private void pubsys(Input inp)
1792             throws Exception {
1793         Pair pair = pubsys(' ');
1794         inp.pubid = pair.name;
1795         inp.sysid = pair.value;
1796         del(pair);
1797     }
1798 
1799     /**
1800      * Reads the public or/and system identifiers.
1801      *
1802      * @param flag The 'N' allows public id be without system id.
1803      * @return The public or/and system identifiers pair.
1804      * @exception Exception is parser specific exception form panic method.
1805      * @exception IOException
1806      */
1807     @SuppressWarnings("fallthrough")
1808     private Pair pubsys(char flag) throws Exception {
1809         Pair ids = pair(null);
1810         String str = name(false);
1811         if ("PUBLIC".equals(str) == true) {
1812             bqstr('i');  // non-CDATA normalization [#4.2.2]
1813             ids.name = new String(mBuff, 1, mBuffIdx);
1814             switch (wsskip()) {
1815                 case '\"':
1816                 case '\'':
1817                     bqstr(' ');
1818                     ids.value = new String(mBuff, 1, mBuffIdx);
1819                     break;
1820 
1821                 case EOS:
1822                     panic(FAULT);
1823 
1824                 default:
1825                     if (flag != 'N') // [#4.7]
1826                     {
1827                         panic(FAULT);
1828                     }
1829                     ids.value = null;
1830                     break;
1831             }
1832             return ids;
1833         } else if ("SYSTEM".equals(str) == true) {
1834             ids.name = null;
1835             bqstr(' ');
1836             ids.value = new String(mBuff, 1, mBuffIdx);
1837             return ids;
1838         }
1839         panic(FAULT);
1840         return null;
1841     }
1842 
1843     /**
1844      * Reads an attribute value.
1845      *
1846      * The grammar this method can read is:
1847      * <pre>{@code
1848      * eqstr := S "=" qstr
1849      * qstr  := S ("'" string "'") | ('"' string '"')
1850      * }</pre>
1851      * This method resolves entities
1852      * inside a string unless the parser parses DTD.
1853      *
1854      * @param flag The '=' character forces the method to accept the '='
1855      * character before quoted string and read the following string as not an
1856      * attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
1857      * '-' - not an attribute value; 'd' - in DTD context.
1858      * @return The content of the quoted strign as a string.
1859      * @exception Exception is parser specific exception form panic method.
1860      * @exception IOException
1861      */
1862     protected String eqstr(char flag) throws Exception {
1863         if (flag == '=') {
1864             wsskip();
1865             if (getch() != '=') {
1866                 panic(FAULT);
1867             }
1868         }
1869         bqstr((flag == '=') ? '-' : flag);
1870         return new String(mBuff, 1, mBuffIdx);
1871     }
1872 
    /**
     * Resolves an entity reference.
     *
     * This method resolves built-in and character entity references in place
     * (the replacement character is appended to the parser buffer). For a
     * declared general entity it pushes a new input: the entity itself when it
     * is internal, or the source returned by <code>resolveEnt</code> when it
     * is external. While the parser is in the DTD phase, a reference which is
     * not built-in is left in the buffer "as is".
     *
     * The method is entered after the leading '&amp;' has been read; it
     * appends '&amp;' to the buffer itself and uses the buffer as scratch
     * space for the entity name or character code.
     *
     * @param flag The 'x' character makes the method return the name of an
     * entity it cannot resolve instead of calling panic; 'i' character -
     * indicates non-CDATA normalization.
     * @return Name of unresolved entity or <code>null</code> if entity had been
     * resolved successfully.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private String ent(char flag) throws Exception {
        char ch;
        // Buffer position of the '&' appended below; everything after it is
        // scratch data which is discarded before the method returns.
        int idx = mBuffIdx + 1;
        Input inp = null;
        String str = null;
        mESt = 0x100;  // reset the built-in entity recognizer
        bappend('&');
        // States: 0 - first character, 1 - entity name, 2 - decimal character
        // reference, 3 - hexadecimal character reference, -1 - done.
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // the first character of the entity name
                case 1:     // read built-in entity name
                    switch (chtyp(ch)) {
                        case 'd':
                        case '.':
                        case '-':
                            // These characters may not start a name.
                            if (st != 1) {
                                panic(FAULT);
                            }
                            // intentional fall through to the name characters
                        case 'a':
                        case 'A':
                        case '_':
                        case 'X':
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ':':
                            // ':' is rejected in namespace aware mode
                            if (mIsNSAware != false) {
                                panic(FAULT);
                            }
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ';':
                            if (mESt < 0x100) {
                                //              The entity is a built-in entity:
                                //              replace "&name" with its character
                                mBuffIdx = idx - 1;
                                bappend(mESt);
                                st = -1;
                                break;
                            } else if (mPh == PH_DTD) {
                                //              In DTD entity declaration has to resolve character
                                //              entities and include "as is" others. [#4.4.7]
                                bappend(';');
                                st = -1;
                                break;
                            }
                            //          Convert an entity name to a string
                            str = new String(mBuff, idx + 1, mBuffIdx - idx);
                            inp = mEnt.get(str);
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (inp != null) {
                                if (inp.chars == null) {
                                    //          External entity
                                    InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                    if (is != null) {
                                        push(new Input(BUFFSIZE_READER));
                                        setinp(is);
                                        mInp.pubid = inp.pubid;
                                        mInp.sysid = inp.sysid;
                                        str = null;  // the entity is resolved
                                    } else {
                                        //              Unresolved external entity
                                        if (flag != 'x') {
                                            panic(FAULT);  // unknown entity within markup
                                        }
                                        //              str is name of unresolved entity
                                    }
                                } else {
                                    //          Internal entity
                                    push(inp);
                                    str = null;  // the entity is resolved
                                }
                            } else {
                                //              Unknown or general unparsed entity
                                if (flag != 'x') {
                                    panic(FAULT);  // unknown entity within markup
                                }
                                //              str is name of unresolved entity
                            }
                            st = -1;
                            break;

                        case '#':
                            // '#' is only valid right after '&'
                            if (st != 0) {
                                panic(FAULT);
                            }
                            st = 2;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 2:     // read character entity
                    switch (chtyp(ch)) {
                        case 'd':
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                                // Only codes below 0xffff are accepted
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // A space, or any character while reading a nested
                            // input, goes through the normalizing append.
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        case 'a':
                            //          If the entity buffer is empty and ch == 'x'
                            //          this is a hexadecimal reference "&#x...;"
                            if ((mBuffIdx == idx) && (ch == 'x')) {
                                st = 3;
                                break;
                            }
                            // intentional fall through: any other letter is a fault
                        default:
                            panic(FAULT);
                    }
                    break;

                case 3:     // read hex character entity
                    switch (chtyp(ch)) {
                        case 'A':
                        case 'a':
                        case 'd':
                            // Letters which are not hex digits are rejected
                            // later by Integer.parseInt.
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                                // Only codes below 0xffff are accepted
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // Same append rule as for decimal references.
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }

        return str;
    }
2066 
2067     /**
2068      * Resoves a parameter entity.
2069      *
2070      * This method resolves a parameter entity references. It is also reports
2071      * external entities to the application.
2072      *
2073      * @param flag The '-' instruct the method to do not set up surrounding
2074      * spaces [#4.4.8].
2075      * @exception Exception is parser specific exception form panic method.
2076      * @exception IOException
2077      */
2078     @SuppressWarnings("fallthrough")
2079     private void pent(char flag) throws Exception {
2080         char ch;
2081         int idx = mBuffIdx + 1;
2082         Input inp = null;
2083         String str = null;
2084         bappend('%');
2085         if (mPh != PH_DTD) // the DTD internal subset
2086         {
2087             return;         // Not Recognized [#4.4.1]
2088         }               //              Read entity name
2089         bname(false);
2090         str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2091         if (getch() != ';') {
2092             panic(FAULT);
2093         }
2094         inp = mPEnt.get(str);
2095         //              Restore the buffer offset
2096         mBuffIdx = idx - 1;
2097         if (inp != null) {
2098             if (inp.chars == null) {
2099                 //              External parameter entity
2100                 InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
2101                 if (is != null) {
2102                     if (flag != '-') {
2103                         bappend(' ');  // tail space
2104                     }
2105                     push(new Input(BUFFSIZE_READER));
2106                     // BUG: there is no leading space! [#4.4.8]
2107                     setinp(is);
2108                     mInp.pubid = inp.pubid;
2109                     mInp.sysid = inp.sysid;
2110                 } else {
2111                     //          Unresolved external parameter entity
2112                     skippedEnt("%" + str);
2113                 }
2114             } else {
2115                 //              Internal parameter entity
2116                 if (flag == '-') {
2117                     //          No surrounding spaces
2118                     inp.chIdx = 1;
2119                 } else {
2120                     //          Insert surrounding spaces
2121                     bappend(' ');  // tail space
2122                     inp.chIdx = 0;
2123                 }
2124                 push(inp);
2125             }
2126         } else {
2127             //          Unknown parameter entity
2128             skippedEnt("%" + str);
2129         }
2130     }
2131 
2132     /**
2133      * Recognizes and handles a namespace declaration.
2134      *
2135      * This method identifies a type of namespace declaration if any and puts
2136      * new mapping on top of prefix stack.
2137      *
2138      * @param name The attribute qualified name (<code>name.value</code> is a
2139      * <code>String</code> object which represents the attribute prefix).
2140      * @param value The attribute value.
2141      * @return <code>true</code> if a namespace declaration is recognized.
2142      */
2143     private boolean isdecl(Pair name, String value) {
2144         if (name.chars[0] == 0) {
2145             if ("xmlns".equals(name.name) == true) {
2146                 //              New default namespace declaration
2147                 mPref = pair(mPref);
2148                 mPref.list = mElm;  // prefix owner element
2149                 mPref.value = value;
2150                 mPref.name = "";
2151                 mPref.chars = NONS;
2152                 mElm.num++;  // namespace counter
2153                 return true;
2154             }
2155         } else {
2156             if (name.eqpref(XMLNS) == true) {
2157                 //              New prefix declaration
2158                 int len = name.name.length();
2159                 mPref = pair(mPref);
2160                 mPref.list = mElm;  // prefix owner element
2161                 mPref.value = value;
2162                 mPref.name = name.name;
2163                 mPref.chars = new char[len + 1];
2164                 mPref.chars[0] = (char) (len + 1);
2165                 name.name.getChars(0, len, mPref.chars, 1);
2166                 mElm.num++;  // namespace counter
2167                 return true;
2168             }
2169         }
2170         return false;
2171     }
2172 
2173     /**
2174      * Resolves a prefix.
2175      *
2176      * @return The namespace assigned to the prefix.
2177      * @exception Exception When mapping for specified prefix is not found.
2178      */
2179     private String rslv(char[] qname)
2180             throws Exception {
2181         for (Pair pref = mPref; pref != null; pref = pref.next) {
2182             if (pref.eqpref(qname) == true) {
2183                 return pref.value;
2184             }
2185         }
2186         if (qname[0] == 1) {  // QNames like ':local'
2187             for (Pair pref = mPref; pref != null; pref = pref.next) {
2188                 if (pref.chars[0] == 0) {
2189                     return pref.value;
2190                 }
2191             }
2192         }
2193         panic(FAULT);
2194         return null;
2195     }
2196 
2197     /**
2198      * Skips xml white space characters.
2199      *
2200      * This method skips white space characters (' ', '\t', '\n', '\r') and
2201      * looks ahead not white space character.
2202      *
2203      * @return The first not white space look ahead character.
2204      * @exception IOException
2205      */
2206     protected char wsskip()
2207             throws IOException {
2208         char ch;
2209         while (true) {
2210             //          Read next character
2211             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2212             if (ch < 0x80) {
2213                 if (nmttyp[ch] != 3) // [ \t\n\r]
2214                 {
2215                     break;
2216                 }
2217             } else {
2218                 break;
2219             }
2220         }
2221         mChIdx--;  // bkch();
2222         return ch;
2223     }
2224 
    /**
     * Reports document type.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity or <code>null</code>.
     * @param sysid The system identifier of the entity or <code>null</code>.
     * @throws SAXException if the implementation fails to handle the report
     */
    protected abstract void docType(String name, String pubid, String sysid)
            throws SAXException;
2234 
    /**
     * Reports the start of the DTD internal subset.
     *
     * @throws SAXException if the receiver throws SAXException
     */
    public abstract void startInternalSub ()  throws SAXException;
2241 
    /**
     * Reports a comment.
     *
     * The comment parser passes its own character buffer, so only the first
     * <code>length</code> characters of <code>text</code> belong to the
     * comment.
     *
     * @param text The comment text starting from first character.
     * @param length The number of characters in comment.
     */
    protected abstract void comm(char[] text, int length);
2249 
    /**
     * Reports a processing instruction.
     *
     * @param target The processing instruction target name.
     * @param body The processing instruction body text.
     * @throws Exception if the implementation fails to handle the report
     */
    protected abstract void pi(String target, String body)
            throws Exception;
2258 
    /**
     * Reports new namespace prefix. The method takes no arguments: the
     * Namespace prefix being declared is <code>mPref.name</code> and the
     * Namespace URI the prefix is mapped to is <code>mPref.value</code>. An
     * empty string is used for the default element namespace, which has no
     * prefix.
     *
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void newPrefix()
            throws Exception;
2267 
    /**
     * Reports skipped entity name.
     *
     * @param name The entity name.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void skippedEnt(String name)
            throws Exception;
2275 
    /**
     * Returns an
     * <code>InputSource</code> for specified entity or
     * <code>null</code>.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity.
     * @param sysid The system identifier of the entity.
     * @return An <code>InputSource</code> for the entity or <code>null</code>.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract InputSource resolveEnt(
            String name, String pubid, String sysid)
            throws Exception;
2288 
    /**
     * Reports notation declaration.
     *
     * @param name The notation's name.
     * @param pubid The notation's public identifier, or null if none was given.
     * @param sysid The notation's system identifier, or null if none was given.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void notDecl(String name, String pubid, String sysid)
            throws Exception;
2298 
    /**
     * Reports unparsed entity declaration.
     *
     * @param name The unparsed entity's name.
     * @param pubid The entity's public identifier, or null if none was given.
     * @param sysid The entity's system identifier.
     * @param notation The name of the associated notation.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void unparsedEntDecl(
            String name, String pubid, String sysid, String notation)
            throws Exception;
2310 
    /**
     * Notifies the handler about fatal parsing error.
     *
     * NOTE(review): callers in this class keep statements after the call
     * (for example a <code>return null</code> or a switch fall through), so
     * they compile whether or not this method returns; presumably every
     * implementation throws - confirm against the subclasses.
     *
     * @param msg The problem description message.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void panic(String msg)
            throws Exception;
2318 
    /**
     * Reads a qualified xml name.
     *
     * This is low level routine which leaves a qName in the buffer. The
     * characters of a qualified name is an array of characters. The first
     * (chars[0]) character is the index of the colon character which separates
     * the prefix from the local name. If the index is zero, the name does not
     * contain separator or the parser works in the namespace unaware mode. The
     * length of qualified name is the length of the array minus one.
     *
     * Characters are not appended to the buffer one by one. The method only
     * counts them (<code>bchidx</code>) and copies the pending run from the
     * character buffer with <code>bcopy</code>, either before more input is
     * requested or when the name ends.
     *
     * @param ns The true value turns namespace conformance on.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    private void bname(boolean ns)
            throws Exception {
        char ch;
        char type;
        mBuffIdx++;  // allocate a char for colon offset
        int bqname = mBuffIdx;      // buffer index of the colon offset slot
        int bcolon = bqname;        // buffer index of the colon; bqname means "no colon yet"
        int bchidx = bqname + 1;    // buffer index where the next name char belongs
        int bstart = bchidx;        // buffer index where the pending run will be copied to
        int cstart = mChIdx;        // mChars index where the pending run starts
        //              States 0/1 read the prefix, 2/3 read the suffix; without
        //              namespace conformance the whole name is read as a suffix.
        short st = (short) ((ns == true) ? 0 : 2);
        while (true) {
            //          Read next character
            if (mChIdx >= mChLen) {
                //      The character buffer is used up: save the pending run
                //      before asking for more input, then restart the run at
                //      the pushed back character.
                bcopy(cstart, bstart);
                getch();
                mChIdx--;  // bkch();
                cstart = mChIdx;
                bstart = bchidx;
            }
            ch = mChars[mChIdx++];
            //          Characters outside ASCII are treated as type 0 ([X])
            type = (char) 0;  // [X]
            if (ch < 0x80) {
                type = (char) nmttyp[ch];
            } else if (ch == EOS) {
                panic(FAULT);
            }
            //          Parse QName
            switch (st) {
                case 0:     // read the first char of the prefix
                case 2:     // read the first char of the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                            bchidx++;  // append char to the buffer
                            st++;      // (st == 0)? 1: 3;
                            break;

                        case 1:  // [:]
                            //  The colon is pushed back and read again in
                            //  the next state, where it is appended.
                            mChIdx--;  // bkch();
                            st++;      // (st == 0)? 1: 3;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 1:     // read the prefix
                case 3:     // read the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                        case 2:  // [.-d]
                            bchidx++;  // append char to the buffer
                            break;

                        case 1:  // [:]
                            bchidx++;  // append char to the buffer
                            if (ns == true) {
                                if (bcolon != bqname) {
                                    panic(FAULT);  // it must be only one colon
                                }
                                bcolon = bchidx - 1;
                                if (st == 1) {
                                    st = 2;  // the prefix is complete
                                }
                            }
                            break;

                        default:
                            //  End of the name: push the terminator back,
                            //  copy the pending run and store the colon
                            //  offset relative to the offset slot.
                            mChIdx--;  // bkch();
                            bcopy(cstart, bstart);
                            mBuff[bqname] = (char) (bcolon - bqname);
                            return;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
    }
2414 
2415     /**
2416      * Reads a nmtoken.
2417      *
2418      * This is low level routine which leaves a nmtoken in the buffer.
2419      *
2420      * @exception Exception is parser specific exception form panic method.
2421      * @exception IOException
2422      */
2423     @SuppressWarnings("fallthrough")
2424     private void bntok() throws Exception {
2425         char ch;
2426         mBuffIdx = -1;
2427         bappend((char) 0);  // default offset to the colon char
2428         while (true) {
2429             ch = getch();
2430             switch (chtyp(ch)) {
2431                 case 'a':
2432                 case 'A':
2433                 case 'd':
2434                 case '.':
2435                 case ':':
2436                 case '-':
2437                 case '_':
2438                 case 'X':
2439                     bappend(ch);
2440                     break;
2441 
2442                 case 'Z':
2443                     panic(FAULT);
2444 
2445                 default:
2446                     bkch();
2447                     return;
2448             }
2449         }
2450     }
2451 
2452     /**
2453      * Recognizes a keyword.
2454      *
2455      * This is low level routine which recognizes one of keywords in the buffer.
2456      * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
2457      * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
2458      * Q IMPLIED - I FIXED - F
2459      *
2460      * @return an id of a keyword or '?'.
2461      * @exception Exception is parser specific exception form panic method.
2462      * @exception IOException
2463      */
2464     private char bkeyword()
2465             throws Exception {
2466         String str = new String(mBuff, 1, mBuffIdx);
2467         switch (str.length()) {
2468             case 2:  // ID
2469                 return ("ID".equals(str) == true) ? 'i' : '?';
2470 
2471             case 5:  // IDREF, CDATA, FIXED
2472                 switch (mBuff[1]) {
2473                     case 'I':
2474                         return ("IDREF".equals(str) == true) ? 'r' : '?';
2475                     case 'C':
2476                         return ("CDATA".equals(str) == true) ? 'c' : '?';
2477                     case 'F':
2478                         return ("FIXED".equals(str) == true) ? 'F' : '?';
2479                     default:
2480                         break;
2481                 }
2482                 break;
2483 
2484             case 6:  // IDREFS, ENTITY
2485                 switch (mBuff[1]) {
2486                     case 'I':
2487                         return ("IDREFS".equals(str) == true) ? 'R' : '?';
2488                     case 'E':
2489                         return ("ENTITY".equals(str) == true) ? 'n' : '?';
2490                     default:
2491                         break;
2492                 }
2493                 break;
2494 
2495             case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2496                 switch (mBuff[1]) {
2497                     case 'I':
2498                         return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2499                     case 'N':
2500                         return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2501                     case 'A':
2502                         return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2503                     case 'E':
2504                         return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2505                     default:
2506                         break;
2507                 }
2508                 break;
2509 
2510             case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2511                 switch (mBuff[2]) {
2512                     case 'N':
2513                         return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2514                     case 'M':
2515                         return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2516                     case 'O':
2517                         return ("NOTATION".equals(str) == true) ? 'o' : '?';
2518                     case 'E':
2519                         return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2520                     default:
2521                         break;
2522                 }
2523                 break;
2524 
2525             default:
2526                 break;
2527         }
2528         return '?';
2529     }
2530 
    /**
     * Reads a single or double quoted string in to the buffer.
     *
     * This method resolves entities inside a string unless the parser parses
     * DTD. The buffer is reset first and its first character is a zero colon
     * offset. White space before the opening quote is skipped. The string is
     * terminated only by the same kind of quote which opened it, and only if
     * that quote is read while the current input is the one the string
     * started in.
     *
     * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
     * not an attribute value; 'd' - in DTD context.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private void bqstr(char flag) throws Exception {
        Input inp = mInp;  // remember the original input
        mBuffIdx = -1;
        bappend((char) 0);  // default offset to the colon char
        char ch;
        //              A negative state ends the loop (closing quote found)
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // read a single or double quote
                    switch (ch) {
                        case ' ':
                        case '\n':
                        case '\r':
                        case '\t':
                            break;

                        case '\'':
                            st = 2;  // read a single quoted string
                            break;

                        case '\"':
                            st = 3;  // read a double quoted string
                            break;

                        default:
                            panic(FAULT);
                            break;
                    }
                    break;

                case 2:     // read a single quoted string
                case 3:     // read a double quoted string
                    switch (ch) {
                        case '\'':
                            //  Closes the string only in single quote mode
                            //  and only in the original input
                            if ((st == 2) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '\"':
                            //  Closes the string only in double quote mode
                            //  and only in the original input
                            if ((st == 3) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '&':
                            //  General entity references are handled by
                            //  ent() except in DTD context
                            if (flag != 'd') {
                                ent(flag);
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '%':
                            //  Parameter entity references are handled by
                            //  pent() only in DTD context
                            if (flag == 'd') {
                                pent('-');
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '<':
                            //  '<' is accepted only outside attribute values
                            //  ('-') and in DTD context ('d')
                            if ((flag == '-') || (flag == 'd')) {
                                bappend(ch);
                            } else {
                                panic(FAULT);
                            }
                            break;

                        case EOS:               // EOS before single/double quote
                            panic(FAULT);
                            //  no break: falls through if panic() returns

                        case '\r':     // EOL processing [#2.11 & #3.3.3]
                            //  "\r\n" and a lone "\r" both become "\n",
                            //  unless normalization is off or the current
                            //  input has a next input (mInp.next != null)
                            if (flag != ' ' && mInp.next == null) {
                                if (getch() != '\n') {
                                    bkch();
                                }
                                ch = '\n';
                            }
                            //  no break: the character is appended below
                        default:
                            bappend(ch, flag);
                            break;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
        //              There is maximum one space at the end of the string in
        //              i-mode (non CDATA normalization) and it has to be removed.
        if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
            mBuffIdx -= 1;
        }
    }
2642 
    /**
     * Reports characters and empties the parser's buffer. This method is called
     * only if parser is going to return control to the main loop. This means
     * that this method may use parser buffer to report characters without
     * copying them to temporary buffer.
     *
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void bflash()
            throws Exception;
2651 
    /**
     * Reports white space characters and empties the parser's buffer. This
     * method is called only if parser is going to return control to the main
     * loop. This means that this method may use parser buffer to report white
     * space without copying characters to temporary buffer.
     *
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void bflash_ws()
            throws Exception;
2660 
2661     /**
2662      * Appends a character to parser's buffer with normalization.
2663      *
2664      * @param ch The character to append to the buffer.
2665      * @param mode The normalization mode.
2666      */
2667     private void bappend(char ch, char mode) {
2668         //              This implements attribute value normalization as
2669         //              described in the XML specification [#3.3.3].
2670         switch (mode) {
2671             case 'i':  // non CDATA normalization
2672                 switch (ch) {
2673                     case ' ':
2674                     case '\n':
2675                     case '\r':
2676                     case '\t':
2677                         if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
2678                             bappend(' ');
2679                         }
2680                         return;
2681 
2682                     default:
2683                         break;
2684                 }
2685                 break;
2686 
2687             case 'c':  // CDATA normalization
2688                 switch (ch) {
2689                     case '\n':
2690                     case '\r':
2691                     case '\t':
2692                         ch = ' ';
2693                         break;
2694 
2695                     default:
2696                         break;
2697                 }
2698                 break;
2699 
2700             default:  // no normalization
2701                 break;
2702         }
2703         mBuffIdx++;
2704         if (mBuffIdx < mBuff.length) {
2705             mBuff[mBuffIdx] = ch;
2706         } else {
2707             mBuffIdx--;
2708             bappend(ch);
2709         }
2710     }
2711 
2712     /**
2713      * Appends a character to parser's buffer.
2714      *
2715      * @param ch The character to append to the buffer.
2716      */
2717     private void bappend(char ch) {
2718         try {
2719             mBuff[++mBuffIdx] = ch;
2720         } catch (Exception exp) {
2721             //          Double the buffer size
2722             char buff[] = new char[mBuff.length << 1];
2723             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2724             mBuff = buff;
2725             mBuff[mBuffIdx] = ch;
2726         }
2727     }
2728 
2729     /**
2730      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2731      * parser's buffer (mBuff).
2732      *
2733      * @param cidx The character buffer (mChars) start index.
2734      * @param bidx The parser buffer (mBuff) start index.
2735      */
2736     private void bcopy(int cidx, int bidx) {
2737         int length = mChIdx - cidx;
2738         if ((bidx + length + 1) >= mBuff.length) {
2739             //          Expand the buffer
2740             char buff[] = new char[mBuff.length + length];
2741             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2742             mBuff = buff;
2743         }
2744         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2745         mBuffIdx += length;
2746     }
2747 
2748     /**
2749      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2750      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe
2751      * 0x100 is a built-in entity replacement character.
2752      *
2753      * @param ch the next character of an entity name.
2754      */
2755     @SuppressWarnings("fallthrough")
2756     private void eappend(char ch) {
2757         switch (mESt) {
2758             case 0x100:  // "l" or "g" or "a" or "q"
2759                 switch (ch) {
2760                     case 'l':
2761                         mESt = 0x101;
2762                         break;
2763                     case 'g':
2764                         mESt = 0x102;
2765                         break;
2766                     case 'a':
2767                         mESt = 0x103;
2768                         break;
2769                     case 'q':
2770                         mESt = 0x107;
2771                         break;
2772                     default:
2773                         mESt = 0x200;
2774                         break;
2775                 }
2776                 break;
2777 
2778             case 0x101:  // "lt"
2779                 mESt = (ch == 't') ? '<' : (char) 0x200;
2780                 break;
2781 
2782             case 0x102:  // "gt"
2783                 mESt = (ch == 't') ? '>' : (char) 0x200;
2784                 break;
2785 
2786             case 0x103:  // "am" or "ap"
2787                 switch (ch) {
2788                     case 'm':
2789                         mESt = 0x104;
2790                         break;
2791                     case 'p':
2792                         mESt = 0x105;
2793                         break;
2794                     default:
2795                         mESt = 0x200;
2796                         break;
2797                 }
2798                 break;
2799 
2800             case 0x104:  // "amp"
2801                 mESt = (ch == 'p') ? '&' : (char) 0x200;
2802                 break;
2803 
2804             case 0x105:  // "apo"
2805                 mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2806                 break;
2807 
2808             case 0x106:  // "apos"
2809                 mESt = (ch == 's') ? '\'' : (char) 0x200;
2810                 break;
2811 
2812             case 0x107:  // "qu"
2813                 mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2814                 break;
2815 
2816             case 0x108:  // "quo"
2817                 mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2818                 break;
2819 
2820             case 0x109:  // "quot"
2821                 mESt = (ch == 't') ? '\"' : (char) 0x200;
2822                 break;
2823 
2824             case '<':   // "lt"
2825             case '>':   // "gt"
2826             case '&':   // "amp"
2827             case '\'':  // "apos"
2828             case '\"':  // "quot"
2829                 mESt = 0x200;
2830             default:
2831                 break;
2832         }
2833     }
2834 
2835     /**
2836      * Sets up a new input source on the top of the input stack. Note, the first
2837      * byte returned by the entity's byte stream has to be the first byte in the
2838      * entity. However, the parser does not expect the byte order mask in both
2839      * cases when encoding is provided by the input source.
2840      *
2841      * @param is A new input source to set up.
2842      * @exception IOException If any IO errors occur.
2843      * @exception Exception is parser specific exception form panic method.
2844      */
2845     protected void setinp(InputSource is)
2846             throws Exception {
2847         Reader reader = null;
2848         mChIdx = 0;
2849         mChLen = 0;
2850         mChars = mInp.chars;
2851         mInp.src = null;
2852         if (mPh < PH_DOC_START) {
2853             mIsSAlone = false;  // default [#2.9]
2854         }
2855         mIsSAloneSet = false;
2856         if (is.getCharacterStream() != null) {
2857             //          Ignore encoding in the xml text decl.
2858             reader = is.getCharacterStream();
2859             xml(reader);
2860         } else if (is.getByteStream() != null) {
2861             String expenc;
2862             if (is.getEncoding() != null) {
2863                 //              Ignore encoding in the xml text decl.
2864                 expenc = is.getEncoding().toUpperCase();
2865                 if (expenc.equals("UTF-16")) {
2866                     reader = bom(is.getByteStream(), 'U');  // UTF-16 [#4.3.3]
2867                 } else {
2868                     reader = enc(expenc, is.getByteStream());
2869                 }
2870                 xml(reader);
2871             } else {
2872                 //              Get encoding from BOM or the xml text decl.
2873                 reader = bom(is.getByteStream(), ' ');
2874                 /**
2875                  * [#4.3.3] requires BOM for UTF-16, however, it's not uncommon
2876                  * that it may be missing. A mature technique exists in Xerces
2877                  * to further check for possible UTF-16 encoding
2878                  */
2879                 if (reader == null) {
2880                     reader = utf16(is.getByteStream());
2881                 }
2882 
2883                 if (reader == null) {
2884                     //          Encoding is defined by the xml text decl.
2885                     reader = enc("UTF-8", is.getByteStream());
2886                     expenc = xml(reader);
2887                     if (!expenc.equals("UTF-8")) {
2888                         if (expenc.startsWith("UTF-16")) {
2889                             panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
2890                         }
2891                         reader = enc(expenc, is.getByteStream());
2892                     }
2893                 } else {
2894                     //          Encoding is defined by the BOM.
2895                     xml(reader);
2896                 }
2897             }
2898         } else {
2899             //          There is no support for public/system identifiers.
2900             panic(FAULT);
2901         }
2902         mInp.src = reader;
2903         mInp.pubid = is.getPublicId();
2904         mInp.sysid = is.getSystemId();
2905     }
2906 
2907     /**
2908      * Determines the entity encoding.
2909      *
2910      * This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the
2911      * first byte returned by the entity's byte stream has to be the first byte
2912      * in the entity. Also, there is no support for UCS-4.
2913      *
2914      * @param is A byte stream of the entity.
2915      * @param hint An encoding hint, character U means UTF-16.
2916      * @return a reader constructed from the BOM or UTF-8 by default.
2917      * @exception Exception is parser specific exception form panic method.
2918      * @exception IOException
2919      */
2920     private Reader bom(InputStream is, char hint)
2921             throws Exception {
2922         int val = is.read();
2923         switch (val) {
2924             case 0xef:     // UTF-8
2925                 if (hint == 'U') // must be UTF-16
2926                 {
2927                     panic(FAULT);
2928                 }
2929                 if (is.read() != 0xbb) {
2930                     panic(FAULT);
2931                 }
2932                 if (is.read() != 0xbf) {
2933                     panic(FAULT);
2934                 }
2935                 return new ReaderUTF8(is);
2936 
2937             case 0xfe:     // UTF-16, big-endian
2938                 if (is.read() != 0xff) {
2939                     panic(FAULT);
2940                 }
2941                 return new ReaderUTF16(is, 'b');
2942 
2943             case 0xff:     // UTF-16, little-endian
2944                 if (is.read() != 0xfe) {
2945                     panic(FAULT);
2946                 }
2947                 return new ReaderUTF16(is, 'l');
2948 
2949             case -1:
2950                 mChars[mChIdx++] = EOS;
2951                 return new ReaderUTF8(is);
2952 
2953             default:
2954                 if (hint == 'U') // must be UTF-16
2955                 {
2956                     panic(FAULT);
2957                 }
2958                 //              Read the rest of UTF-8 character
2959                 switch (val & 0xf0) {
2960                     case 0xc0:
2961                     case 0xd0:
2962                         mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f));
2963                         break;
2964 
2965                     case 0xe0:
2966                         mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2967                                 | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2968                         break;
2969 
2970                     case 0xf0:  // UCS-4 character
2971                         throw new UnsupportedEncodingException();
2972 
2973                     default:
2974                         mChars[mChIdx++] = (char) val;
2975                         break;
2976                 }
2977                 return null;
2978         }
2979     }
2980 
2981 
2982     /**
2983      * Using a mature technique from Xerces, this method checks further after
2984      * the bom method above to see if the encoding is UTF-16
2985      *
2986      * @param is A byte stream of the entity.
2987      * @return a reader, may be null
2988      * @exception Exception is parser specific exception form panic method.
2989      * @exception IOException
2990      */
2991     private Reader utf16(InputStream is)
2992             throws Exception {
2993         if (mChIdx != 0) {
2994             //The bom method has read ONE byte into the buffer.
2995             byte b0 = (byte)mChars[0];
2996             if (b0 == 0x00 || b0 == 0x3C) {
2997                 int b1 = is.read();
2998                 int b2 = is.read();
2999                 int b3 = is.read();
3000                 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
3001                     // UTF-16, big-endian, no BOM
3002                     mChars[0] = (char)(b1);
3003                     mChars[mChIdx++] = (char)(b3);
3004                     return new ReaderUTF16(is, 'b');
3005                 } else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
3006                     // UTF-16, little-endian, no BOM
3007                     mChars[0] = (char)(b0);
3008                     mChars[mChIdx++] = (char)(b2);
3009                     return new ReaderUTF16(is, 'l');
3010                 } else {
3011                     /**not every InputStream supports reset, so we have to remember
3012                      * the state for further parsing
3013                     **/
3014                     mChars[0] = (char)(b0);
3015                     mChars[mChIdx++] = (char)(b1);
3016                     mChars[mChIdx++] = (char)(b2);
3017                     mChars[mChIdx++] = (char)(b3);
3018                 }
3019 
3020             }
3021         }
3022         return null;
3023     }
3024     /**
3025      * Parses the xml text declaration.
3026      *
3027      * This method gets encoding from the xml text declaration [#4.3.1] if any.
3028      * The method assumes the buffer (mChars) is big enough to accommodate whole
3029      * xml text declaration.
3030      *
3031      * @param reader is entity reader.
3032      * @return The xml text declaration encoding or default UTF-8 encoding.
3033      * @exception Exception is parser specific exception form panic method.
3034      * @exception IOException
3035      */
3036     private String xml(Reader reader)
3037             throws Exception {
3038         String str = null;
3039         String enc = "UTF-8";
3040         char ch;
3041         int val;
3042         short st = 0;
3043         int byteRead =  mChIdx; //number of bytes read prior to entering this method
3044 
3045         while (st >= 0 && mChIdx < mChars.length) {
3046             if (st < byteRead) {
3047                 ch = mChars[st];
3048             } else {
3049                 ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3050                 mChars[mChIdx++] = ch;
3051             }
3052 
3053             switch (st) {
3054                 case 0:     // read '<' of xml declaration
3055                     switch (ch) {
3056                         case '<':
3057                             st = 1;
3058                             break;
3059 
3060                         case 0xfeff:    // the byte order mask
3061                             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3062                             mChars[mChIdx - 1] = ch;
3063                             st = (short) ((ch == '<') ? 1 : -1);
3064                             break;
3065 
3066                         default:
3067                             st = -1;
3068                             break;
3069                     }
3070                     break;
3071 
3072                 case 1:     // read '?' of xml declaration [#4.3.1]
3073                     st = (short) ((ch == '?') ? 2 : -1);
3074                     break;
3075 
3076                 case 2:     // read 'x' of xml declaration [#4.3.1]
3077                     st = (short) ((ch == 'x') ? 3 : -1);
3078                     break;
3079 
3080                 case 3:     // read 'm' of xml declaration [#4.3.1]
3081                     st = (short) ((ch == 'm') ? 4 : -1);
3082                     break;
3083 
3084                 case 4:     // read 'l' of xml declaration [#4.3.1]
3085                     st = (short) ((ch == 'l') ? 5 : -1);
3086                     break;
3087 
3088                 case 5:     // read white space after 'xml'
3089                     switch (ch) {
3090                         case ' ':
3091                         case '\t':
3092                         case '\r':
3093                         case '\n':
3094                             st = 6;
3095                             break;
3096 
3097                         default:
3098                             st = -1;
3099                             break;
3100                     }
3101                     break;
3102 
3103                 case 6:     // read content of xml declaration
3104                     switch (ch) {
3105                         case '?':
3106                             st = 7;
3107                             break;
3108 
3109                         case EOS:
3110                             st = -2;
3111                             break;
3112 
3113                         default:
3114                             break;
3115                     }
3116                     break;
3117 
3118                 case 7:     // read '>' after '?' of xml declaration
3119                     switch (ch) {
3120                         case '>':
3121                         case EOS:
3122                             st = -2;
3123                             break;
3124 
3125                         default:
3126                             st = 6;
3127                             break;
3128                     }
3129                     break;
3130 
3131                 default:
3132                     panic(FAULT);
3133                     break;
3134             }
3135         }
3136         mChLen = mChIdx;
3137         mChIdx = 0;
3138         //              If there is no xml text declaration, the encoding is default.
3139         if (st == -1) {
3140             return enc;
3141         }
3142         mChIdx = 5;  // the first white space after "<?xml"
3143         //              Parse the xml text declaration
3144         for (st = 0; st >= 0;) {
3145             ch = getch();
3146             switch (st) {
3147                 case 0:     // skip spaces after the xml declaration name
3148                     if (chtyp(ch) != ' ') {
3149                         bkch();
3150                         st = 1;
3151                     }
3152                     break;
3153 
3154                 case 1:     // read xml declaration version
3155                 case 2:     // read xml declaration encoding or standalone
3156                 case 3:     // read xml declaration standalone
3157                     switch (chtyp(ch)) {
3158                         case 'a':
3159                         case 'A':
3160                         case '_':
3161                             bkch();
3162                             str = name(false).toLowerCase();
3163                             if ("version".equals(str) == true) {
3164                                 if (st != 1) {
3165                                     panic(FAULT);
3166                                 }
3167                                 if ("1.0".equals(eqstr('=')) != true) {
3168                                     panic(FAULT);
3169                                 }
3170                                 mInp.xmlver = 0x0100;
3171                                 st = 2;
3172                             } else if ("encoding".equals(str) == true) {
3173                                 if (st != 2) {
3174                                     panic(FAULT);
3175                                 }
3176                                 mInp.xmlenc = eqstr('=').toUpperCase();
3177                                 enc = mInp.xmlenc;
3178                                 st = 3;
3179                             } else if ("standalone".equals(str) == true) {
3180                                 if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
3181                                 {
3182                                     panic(FAULT);
3183                                 }
3184                                 str = eqstr('=').toLowerCase();
3185                                 //              Check the 'standalone' value and use it [#5.1]
3186                                 if (str.equals("yes") == true) {
3187                                     mIsSAlone = true;
3188                                 } else if (str.equals("no") == true) {
3189                                     mIsSAlone = false;
3190                                 } else {
3191                                     panic(FAULT);
3192                                 }
3193                                 mIsSAloneSet = true;
3194                                 st = 4;
3195                             } else {
3196                                 panic(FAULT);
3197                             }
3198                             break;
3199 
3200                         case ' ':
3201                             break;
3202 
3203                         case '?':
3204                             if (st == 1) {
3205                                 panic(FAULT);
3206                             }
3207                             bkch();
3208                             st = 4;
3209                             break;
3210 
3211                         default:
3212                             panic(FAULT);
3213                     }
3214                     break;
3215 
3216                 case 4:     // end of xml declaration
3217                     switch (chtyp(ch)) {
3218                         case '?':
3219                             if (getch() != '>') {
3220                                 panic(FAULT);
3221                             }
3222                             if (mPh <= PH_DOC_START) {
3223                                 mPh = PH_MISC_DTD;  // misc before DTD
3224                             }
3225                             st = -1;
3226                             break;
3227 
3228                         case ' ':
3229                             break;
3230 
3231                         default:
3232                             panic(FAULT);
3233                     }
3234                     break;
3235 
3236                 default:
3237                     panic(FAULT);
3238             }
3239         }
3240         return enc;
3241     }
3242 
3243     /**
3244      * Sets up the document reader.
3245      *
3246      * @param name an encoding name.
3247      * @param is the document byte input stream.
3248      * @return a reader constructed from encoding name and input stream.
3249      * @exception UnsupportedEncodingException
3250      */
3251     private Reader enc(String name, InputStream is)
3252             throws UnsupportedEncodingException {
3253         //              DO NOT CLOSE current reader if any!
3254         if (name.equals("UTF-8")) {
3255             return new ReaderUTF8(is);
3256         } else if (name.equals("UTF-16LE")) {
3257             return new ReaderUTF16(is, 'l');
3258         } else if (name.equals("UTF-16BE")) {
3259             return new ReaderUTF16(is, 'b');
3260         } else {
3261             return new InputStreamReader(is, name);
3262         }
3263     }
3264 
3265     /**
3266      * Sets up current input on the top of the input stack.
3267      *
3268      * @param inp A new input to set up.
3269      */
3270     protected void push(Input inp) {
3271         mInp.chLen = mChLen;
3272         mInp.chIdx = mChIdx;
3273         inp.next = mInp;
3274         mInp = inp;
3275         mChars = inp.chars;
3276         mChLen = inp.chLen;
3277         mChIdx = inp.chIdx;
3278     }
3279 
3280     /**
3281      * Restores previous input on the top of the input stack.
3282      */
3283     protected void pop() {
3284         if (mInp.src != null) {
3285             try {
3286                 mInp.src.close();
3287             } catch (IOException ioe) {
3288             }
3289             mInp.src = null;
3290         }
3291         mInp = mInp.next;
3292         if (mInp != null) {
3293             mChars = mInp.chars;
3294             mChLen = mInp.chLen;
3295             mChIdx = mInp.chIdx;
3296         } else {
3297             mChars = null;
3298             mChLen = 0;
3299             mChIdx = 0;
3300         }
3301     }
3302 
3303     /**
3304      * Maps a character to its type.
3305      *
3306      * Possible character type values are:
3307      * <ul>
3308      * <li>' ' - for any kind of whitespace character;</li>
3309      * <li>'a' - for any lower case alphabetical character value;</li>
3310      * <li>'A' - for any upper case alphabetical character value;</li>
3311      * <li>'d' - for any decimal digit character value;</li>
3312      * <li>'z' - for any character less than ' ' except '\t', '\n', '\r';</li>
3313      * <li>'X' - for any not ASCII character;</li>
3314      * <li>'Z' - for EOS character.</li>
3315      * </ul>
3316      * An ASCII (7 bit) character which does not fall in any category
3317      * listed above is mapped to itself.
3318      *
3319      * @param ch The character to map.
3320      * @return The type of character.
3321      */
3322     protected char chtyp(char ch) {
3323         if (ch < 0x80) {
3324             return (char) asctyp[ch];
3325         }
3326         return (ch != EOS) ? 'X' : 'Z';
3327     }
3328 
3329     /**
3330      * Retrives the next character in the document.
3331      *
3332      * @return The next character in the document.
3333      */
3334     protected char getch()
3335             throws IOException {
3336         if (mChIdx >= mChLen) {
3337             if (mInp.src == null) {
3338                 pop();  // remove internal entity
3339                 return getch();
3340             }
3341             //          Read new portion of the document characters
3342             int Num = mInp.src.read(mChars, 0, mChars.length);
3343             if (Num < 0) {
3344                 if (mInp != mDoc) {
3345                     pop();  // restore the previous input
3346                     return getch();
3347                 } else {
3348                     mChars[0] = EOS;
3349                     mChLen = 1;
3350                 }
3351             } else {
3352                 mChLen = Num;
3353             }
3354             mChIdx = 0;
3355         }
3356         return mChars[mChIdx++];
3357     }
3358 
3359     /**
3360      * Puts back the last read character.
3361      *
3362      * This method <strong>MUST NOT</strong> be called more then once after each
3363      * call of {@link #getch getch} method.
3364      */
3365     protected void bkch()
3366             throws Exception {
3367         if (mChIdx <= 0) {
3368             panic(FAULT);
3369         }
3370         mChIdx--;
3371     }
3372 
3373     /**
3374      * Sets the current character.
3375      *
3376      * @param ch The character to set.
3377      */
3378     protected void setch(char ch) {
3379         mChars[mChIdx] = ch;
3380     }
3381 
3382     /**
3383      * Finds a pair in the pair chain by a qualified name.
3384      *
3385      * @param chain The first element of the chain of pairs.
3386      * @param qname The qualified name.
3387      * @return A pair with the specified qualified name or null.
3388      */
3389     protected Pair find(Pair chain, char[] qname) {
3390         for (Pair pair = chain; pair != null; pair = pair.next) {
3391             if (pair.eqname(qname) == true) {
3392                 return pair;
3393             }
3394         }
3395         return null;
3396     }
3397 
3398     /**
3399      * Provedes an instance of a pair.
3400      *
3401      * @param next The reference to a next pair.
3402      * @return An instance of a pair.
3403      */
3404     protected Pair pair(Pair next) {
3405         Pair pair;
3406 
3407         if (mDltd != null) {
3408             pair = mDltd;
3409             mDltd = pair.next;
3410         } else {
3411             pair = new Pair();
3412         }
3413         pair.next = next;
3414 
3415         return pair;
3416     }
3417 
3418     /**
3419      * Deletes an instance of a pair.
3420      *
3421      * @param pair The pair to delete.
3422      * @return A reference to the next pair in a chain.
3423      */
3424     protected Pair del(Pair pair) {
3425         Pair next = pair.next;
3426 
3427         pair.name = null;
3428         pair.value = null;
3429         pair.chars = null;
3430         pair.list = null;
3431         pair.next = mDltd;
3432         mDltd = pair;
3433 
3434         return next;
3435     }
3436 }