1 /*
   2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.internal.util.xml.impl;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.InputStreamReader;
  31 import java.io.Reader;
  32 import java.io.UnsupportedEncodingException;
  33 import java.util.HashMap;
  34 import java.util.Map;
  35 import jdk.internal.org.xml.sax.InputSource;
  36 import jdk.internal.org.xml.sax.SAXException;
  37 
  38 /**
  39  * XML non-validating parser engine.
  40  */
  41 public abstract class Parser {
  42 
    // Message argument handed to panic() at the fault sites of this class.
    public static final String FAULT = "";
    // Size passed to new Input(...) when an external entity is pushed.
    protected static final int BUFFSIZE_READER = 512;
    // Initial length of the parser buffer (mBuff) allocated by the constructor.
    protected static final int BUFFSIZE_PARSER = 128;
    /**
     * The end of stream character.
     */
    public static final char EOS = 0xffff;
    private Pair mNoNS; // there is no namespace
    private Pair mXml;  // the xml namespace
    private Map<String, Input> mEnt;  // the entities look up table
    private Map<String, Input> mPEnt; // the parameter entities look up table
    protected boolean mIsSAlone;     // xml decl standalone flag
    protected boolean mIsSAloneSet;  // standalone is explicitly set
    protected boolean mIsNSAware;    // if true - namespace aware mode
    protected int mPh;  // current phase of document processing
    protected static final int PH_BEFORE_DOC = -1;  // before parsing
    protected static final int PH_DOC_START = 0;   // document start
    protected static final int PH_MISC_DTD = 1;   // misc before DTD
    protected static final int PH_DTD = 2;   // DTD
    protected static final int PH_DTD_MISC = 3;   // misc after DTD
    protected static final int PH_DOCELM = 4;   // document's element
    protected static final int PH_DOCELM_MISC = 5;   // misc after element
    protected static final int PH_AFTER_DOC = 6;   // after parsing
    protected int mEvt;  // current event type
    protected static final int EV_NULL = 0;   // unknown
    protected static final int EV_ELM = 1;   // empty element
    protected static final int EV_ELMS = 2;   // start element
    protected static final int EV_ELME = 3;   // end element
    protected static final int EV_TEXT = 4;   // textual content
    protected static final int EV_WSPC = 5;   // white space content
    protected static final int EV_PI = 6;   // processing instruction
    protected static final int EV_CDAT = 7;   // character data
    protected static final int EV_COMM = 8;   // comment
    protected static final int EV_DTD = 9;   // document type definition
    protected static final int EV_ENT = 10;  // skipped entity
    private char mESt; // built-in entity recognizer state
    // mESt values:
    //   0x100   : the initial state
    //   > 0x100 : unrecognized name
    //   < 0x100 : replacement character
    protected char[] mBuff;       // parser buffer
    protected int mBuffIdx;    // index of the last char
    protected Pair mPref;       // stack of prefixes
    protected Pair mElm;        // stack of elements
    // mAttL.chars - element qname
    // mAttL.next  - next element
    // mAttL.list  - list of attributes defined on this element
    // mAttL.list.chars - attribute qname
    // mAttL.list.id    - a char representing attribute's type, see below
    // mAttL.list.next  - next attribute defined on the element
    // mAttL.list.list  - default value structure or null
    // mAttL.list.list.chars - "name='value' " chars array for Input
    //
    // Attribute type character values:
    // 'i' - "ID"
    // 'r' - "IDREF"
    // 'R' - "IDREFS"
    // 'n' - "ENTITY"
    // 'N' - "ENTITIES"
    // 't' - "NMTOKEN"
    // 'T' - "NMTOKENS"
    // 'u' - enumeration type
    // 'o' - "NOTATION"
    // 'c' - "CDATA"
    // see also: bkeyword() and atype()
    //
    protected Pair mAttL;       // list of defined attrs by element name
    protected Input mDoc;        // document entity
    protected Input mInp;        // stack of entities
    private char[] mChars;      // reading buffer
    private int mChLen;      // current capacity
    private int mChIdx;      // index to the next char
    protected Attrs mAttrs;      // attributes of the curr. element
    private String[] mItems;      // attributes array of the curr. element
    private char mAttrIdx;    // attributes counter/index
    private String mUnent;  // unresolved entity name
    private Pair mDltd;   // deleted objects for reuse
 120     /**
 121      * Default prefixes
 122      */
 123     private static final char NONS[];
 124     private static final char XML[];
 125     private static final char XMLNS[];
 126 
 127     static {
 128         NONS = new char[1];
 129         NONS[0] = (char) 0;
 130 
 131         XML = new char[4];
 132         XML[0] = (char) 4;
 133         XML[1] = 'x';
 134         XML[2] = 'm';
 135         XML[3] = 'l';
 136 
 137         XMLNS = new char[6];
 138         XMLNS[0] = (char) 6;
 139         XMLNS[1] = 'x';
 140         XMLNS[2] = 'm';
 141         XMLNS[3] = 'l';
 142         XMLNS[4] = 'n';
 143         XMLNS[5] = 's';
 144     }
 145     /**
 146      * ASCII character type array.
 147      *
 148      * This array maps an ASCII (7 bit) character to the character type.<br>
 149      * Possible character type values are:<br> - ' ' for any kind of white
 150      * space character;<br> - 'a' for any lower case alphabetical character
 151      * value;<br> - 'A' for any upper case alphabetical character value;<br>
 152      * - 'd' for any decimal digit character value;<br> - 'z' for any
 153      * character less than ' ' except '\t', '\n', '\r';<br> An ASCII (7 bit)
 154      * character which does not fall in any category listed above is mapped to
 155      * it self.
 156      */
 157     private static final byte asctyp[];
 158     /**
 159      * NMTOKEN character type array.
 160      *
 161      * This array maps an ASCII (7 bit) character to the character type.<br>
 162      * Possible character type values are:<br> - 0 for underscore ('_') or any
 163      * lower and upper case alphabetical character value;<br> - 1 for colon
 164      * (':') character;<br> - 2 for dash ('-') and dot ('.') or any decimal
 165      * digit character value;<br> - 3 for any kind of white space character<br>
 166      * An ASCII (7 bit) character which does not fall in any category listed
 167      * above is mapped to 0xff.
 168      */
 169     private static final byte nmttyp[];
 170 
 171     /**
 172      * Static constructor.
 173      *
 174      * Sets up the ASCII character type array which is used by
 175      * {@link #asctyp asctyp} method and NMTOKEN character type array.
 176      */
 177     static {
 178         short i = 0;
 179 
 180         asctyp = new byte[0x80];
 181         while (i < ' ') {
 182             asctyp[i++] = (byte) 'z';
 183         }
 184         asctyp['\t'] = (byte) ' ';
 185         asctyp['\r'] = (byte) ' ';
 186         asctyp['\n'] = (byte) ' ';
 187         while (i < '0') {
 188             asctyp[i] = (byte) i++;
 189         }
 190         while (i <= '9') {
 191             asctyp[i++] = (byte) 'd';
 192         }
 193         while (i < 'A') {
 194             asctyp[i] = (byte) i++;
 195         }
 196         while (i <= 'Z') {
 197             asctyp[i++] = (byte) 'A';
 198         }
 199         while (i < 'a') {
 200             asctyp[i] = (byte) i++;
 201         }
 202         while (i <= 'z') {
 203             asctyp[i++] = (byte) 'a';
 204         }
 205         while (i < 0x80) {
 206             asctyp[i] = (byte) i++;
 207         }
 208 
 209         nmttyp = new byte[0x80];
 210         for (i = 0; i < '0'; i++) {
 211             nmttyp[i] = (byte) 0xff;
 212         }
 213         while (i <= '9') {
 214             nmttyp[i++] = (byte) 2;  // digits
 215         }
 216         while (i < 'A') {
 217             nmttyp[i++] = (byte) 0xff;
 218         }
 219         // skiped upper case alphabetical character are already 0
 220         for (i = '['; i < 'a'; i++) {
 221             nmttyp[i] = (byte) 0xff;
 222         }
 223         // skiped lower case alphabetical character are already 0
 224         for (i = '{'; i < 0x80; i++) {
 225             nmttyp[i] = (byte) 0xff;
 226         }
 227         nmttyp['_'] = 0;
 228         nmttyp[':'] = 1;
 229         nmttyp['.'] = 2;
 230         nmttyp['-'] = 2;
 231         nmttyp[' '] = 3;
 232         nmttyp['\t'] = 3;
 233         nmttyp['\r'] = 3;
 234         nmttyp['\n'] = 3;
 235     }
 236 
 237     /**
 238      * Constructor.
 239      */
 240     protected Parser() {
 241         mPh = PH_BEFORE_DOC;  // before parsing
 242 
 243         //              Initialize the parser
 244         mBuff = new char[BUFFSIZE_PARSER];
 245         mAttrs = new Attrs();
 246 
 247         //              Default namespace
 248         mPref = pair(mPref);
 249         mPref.name = "";
 250         mPref.value = "";
 251         mPref.chars = NONS;
 252         mNoNS = mPref;  // no namespace
 253         //              XML namespace
 254         mPref = pair(mPref);
 255         mPref.name = "xml";
 256         mPref.value = "http://www.w3.org/XML/1998/namespace";
 257         mPref.chars = XML;
 258         mXml = mPref;  // XML namespace
 259     }
 260 
 261     /**
 262      * Initializes parser's internals. Note, current input has to be set before
 263      * this method is called.
 264      */
 265     protected void init() {
 266         mUnent = null;
 267         mElm = null;
 268         mPref = mXml;
 269         mAttL = null;
 270         mPEnt = new HashMap<>();
 271         mEnt = new HashMap<>();
 272         mDoc = mInp;          // current input is document entity
 273         mChars = mInp.chars;    // use document entity buffer
 274         mPh = PH_DOC_START;  // the begining of the document
 275     }
 276 
 277     /**
 278      * Cleans up parser internal resources.
 279      */
 280     protected void cleanup() {
 281         //              Default attributes
 282         while (mAttL != null) {
 283             while (mAttL.list != null) {
 284                 if (mAttL.list.list != null) {
 285                     del(mAttL.list.list);
 286                 }
 287                 mAttL.list = del(mAttL.list);
 288             }
 289             mAttL = del(mAttL);
 290         }
 291         //              Element stack
 292         while (mElm != null) {
 293             mElm = del(mElm);
 294         }
 295         //              Namespace prefixes
 296         while (mPref != mXml) {
 297             mPref = del(mPref);
 298         }
 299         //              Inputs
 300         while (mInp != null) {
 301             pop();
 302         }
 303         //              Document reader
 304         if ((mDoc != null) && (mDoc.src != null)) {
 305             try {
 306                 mDoc.src.close();
 307             } catch (IOException ioe) {
 308             }
 309         }
 310         mPEnt = null;
 311         mEnt = null;
 312         mDoc = null;
 313         mPh = PH_AFTER_DOC;  // before documnet processing
 314     }
 315 
 316     /**
 317      * Processes a portion of document. This method returns one of EV_*
 318      * constants as an identifier of the portion of document have been read.
 319      *
 320      * @return Identifier of processed document portion.
 321      * @exception Exception is parser specific exception form panic method.
 322      * @exception IOException
 323      */
 324     @SuppressWarnings("fallthrough")
 325     protected int step() throws Exception {
 326         mEvt = EV_NULL;
 327         int st = 0;
 328         while (mEvt == EV_NULL) {
 329             char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
 330             switch (st) {
 331                 case 0:     // all sorts of markup (dispetcher)
 332                     if (ch != '<') {
 333                         bkch();
 334                         mBuffIdx = -1;  // clean parser buffer
 335                         st = 1;
 336                         break;
 337                     }
 338                     switch (getch()) {
 339                         case '/':  // the end of the element content
 340                             mEvt = EV_ELME;
 341                             if (mElm == null) {
 342                                 panic(FAULT);
 343                             }
 344                             //          Check element's open/close tags balance
 345                             mBuffIdx = -1;  // clean parser buffer
 346                             bname(mIsNSAware);
 347                             char[] chars = mElm.chars;
 348                             if (chars.length == (mBuffIdx + 1)) {
 349                                 for (char i = 1; i <= mBuffIdx; i += 1) {
 350                                     if (chars[i] != mBuff[i]) {
 351                                         panic(FAULT);
 352                                     }
 353                                 }
 354                             } else {
 355                                 panic(FAULT);
 356                             }
 357                             //          Skip white spaces before '>'
 358                             if (wsskip() != '>') {
 359                                 panic(FAULT);
 360                             }
 361                             getch();  // read '>'
 362                             break;
 363 
 364                         case '!':  // a comment or a CDATA
 365                             ch = getch();
 366                             bkch();
 367                             switch (ch) {
 368                                 case '-':  // must be a comment
 369                                     mEvt = EV_COMM;
 370                                     comm();
 371                                     break;
 372 
 373                                 case '[':  // must be a CDATA section
 374                                     mEvt = EV_CDAT;
 375                                     cdat();
 376                                     break;
 377 
 378                                 default:   // must be 'DOCTYPE'
 379                                     mEvt = EV_DTD;
 380                                     dtd();
 381                                     break;
 382                             }
 383                             break;
 384 
 385                         case '?':  // processing instruction
 386                             mEvt = EV_PI;
 387                             pi();
 388                             break;
 389 
 390                         default:  // must be the first char of an xml name
 391                             bkch();
 392                             //          Read an element name and put it on top of the
 393                             //          element stack
 394                             mElm = pair(mElm);  // add new element to the stack
 395                             mElm.chars = qname(mIsNSAware);
 396                             mElm.name = mElm.local();
 397                             mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
 398                             mElm.num = 0;     // namespace counter
 399                             //          Find the list of defined attributs of the current
 400                             //          element
 401                             Pair elm = find(mAttL, mElm.chars);
 402                             mElm.list = (elm != null) ? elm.list : null;
 403                             //          Read attributes till the end of the element tag
 404                             mAttrIdx = 0;
 405                             Pair att = pair(null);
 406                             att.num = 0;  // clear attribute's flags
 407                             attr(att);     // get all attributes inc. defaults
 408                             del(att);
 409                             mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
 410                             //          Skip white spaces before '>'
 411                             switch (wsskip()) {
 412                                 case '>':
 413                                     getch();  // read '>'
 414                                     mEvt = EV_ELMS;
 415                                     break;
 416 
 417                                 case '/':
 418                                     getch();  // read '/'
 419                                     if (getch() != '>') // read '>'
 420                                     {
 421                                         panic(FAULT);
 422                                     }
 423                                     mEvt = EV_ELM;
 424                                     break;
 425 
 426                                 default:
 427                                     panic(FAULT);
 428                             }
 429                             break;
 430                     }
 431                     break;
 432 
 433                 case 1:     // read white space
 434                     switch (ch) {
 435                         case ' ':
 436                         case '\t':
 437                         case '\n':
 438                             bappend(ch);
 439                             break;
 440 
 441                         case '\r':              // EOL processing [#2.11]
 442                             if (getch() != '\n') {
 443                                 bkch();
 444                             }
 445                             bappend('\n');
 446                             break;
 447 
 448                         case '<':
 449                             mEvt = EV_WSPC;
 450                             bkch();
 451                             bflash_ws();
 452                             break;
 453 
 454                         default:
 455                             bkch();
 456                             st = 2;
 457                             break;
 458                     }
 459                     break;
 460 
 461                 case 2:     // read the text content of the element
 462                     switch (ch) {
 463                         case '&':
 464                             if (mUnent == null) {
 465                                 //              There was no unresolved entity on previous step.
 466                                 if ((mUnent = ent('x')) != null) {
 467                                     mEvt = EV_TEXT;
 468                                     bkch();      // move back to ';' after entity name
 469                                     setch('&');  // parser must be back on next step
 470                                     bflash();
 471                                 }
 472                             } else {
 473                                 //              There was unresolved entity on previous step.
 474                                 mEvt = EV_ENT;
 475                                 skippedEnt(mUnent);
 476                                 mUnent = null;
 477                             }
 478                             break;
 479 
 480                         case '<':
 481                             mEvt = EV_TEXT;
 482                             bkch();
 483                             bflash();
 484                             break;
 485 
 486                         case '\r':  // EOL processing [#2.11]
 487                             if (getch() != '\n') {
 488                                 bkch();
 489                             }
 490                             bappend('\n');
 491                             break;
 492 
 493                         case EOS:
 494                             panic(FAULT);
 495 
 496                         default:
 497                             bappend(ch);
 498                             break;
 499                     }
 500                     break;
 501 
 502                 default:
 503                     panic(FAULT);
 504             }
 505         }
 506 
 507         return mEvt;
 508     }
 509 
 510     /**
 511      * Parses the document type declaration.
 512      *
 513      * @exception Exception is parser specific exception form panic method.
 514      * @exception IOException
 515      */
 516     private void dtd() throws Exception {
 517         char ch;
 518         String str = null;
 519         String name = null;
 520         Pair psid = null;
 521         // read 'DOCTYPE'
 522         if ("DOCTYPE".equals(name(false)) != true) {
 523             panic(FAULT);
 524         }
 525         mPh = PH_DTD;  // DTD
 526         for (short st = 0; st >= 0;) {
 527             ch = getch();
 528             switch (st) {
 529                 case 0:     // read the document type name
 530                     if (chtyp(ch) != ' ') {
 531                         bkch();
 532                         name = name(mIsNSAware);
 533                         wsskip();
 534                         st = 1;  // read 'PUPLIC' or 'SYSTEM'
 535                     }
 536                     break;
 537 
 538                 case 1:     // read 'PUPLIC' or 'SYSTEM'
 539                     switch (chtyp(ch)) {
 540                         case 'A':
 541                             bkch();
 542                             psid = pubsys(' ');
 543                             st = 2;  // skip spaces before internal subset
 544                             docType(name, psid.name, psid.value);
 545                             break;
 546 
 547                         case '[':
 548                             bkch();
 549                             st = 2;    // skip spaces before internal subset
 550                             docType(name, null, null);
 551                             break;
 552 
 553                         case '>':
 554                             bkch();
 555                             st = 3;    // skip spaces after internal subset
 556                             docType(name, null, null);
 557                             break;
 558 
 559                         default:
 560                             panic(FAULT);
 561                     }
 562                     break;
 563 
 564                 case 2:     // skip spaces before internal subset
 565                     switch (chtyp(ch)) {
 566                         case '[':
 567                             //          Process internal subset
 568                             dtdsub();
 569                             st = 3;  // skip spaces after internal subset
 570                             break;
 571 
 572                         case '>':
 573                             //          There is no internal subset
 574                             bkch();
 575                             st = 3;  // skip spaces after internal subset
 576                             break;
 577 
 578                         case ' ':
 579                             // skip white spaces
 580                             break;
 581 
 582                         default:
 583                             panic(FAULT);
 584                     }
 585                     break;
 586 
 587                 case 3:     // skip spaces after internal subset
 588                     switch (chtyp(ch)) {
 589                         case '>':
 590                             if (psid != null) {
 591                                 //              Report the DTD external subset
 592                                 InputSource is = resolveEnt(name, psid.name, psid.value);
 593                                 if (is != null) {
 594                                     if (mIsSAlone == false) {
 595                                         //              Set the end of DTD external subset char
 596                                         bkch();
 597                                         setch(']');
 598                                         //              Set the DTD external subset InputSource
 599                                         push(new Input(BUFFSIZE_READER));
 600                                         setinp(is);
 601                                         mInp.pubid = psid.name;
 602                                         mInp.sysid = psid.value;
 603                                         //              Parse the DTD external subset
 604                                         dtdsub();
 605                                     } else {
 606                                         //              Unresolved DTD external subset
 607                                         skippedEnt("[dtd]");
 608                                         //              Release reader and stream
 609                                         if (is.getCharacterStream() != null) {
 610                                             try {
 611                                                 is.getCharacterStream().close();
 612                                             } catch (IOException ioe) {
 613                                             }
 614                                         }
 615                                         if (is.getByteStream() != null) {
 616                                             try {
 617                                                 is.getByteStream().close();
 618                                             } catch (IOException ioe) {
 619                                             }
 620                                         }
 621                                     }
 622                                 } else {
 623                                     //          Unresolved DTD external subset
 624                                     skippedEnt("[dtd]");
 625                                 }
 626                                 del(psid);
 627                             }
 628                             st = -1;  // end of DTD
 629                             break;
 630 
 631                         case ' ':
 632                             // skip white spaces
 633                             break;
 634 
 635                         default:
 636                             panic(FAULT);
 637                     }
 638                     break;
 639 
 640                 default:
 641                     panic(FAULT);
 642             }
 643         }
 644     }
 645 
 646     /**
 647      * Parses the document type declaration subset.
 648      *
 649      * @exception Exception is parser specific exception form panic method.
 650      * @exception IOException
 651      */
 652     private void dtdsub() throws Exception {
 653         char ch;
 654         for (short st = 0; st >= 0;) {
 655             ch = getch();
 656             switch (st) {
 657                 case 0:     // skip white spaces before a declaration
 658                     switch (chtyp(ch)) {
 659                         case '<':
 660                             ch = getch();
 661                             switch (ch) {
 662                                 case '?':
 663                                     pi();
 664                                     break;
 665 
 666                                 case '!':
 667                                     ch = getch();
 668                                     bkch();
 669                                     if (ch == '-') {
 670                                         comm();
 671                                         break;
 672                                     }
 673                                     //          A markup or an entity declaration
 674                                     bntok();
 675                                     switch (bkeyword()) {
 676                                         case 'n':
 677                                             dtdent();
 678                                             break;
 679 
 680                                         case 'a':
 681                                             dtdattl();    // parse attributes declaration
 682                                             break;
 683 
 684                                         case 'e':
 685                                             dtdelm();     // parse element declaration
 686                                             break;
 687 
 688                                         case 'o':
 689                                             dtdnot();     // parse notation declaration
 690                                             break;
 691 
 692                                         default:
 693                                             panic(FAULT); // unsupported markup declaration
 694                                             break;
 695                                     }
 696                                     st = 1;  // read the end of declaration
 697                                     break;
 698 
 699                                 default:
 700                                     panic(FAULT);
 701                                     break;
 702                             }
 703                             break;
 704 
 705                         case '%':
 706                             //          A parameter entity reference
 707                             pent(' ');
 708                             break;
 709 
 710                         case ']':
 711                             //          End of DTD subset
 712                             st = -1;
 713                             break;
 714 
 715                         case ' ':
 716                             //          Skip white spaces
 717                             break;
 718 
 719                         case 'Z':
 720                             //          End of stream
 721                             if (getch() != ']') {
 722                                 panic(FAULT);
 723                             }
 724                             st = -1;
 725                             break;
 726 
 727                         default:
 728                             panic(FAULT);
 729                     }
 730                     break;
 731 
 732                 case 1:     // read the end of declaration
 733                     switch (ch) {
 734                         case '>':   // there is no notation
 735                             st = 0; // skip white spaces before a declaration
 736                             break;
 737 
 738                         case ' ':
 739                         case '\n':
 740                         case '\r':
 741                         case '\t':
 742                             //          Skip white spaces
 743                             break;
 744 
 745                         default:
 746                             panic(FAULT);
 747                             break;
 748                     }
 749                     break;
 750 
 751                 default:
 752                     panic(FAULT);
 753             }
 754         }
 755     }
 756 
 757     /**
 758      * Parses an entity declaration. This method fills the general
 759      * (<code>mEnt</code>) and parameter (<code>mPEnt</code>) entity look up
 760      * tables; a name which is already in its table is not overridden. An
 761      * unparsed (NDATA) entity is passed to <code>unparsedEntDecl</code>.
 762      *
 763      * @exception Exception is parser specific exception from panic method.
 764      * @exception IOException
 765      */
 766     @SuppressWarnings("fallthrough")
 767     private void dtdent() throws Exception {
 768         String str = null;  // entity name
 769         char[] val = null;  // value of an internal entity
 770         Input inp = null;   // the entry which is put into mEnt or mPEnt
 771         Pair ids = null;    // external identifier read by pubsys
 772         char ch;
 773         for (short st = 0; st >= 0;) {  // st = -1 ends the loop
 774             ch = getch();
 775             switch (st) {
 776                 case 0:     // skip white spaces before entity name
 777                     switch (chtyp(ch)) {
 778                         case ' ':
 779                             //          Skip white spaces
 780                             break;
 781 
 782                         case '%':
 783                             //          A white space after '%' means a declaration, a reference otherwise.
 784                             ch = getch();
 785                             bkch();
 786                             if (chtyp(ch) == ' ') {
 787                                 //              Parameter entity declaration.
 788                                 wsskip();
 789                                 str = name(false);
 790                                 switch (chtyp(wsskip())) {
 791                                     case 'A':
 792                                         //              Read the external identifier
 793                                         ids = pubsys(' ');
 794                                         if (wsskip() == '>') {
 795                                             //          External parsed entity
 796                                             if (mPEnt.containsKey(str) == false) {      // [#4.2]
 797                                                 inp = new Input();
 798                                                 inp.pubid = ids.name;
 799                                                 inp.sysid = ids.value;
 800                                                 mPEnt.put(str, inp);
 801                                             }
 802                                         } else {
 803                                             panic(FAULT);
 804                                         }
 805                                         del(ids);
 806                                         st = -1;  // the end of declaration
 807                                         break;
 808 
 809                                     case '\"':
 810                                     case '\'':
 811                                         //              Read the parameter entity value
 812                                         bqstr('d');
 813                                         //              Create the parameter entity value: val[1..] gets mBuff[1..mBuffIdx]
 814                                         val = new char[mBuffIdx + 1];
 815                                         System.arraycopy(mBuff, 1, val, 1, val.length - 1);
 816                                         //              Add surrounding spaces [#4.4.8] (only the leading one is set here)
 817                                         val[0] = ' ';
 818                                         //              Add the entity to the entity look up table
 819                                         if (mPEnt.containsKey(str) == false) {  // [#4.2]
 820                                             inp = new Input(val);
 821                                             inp.pubid = mInp.pubid;
 822                                             inp.sysid = mInp.sysid;
 823                                             inp.xmlenc = mInp.xmlenc;
 824                                             inp.xmlver = mInp.xmlver;
 825                                             mPEnt.put(str, inp);
 826                                         }
 827                                         st = -1;  // the end of declaration
 828                                         break;
 829 
 830                                     default:
 831                                         panic(FAULT);
 832                                         break;
 833                                 }
 834                             } else {
 835                                 //              Parameter entity reference.
 836                                 pent(' ');
 837                             }
 838                             break;
 839 
 840                         default:
 841                             bkch();
 842                             str = name(false);
 843                             st = 1;  // read entity declaration value
 844                             break;
 845                     }
 846                     break;
 847 
 848                 case 1:     // read entity declaration value
 849                     switch (chtyp(ch)) {
 850                         case '\"':  // internal entity
 851                         case '\'':
 852                             bkch();
 853                             bqstr('d');  // read a string into the buffer
 854                             if (mEnt.get(str) == null) {  // nothing is stored under this name yet
 855                                 //              Create general entity value
 856                                 val = new char[mBuffIdx];
 857                                 System.arraycopy(mBuff, 1, val, 0, val.length);
 858                                 //              Add the entity to the entity look up table
 859                                 if (mEnt.containsKey(str) == false) {   // [#4.2]
 860                                     inp = new Input(val);
 861                                     inp.pubid = mInp.pubid;
 862                                     inp.sysid = mInp.sysid;
 863                                     inp.xmlenc = mInp.xmlenc;
 864                                     inp.xmlver = mInp.xmlver;
 865                                     mEnt.put(str, inp);
 866                                 }
 867                             }
 868                             st = -1;  // the end of declaration
 869                             break;
 870 
 871                         case 'A':  // external entity
 872                             bkch();
 873                             ids = pubsys(' ');
 874                             switch (wsskip()) {
 875                                 case '>':  // external parsed entity
 876                                     if (mEnt.containsKey(str) == false) {  // [#4.2]
 877                                         inp = new Input();
 878                                         inp.pubid = ids.name;
 879                                         inp.sysid = ids.value;
 880                                         mEnt.put(str, inp);
 881                                     }
 882                                     break;
 883 
 884                                 case 'N':  // external general unparsed entity
 885                                     if ("NDATA".equals(name(false)) == true) {
 886                                         wsskip();
 887                                         unparsedEntDecl(str, ids.name, ids.value, name(false));
 888                                         break;
 889                                     }  // any other name falls through to the panic below
 890                                 default:
 891                                     panic(FAULT);
 892                                     break;
 893                             }
 894                             del(ids);
 895                             st = -1;  // the end of declaration
 896                             break;
 897 
 898                         case ' ':
 899                             //          Skip white spaces
 900                             break;
 901 
 902                         default:
 903                             panic(FAULT);
 904                             break;
 905                     }
 906                     break;
 907 
 908                 default:
 909                     panic(FAULT);
 910             }
 911         }
 912     }
 913 
 914     /**
 915      * Parses an element declaration.
 916      *
 917      * This method parses the declaration up to the closing angle bracket.
 918      *
 919      * @exception Exception is parser specific exception form panic method.
 920      * @exception IOException
 921      */
 922     @SuppressWarnings("fallthrough")
 923     private void dtdelm() throws Exception {
 924         //              This is stub implementation which skips an element
 925         //              declaration.
 926         wsskip();
 927         name(mIsNSAware);
 928 
 929         char ch;
 930         while (true) {
 931             ch = getch();
 932             switch (ch) {
 933                 case '>':
 934                     bkch();
 935                     return;
 936 
 937                 case EOS:
 938                     panic(FAULT);
 939 
 940                 default:
 941                     break;
 942             }
 943         }
 944     }
 945 
 946     /**
 947      * Parses an attribute list declaration.
 948      *
 949      * The name of the element is read first; its entry is looked up in
 950      * <code>mAttL</code> and, when there is none, a new entry created by
 951      * <code>pair(mAttL)</code> becomes the new <code>mAttL</code>. Then
 952      * attribute declarations are read by <code>dtdatt</code> until the
 953      * closing angle bracket follows one of them.
 954      * @exception Exception is parser specific exception from panic method.
 955      * @exception IOException
 956      */
 957     private void dtdattl() throws Exception {
 958         Pair owner = null;
 959         char[] ownerName;
 960 
 961         //              Step one: read the element name.
 962         for (boolean named = false; !named;) {
 963             switch (chtyp(getch())) {
 964                 case 'a':
 965                 case 'A':
 966                 case '_':
 967                 case 'X':
 968                 case ':':
 969                     //  The first character of a name: read the whole name
 970                     //  and get the element from the list or add a new one.
 971                     bkch();
 972                     ownerName = qname(mIsNSAware);
 973                     owner = find(mAttL, ownerName);
 974                     if (owner == null) {
 975                         owner = pair(mAttL);
 976                         owner.chars = ownerName;
 977                         mAttL = owner;
 978                     }
 979                     named = true;
 980                     break;
 981 
 982                 case ' ':
 983                     //  Skip white spaces
 984                     break;
 985 
 986                 case '%':
 987                     //  A parameter entity reference
 988                     pent(' ');
 989                     break;
 990 
 991                 default:
 992                     panic(FAULT);
 993                     break;
 994             }
 995         }
 996 
 997         //              Step two: read attribute declarations.
 998         while (true) {
 999             switch (chtyp(getch())) {
1000                 case 'a':
1001                 case 'A':
1002                 case '_':
1003                 case 'X':
1004                 case ':':
1005                     bkch();
1006                     dtdatt(owner);
1007                     if (wsskip() == '>') {
1008                         //      The end of the declaration
1009                         return;
1010                     }
1011                     break;
1012 
1013                 case ' ':
1014                     break;
1015 
1016                 case '%':
1017                     pent(' ');
1018                     break;
1019 
1020                 default:
1021                     panic(FAULT);
1022                     break;
1023             }
1024         }
1025     }
1026 
1027     /**
1028      * Parses an attribute declaration.
1029      *
1030      * The attribute uses the following fields of Pair object: chars - characters
1031      * of qualified name; id - the type identifier of the attribute; list - a
1032      * pair which holds the default value (chars field).
1033      *
1034      * @param elm An object which represents all defined attributes on an
1035      * element.
1036      * @exception Exception is parser specific exception from panic method.
1037      * @exception IOException
1038      */
1039     @SuppressWarnings("fallthrough")
1040     private void dtdatt(Pair elm) throws Exception {
1041         char attqn[] = null;
1042         Pair att = null;
1043         char ch;
1044         for (short st = 0; st >= 0;) {  // st = -1 ends the declaration
1045             ch = getch();
1046             switch (st) {
1047                 case 0:     // the attribute name
1048                     switch (chtyp(ch)) {
1049                         case 'a':
1050                         case 'A':
1051                         case '_':
1052                         case 'X':
1053                         case ':':
1054                             bkch();
1055                             //          Get the attribute from the list or add a new one.
1056                             attqn = qname(mIsNSAware);
1057                             att = find(elm.list, attqn);
1058                             if (att == null) {
1059                                 //              New attribute declaration
1060                                 att = pair(elm.list);
1061                                 att.chars = attqn;
1062                                 elm.list = att;
1063                             } else {
1064                                 //              Do not override the attribute declaration [#3.3]: use a pair outside of elm.list
1065                                 att = pair(null);
1066                                 att.chars = attqn;
1067                                 att.id = 'c';
1068                             }
1069                             wsskip();
1070                             st = 1;
1071                             break;
1072 
1073                         case '%':
1074                             pent(' ');
1075                             break;
1076 
1077                         case ' ':
1078                             break;
1079 
1080                         default:
1081                             panic(FAULT);
1082                             break;
1083                     }
1084                     break;
1085 
1086                 case 1:     // the attribute type
1087                     switch (chtyp(ch)) {
1088                         case '(':
1089                             att.id = 'u';  // enumeration type
1090                             st = 2;        // read the first element of the list
1091                             break;
1092 
1093                         case '%':
1094                             pent(' ');
1095                             break;
1096 
1097                         case ' ':
1098                             break;
1099 
1100                         default:
1101                             bkch();
1102                             bntok();  // read type id
1103                             att.id = bkeyword();
1104                             switch (att.id) {
1105                                 case 'o':   // NOTATION
1106                                     if (wsskip() != '(') {
1107                                         panic(FAULT);
1108                                     }
1109                                     ch = getch();  // presumably consumes the '(' reported by wsskip()
1110                                     st = 2;  // read the first element of the list
1111                                     break;
1112 
1113                                 case 'i':     // ID
1114                                 case 'r':     // IDREF
1115                                 case 'R':     // IDREFS
1116                                 case 'n':     // ENTITY
1117                                 case 'N':     // ENTITIES
1118                                 case 't':     // NMTOKEN
1119                                 case 'T':     // NMTOKENS
1120                                 case 'c':     // CDATA
1121                                     wsskip();
1122                                     st = 4;  // read default declaration
1123                                     break;
1124 
1125                                 default:
1126                                     panic(FAULT);
1127                                     break;
1128                             }
1129                             break;
1130                     }
1131                     break;
1132 
1133                 case 2:     // read the first element of the list
1134                     switch (chtyp(ch)) {
1135                         case 'a':
1136                         case 'A':
1137                         case 'd':
1138                         case '.':
1139                         case ':':
1140                         case '-':
1141                         case '_':
1142                         case 'X':
1143                             bkch();
1144                             switch (att.id) {
1145                                 case 'u':  // enumeration type
1146                                     bntok();
1147                                     break;
1148 
1149                                 case 'o':  // NOTATION
1150                                     mBuffIdx = -1;  // presumably resets mBuff before the name is read
1151                                     bname(false);
1152                                     break;
1153 
1154                                 default:
1155                                     panic(FAULT);
1156                                     break;
1157                             }
1158                             wsskip();
1159                             st = 3;  // read next element of the list
1160                             break;
1161 
1162                         case '%':
1163                             pent(' ');
1164                             break;
1165 
1166                         case ' ':
1167                             break;
1168 
1169                         default:
1170                             panic(FAULT);
1171                             break;
1172                     }
1173                     break;
1174 
1175                 case 3:     // read next element of the list
1176                     switch (ch) {
1177                         case ')':
1178                             wsskip();
1179                             st = 4;  // read default declaration
1180                             break;
1181 
1182                         case '|':
1183                             wsskip();
1184                             switch (att.id) {
1185                                 case 'u':  // enumeration type
1186                                     bntok();
1187                                     break;
1188 
1189                                 case 'o':  // NOTATION
1190                                     mBuffIdx = -1;  // presumably resets mBuff before the name is read
1191                                     bname(false);
1192                                     break;
1193 
1194                                 default:
1195                                     panic(FAULT);
1196                                     break;
1197                             }
1198                             wsskip();
1199                             break;
1200 
1201                         case '%':
1202                             pent(' ');
1203                             break;
1204 
1205                         default:
1206                             panic(FAULT);
1207                             break;
1208                     }
1209                     break;
1210 
1211                 case 4:     // read default declaration
1212                     switch (ch) {
1213                         case '#':
1214                             bntok();
1215                             switch (bkeyword()) {
1216                                 case 'F':  // FIXED
1217                                     switch (wsskip()) {
1218                                         case '\"':
1219                                         case '\'':
1220                                             st = 5;  // read the default value
1221                                             break;
1222 
1223                                         case EOS:
1224                                             panic(FAULT);  // falls through to the default branch if panic returns
1225 
1226                                         default:
1227                                             st = -1;
1228                                             break;
1229                                     }
1230                                     break;
1231 
1232                                 case 'Q':  // REQUIRED
1233                                 case 'I':  // IMPLIED
1234                                     st = -1;
1235                                     break;
1236 
1237                                 default:
1238                                     panic(FAULT);
1239                                     break;
1240                             }
1241                             break;
1242 
1243                         case '\"':
1244                         case '\'':
1245                             bkch();
1246                             st = 5;  // read the default value
1247                             break;
1248 
1249                         case ' ':
1250                         case '\n':
1251                         case '\r':
1252                         case '\t':
1253                             break;
1254 
1255                         case '%':
1256                             pent(' ');
1257                             break;
1258 
1259                         default:
1260                             bkch();
1261                             st = -1;
1262                             break;
1263                     }
1264                     break;
1265 
1266                 case 5:     // read the default value
1267                     switch (ch) {
1268                         case '\"':
1269                         case '\'':
1270                             bkch();
1271                             bqstr('d');  // the value in the mBuff now
1272                             att.list = pair(null);
1273                             //          Create a string like "attqname='value' "; att.chars[0] is not copied
1274                             att.list.chars = new char[att.chars.length + mBuffIdx + 3];
1275                             System.arraycopy(
1276                                     att.chars, 1, att.list.chars, 0, att.chars.length - 1);
1277                             att.list.chars[att.chars.length - 1] = '=';
1278                             att.list.chars[att.chars.length] = ch;  // the quote character which was read
1279                             System.arraycopy(
1280                                     mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
1281                             att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
1282                             att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1283                             st = -1;
1284                             break;
1285 
1286                         default:
1287                             panic(FAULT);
1288                             break;
1289                     }
1290                     break;
1291 
1292                 default:
1293                     panic(FAULT);
1294                     break;
1295             }
1296         }
1297     }
1298 
1299     /**
1300      * Parses a notation declaration.
1301      *
1302      * This method parses the declaration up to the closing angle bracket.
1303      *
1304      * @exception Exception is parser specific exception form panic method.
1305      * @exception IOException
1306      */
1307     private void dtdnot() throws Exception {
1308         wsskip();
1309         String name = name(false);
1310         wsskip();
1311         Pair ids = pubsys('N');
1312         notDecl(name, ids.name, ids.value);
1313         del(ids);
1314     }
1315 
1316     /**
1317      * Parses an attribute.
1318      *
1319      * This recursive method is responsible for prefix addition
1320      * (<code>mPref</code>) on the way down. The element's start tag end
1321      * triggers the return process, after the declared default values of
1322      * attributes which were not found have been pushed as input. The method
1323      * then on its way back resolves prefixes and accumulates attributes.
1324      *
1325      * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
1326      * declared in DTD (attribute declaration had been read); 0x2 - attribute's
1327      * default value is used.</p>
1328      *
1329      * @param att An object which represents current attribute.
1330      * @exception Exception is parser specific exception from panic method.
1331      * @exception IOException
1332      */
1333     @SuppressWarnings("fallthrough")
1334     private void attr(Pair att) throws Exception {
1335         switch (wsskip()) {
1336             case '/':
1337             case '>':
1338                 if ((att.num & 0x2) == 0) {  // all attributes have been read
1339                     att.num |= 0x2;  // set default attribute flag
1340                     Input inp = mInp;  // remembered to detect pushed defaults below
1341                     //          Go through all attributes defined on current element.
1342                     for (Pair def = mElm.list; def != null; def = def.next) {
1343                         if (def.list == null) // no default value
1344                         {
1345                             continue;
1346                         }
1347                         //              Push the default declaration as a new input when
1348                         //              no pair reached through att.next carries this name.
1349                         Pair act = find(att.next, def.chars);
1350                         if (act == null) {
1351                             push(new Input(def.list.chars));
1352                         }
1353                     }
1354                     if (mInp != inp) {  // defaults have been added
1355                         attr(att);
1356                         return;
1357                     }
1358                 }
1359                 //              Ensure the attribute string array capacity
1360                 mAttrs.setLength(mAttrIdx);
1361                 mItems = mAttrs.mItems;
1362                 return;
1363 
1364             case EOS:
1365                 panic(FAULT);  // falls through to the default branch if panic returns
1366 
1367             default:
1368                 //              Read the attribute name and value
1369                 att.chars = qname(mIsNSAware);
1370                 att.name = att.local();
1371                 String type = atype(att);  // sets attribute's type on att.id
1372                 wsskip();
1373                 if (getch() != '=') {
1374                     panic(FAULT);
1375                 }
1376                 bqstr((char) att.id);   // read the value with normalization.
1377                 String val = new String(mBuff, 1, mBuffIdx);
1378                 Pair next = pair(att);
1379                 next.num = (att.num & ~0x1);  // inherit attribute flags
1380                 //              Put a namespace declaration on top of the prefix stack
1381                 if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
1382                     //          An ordinary attribute
1383                     mAttrIdx++;
1384                     attr(next);     // recursive call to parse the next attribute
1385                     mAttrIdx--;     // undo the increment made before the recursive call
1386                     //          Add the attribute to the attributes string array
1387                     char idx = (char) (mAttrIdx << 3);  // 8 mItems slots per attribute; slots 0-5 are set here
1388                     mItems[idx + 1] = att.qname();  // attr qname
1389                     mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
1390                     mItems[idx + 3] = val;          // attr value
1391                     mItems[idx + 4] = type;         // attr type
1392                     switch (att.num & 0x3) {
1393                         case 0x0:
1394                             mItems[idx + 5] = null;
1395                             break;
1396 
1397                         case 0x1:  // declared attribute
1398                             mItems[idx + 5] = "d";
1399                             break;
1400 
1401                         default:  // 0x2, 0x3 - default attribute always declared
1402                             mItems[idx + 5] = "D";
1403                             break;
1404                     }
1405                     //          Resolve the prefix if any and report the attribute
1406                     //          NOTE: The attribute does not accept the default namespace.
1407                     mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
1408                 } else {
1409                     //          A namespace declaration. mPref.name contains prefix and
1410                     //          mPref.value contains namespace URI set by isdecl method.
1411                     //          Report a start of the new mapping
1412                     newPrefix();
1413                     //          Recursive call to parse the next attribute
1414                     attr(next);
1415                     //          NOTE: The namespace declaration is not reported.
1416                 }
1417                 del(next);
1418                 break;
1419         }
1420     }
1421 
1422     /**
1423      * Retrieves attribute type.
1424      *
1425      * This method sets the type of normalization in the attribute
1426      * <code>id</code> field and returns the name of attribute type.
1427      *
1428      * @param att An object which represents current attribute.
1429      * @return The name of the attribute type.
1430      * @exception Exception is parser specific exception form panic method.
1431      */
1432     private String atype(Pair att)
1433             throws Exception {
1434         Pair attr;
1435 
1436         // CDATA-type normalization by default [#3.3.3]
1437         att.id = 'c';
1438         if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) {
1439             return "CDATA";
1440         }
1441 
1442         att.num |= 0x1;  // attribute is declared
1443 
1444         // Non-CDATA normalization except when the attribute type is CDATA.
1445         att.id = 'i';
1446         switch (attr.id) {
1447             case 'i':
1448                 return "ID";
1449 
1450             case 'r':
1451                 return "IDREF";
1452 
1453             case 'R':
1454                 return "IDREFS";
1455 
1456             case 'n':
1457                 return "ENTITY";
1458 
1459             case 'N':
1460                 return "ENTITIES";
1461 
1462             case 't':
1463                 return "NMTOKEN";
1464 
1465             case 'T':
1466                 return "NMTOKENS";
1467 
1468             case 'u':
1469                 return "NMTOKEN";
1470 
1471             case 'o':
1472                 return "NOTATION";
1473 
1474             case 'c':
1475                 att.id = 'c';
1476                 return "CDATA";
1477 
1478             default:
1479                 panic(FAULT);
1480         }
1481         return null;
1482     }
1483 
1484     /**
1485      * Parses a comment.
1486      *
1487      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1488      * with first &apos;-&apos; after &apos;&lt;!&apos;.
1489      *
1490      * @exception Exception is parser specific exception form panic method.
1491      */
1492     @SuppressWarnings("fallthrough")
1493     private void comm() throws Exception {
1494         if (mPh == PH_DOC_START) {
1495             mPh = PH_MISC_DTD;  // misc before DTD
1496         }               // '<!' has been already read by dispetcher.
1497         char ch;
1498         mBuffIdx = -1;
1499         for (short st = 0; st >= 0;) {
1500             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
1501             if (ch == EOS) {
1502                 panic(FAULT);
1503             }
1504             switch (st) {
1505                 case 0:     // first '-' of the comment open
1506                     if (ch == '-') {
1507                         st = 1;
1508                     } else {
1509                         panic(FAULT);
1510                     }
1511                     break;
1512 
1513                 case 1:     // secind '-' of the comment open
1514                     if (ch == '-') {
1515                         st = 2;
1516                     } else {
1517                         panic(FAULT);
1518                     }
1519                     break;
1520 
1521                 case 2:     // skip the comment body
1522                     switch (ch) {
1523                         case '-':
1524                             st = 3;
1525                             break;
1526 
1527                         default:
1528                             bappend(ch);
1529                             break;
1530                     }
1531                     break;
1532 
1533                 case 3:     // second '-' of the comment close
1534                     switch (ch) {
1535                         case '-':
1536                             st = 4;
1537                             break;
1538 
1539                         default:
1540                             bappend('-');
1541                             bappend(ch);
1542                             st = 2;
1543                             break;
1544                     }
1545                     break;
1546 
1547                 case 4:     // '>' of the comment close
1548                     if (ch == '>') {
1549                         comm(mBuff, mBuffIdx + 1);
1550                         st = -1;
1551                         break;
1552                     }
1553                 // else - panic [#2.5 compatibility note]
1554 
1555                 default:
1556                     panic(FAULT);
1557             }
1558         }
1559     }
1560 
1561     /**
1562      * Parses a processing instruction.
1563      *
1564      * The &apos;&lt;?&apos; is read in dispatcher so the method starts with
1565      * first character of PI target name after &apos;&lt;?&apos;.
1566      *
1567      * @exception Exception is parser specific exception form panic method.
1568      * @exception IOException
1569      */
1570     private void pi() throws Exception {
1571         // '<?' has been already read by dispetcher.
1572         char ch;
1573         String str = null;
1574         mBuffIdx = -1;
1575         for (short st = 0; st >= 0;) {
1576             ch = getch();
1577             if (ch == EOS) {
1578                 panic(FAULT);
1579             }
1580             switch (st) {
1581                 case 0:     // read the PI target name
1582                     switch (chtyp(ch)) {
1583                         case 'a':
1584                         case 'A':
1585                         case '_':
1586                         case ':':
1587                         case 'X':
1588                             bkch();
1589                             str = name(false);
1590                             //          PI target name may not be empty string [#2.6]
1591                             //          PI target name 'XML' is reserved [#2.6]
1592                             if ((str.length() == 0)
1593                                     || (mXml.name.equals(str.toLowerCase()) == true)) {
1594                                 panic(FAULT);
1595                             }
1596                             //          This is processing instruction
1597                             if (mPh == PH_DOC_START) // the begining of the document
1598                             {
1599                                 mPh = PH_MISC_DTD;    // misc before DTD
1600                             }
1601                             wsskip();  // skip spaces after the PI target name
1602                             st = 1;    // accumulate the PI body
1603                             mBuffIdx = -1;
1604                             break;
1605 
1606                         default:
1607                             panic(FAULT);
1608                     }
1609                     break;
1610 
1611                 case 1:     // accumulate the PI body
1612                     switch (ch) {
1613                         case '?':
1614                             st = 2;  // end of the PI body
1615                             break;
1616 
1617                         default:
1618                             bappend(ch);
1619                             break;
1620                     }
1621                     break;
1622 
1623                 case 2:     // end of the PI body
1624                     switch (ch) {
1625                         case '>':
1626                             //          PI has been read.
1627                             pi(str, new String(mBuff, 0, mBuffIdx + 1));
1628                             st = -1;
1629                             break;
1630 
1631                         case '?':
1632                             bappend('?');
1633                             break;
1634 
1635                         default:
1636                             bappend('?');
1637                             bappend(ch);
1638                             st = 1;  // accumulate the PI body
1639                             break;
1640                     }
1641                     break;
1642 
1643                 default:
1644                     panic(FAULT);
1645             }
1646         }
1647     }
1648 
1649     /**
1650      * Parses a character data.
1651      *
1652      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1653      * with first &apos;[&apos; after &apos;&lt;!&apos;.
1654      *
1655      * @exception Exception is parser specific exception form panic method.
1656      * @exception IOException
1657      */
1658     private void cdat()
1659             throws Exception {
1660         // '<!' has been already read by dispetcher.
1661         char ch;
1662         mBuffIdx = -1;
1663         for (short st = 0; st >= 0;) {
1664             ch = getch();
1665             switch (st) {
1666                 case 0:     // the first '[' of the CDATA open
1667                     if (ch == '[') {
1668                         st = 1;
1669                     } else {
1670                         panic(FAULT);
1671                     }
1672                     break;
1673 
1674                 case 1:     // read "CDATA"
1675                     if (chtyp(ch) == 'A') {
1676                         bappend(ch);
1677                     } else {
1678                         if ("CDATA".equals(
1679                                 new String(mBuff, 0, mBuffIdx + 1)) != true) {
1680                             panic(FAULT);
1681                         }
1682                         bkch();
1683                         st = 2;
1684                     }
1685                     break;
1686 
1687                 case 2:     // the second '[' of the CDATA open
1688                     if (ch != '[') {
1689                         panic(FAULT);
1690                     }
1691                     mBuffIdx = -1;
1692                     st = 3;
1693                     break;
1694 
1695                 case 3:     // read data before the first ']'
1696                     if (ch != ']') {
1697                         bappend(ch);
1698                     } else {
1699                         st = 4;
1700                     }
1701                     break;
1702 
1703                 case 4:     // read the second ']' or continue to read the data
1704                     if (ch != ']') {
1705                         bappend(']');
1706                         bappend(ch);
1707                         st = 3;
1708                     } else {
1709                         st = 5;
1710                     }
1711                     break;
1712 
1713                 case 5:     // read '>' or continue to read the data
1714                     switch (ch) {
1715                         case ']':
1716                             bappend(']');
1717                             break;
1718 
1719                         case '>':
1720                             bflash();
1721                             st = -1;
1722                             break;
1723 
1724                         default:
1725                             bappend(']');
1726                             bappend(']');
1727                             bappend(ch);
1728                             st = 3;
1729                             break;
1730                     }
1731                     break;
1732 
1733                 default:
1734                     panic(FAULT);
1735             }
1736         }
1737     }
1738 
1739     /**
1740      * Reads a xml name.
1741      *
1742      * The xml name must conform "Namespaces in XML" specification. Therefore
1743      * the ':' character is not allowed in the name. This method should be used
1744      * for PI and entity names which may not have a namespace according to the
1745      * specification mentioned above.
1746      *
1747      * @param ns The true value turns namespace conformance on.
1748      * @return The name has been read.
1749      * @exception Exception When incorrect character appear in the name.
1750      * @exception IOException
1751      */
1752     protected String name(boolean ns)
1753             throws Exception {
1754         mBuffIdx = -1;
1755         bname(ns);
1756         return new String(mBuff, 1, mBuffIdx);
1757     }
1758 
1759     /**
1760      * Reads a qualified xml name.
1761      *
1762      * The characters of a qualified name is an array of characters. The first
1763      * (chars[0]) character is the index of the colon character which separates
1764      * the prefix from the local name. If the index is zero, the name does not
1765      * contain separator or the parser works in the namespace unaware mode. The
1766      * length of qualified name is the length of the array minus one.
1767      *
1768      * @param ns The true value turns namespace conformance on.
1769      * @return The characters of a qualified name.
1770      * @exception Exception When incorrect character appear in the name.
1771      * @exception IOException
1772      */
1773     protected char[] qname(boolean ns)
1774             throws Exception {
1775         mBuffIdx = -1;
1776         bname(ns);
1777         char chars[] = new char[mBuffIdx + 1];
1778         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1779         return chars;
1780     }
1781 
1782     /**
1783      * Reads the public or/and system identifiers.
1784      *
1785      * @param inp The input object.
1786      * @exception Exception is parser specific exception form panic method.
1787      * @exception IOException
1788      */
1789     private void pubsys(Input inp)
1790             throws Exception {
1791         Pair pair = pubsys(' ');
1792         inp.pubid = pair.name;
1793         inp.sysid = pair.value;
1794         del(pair);
1795     }
1796 
1797     /**
1798      * Reads the public or/and system identifiers.
1799      *
1800      * @param flag The 'N' allows public id be without system id.
1801      * @return The public or/and system identifiers pair.
1802      * @exception Exception is parser specific exception form panic method.
1803      * @exception IOException
1804      */
1805     @SuppressWarnings("fallthrough")
1806     private Pair pubsys(char flag) throws Exception {
1807         Pair ids = pair(null);
1808         String str = name(false);
1809         if ("PUBLIC".equals(str) == true) {
1810             bqstr('i');  // non-CDATA normalization [#4.2.2]
1811             ids.name = new String(mBuff, 1, mBuffIdx);
1812             switch (wsskip()) {
1813                 case '\"':
1814                 case '\'':
1815                     bqstr(' ');
1816                     ids.value = new String(mBuff, 1, mBuffIdx);
1817                     break;
1818 
1819                 case EOS:
1820                     panic(FAULT);
1821 
1822                 default:
1823                     if (flag != 'N') // [#4.7]
1824                     {
1825                         panic(FAULT);
1826                     }
1827                     ids.value = null;
1828                     break;
1829             }
1830             return ids;
1831         } else if ("SYSTEM".equals(str) == true) {
1832             ids.name = null;
1833             bqstr(' ');
1834             ids.value = new String(mBuff, 1, mBuffIdx);
1835             return ids;
1836         }
1837         panic(FAULT);
1838         return null;
1839     }
1840 
1841     /**
1842      * Reads an attribute value.
1843      *
1844      * The grammar this method can read is:
1845      * <pre>{@code
1846      * eqstr := S "=" qstr
1847      * qstr  := S ("'" string "'") | ('"' string '"')
1848      * }</pre>
1849      * This method resolves entities
1850      * inside a string unless the parser parses DTD.
1851      *
1852      * @param flag The '=' character forces the method to accept the '='
1853      * character before quoted string and read the following string as not an
1854      * attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
1855      * '-' - not an attribute value; 'd' - in DTD context.
1856      * @return The content of the quoted strign as a string.
1857      * @exception Exception is parser specific exception form panic method.
1858      * @exception IOException
1859      */
1860     protected String eqstr(char flag) throws Exception {
1861         if (flag == '=') {
1862             wsskip();
1863             if (getch() != '=') {
1864                 panic(FAULT);
1865             }
1866         }
1867         bqstr((flag == '=') ? '-' : flag);
1868         return new String(mBuff, 1, mBuffIdx);
1869     }
1870 
    /**
     * Resolves an entity.
     *
     * This method resolves built-in and character entity references. It also
     * reports external entities to the application.
     *
     * The method is a small state machine: states 0 and 1 read an entity
     * name, state 2 reads a decimal character reference and state 3 reads a
     * hexadecimal one. While reading, the reference text is appended to the
     * parser buffer after a leading '&amp;'; once the reference is resolved
     * the buffer offset is rewound so that the reference text is replaced by
     * its replacement (or by nothing, when a new input is pushed).
     *
     * @param flag The 'x' character forces the method to report a skipped
     * entity; 'i' character - indicates non-CDATA normalization.
     * @return Name of unresolved entity or <code>null</code> if entity had been
     * resolved successfully.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private String ent(char flag) throws Exception {
        char ch;
        // Buffer index at which the '&' below is stored; used to locate the
        // reference text and to rewind the buffer afterwards.
        int idx = mBuffIdx + 1;
        Input inp = null;
        String str = null;
        mESt = 0x100;  // reset the built-in entity recognizer
        bappend('&');
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // the first character of the entity name
                case 1:     // read built-in entity name
                    switch (chtyp(ch)) {
                        case 'd':
                        case '.':
                        case '-':
                            // these are not allowed as the first character
                            if (st != 1) {
                                panic(FAULT);
                            }
                            // intentional fall through: a regular name character
                        case 'a':
                        case 'A':
                        case '_':
                        case 'X':
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ':':
                            // a colon is rejected in namespace aware mode
                            if (mIsNSAware != false) {
                                panic(FAULT);
                            }
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ';':
                            if (mESt < 0x100) {
                                //              The entity is a built-in entity:
                                //              replace the reference text with
                                //              the recognized character
                                mBuffIdx = idx - 1;
                                bappend(mESt);
                                st = -1;
                                break;
                            } else if (mPh == PH_DTD) {
                                //              In DTD entity declaration has to resolve character
                                //              entities and include "as is" others. [#4.4.7]
                                bappend(';');
                                st = -1;
                                break;
                            }
                            //          Convert an entity name to a string
                            str = new String(mBuff, idx + 1, mBuffIdx - idx);
                            inp = mEnt.get(str);
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (inp != null) {
                                if (inp.chars == null) {
                                    //          External entity
                                    InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                    if (is != null) {
                                        push(new Input(BUFFSIZE_READER));
                                        setinp(is);
                                        mInp.pubid = inp.pubid;
                                        mInp.sysid = inp.sysid;
                                        str = null;  // the entity is resolved
                                    } else {
                                        //              Unresolved external entity
                                        if (flag != 'x') {
                                            panic(FAULT);  // unknown entity within markup
                                        }
                                        //              str is name of unresolved entity
                                    }
                                } else {
                                    //          Internal entity
                                    push(inp);
                                    str = null;  // the entity is resolved
                                }
                            } else {
                                //              Unknown or general unparsed entity
                                if (flag != 'x') {
                                    panic(FAULT);  // unknown entity within markup
                                }
                                //              str is name of unresolved entity
                            }
                            st = -1;
                            break;

                        case '#':
                            // '#' is only valid right after '&'
                            if (st != 0) {
                                panic(FAULT);
                            }
                            st = 2;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 2:     // read character entity
                    switch (chtyp(ch)) {
                        case 'd':
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                                // NOTE(review): values of 0xffff and above are
                                // rejected, so references to supplementary
                                // characters are not accepted here.
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                // empty or out of range number
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // A space, or any character read while a nested
                            // input is active, goes through the two-argument
                            // bappend (presumably applying the normalization
                            // selected by flag); others are appended as is.
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        case 'a':
                            //          If the entity buffer is empty and ch == 'x'
                            //          this is a hexadecimal reference ("&#x")
                            if ((mBuffIdx == idx) && (ch == 'x')) {
                                st = 3;
                                break;
                            }
                            // intentional fall through: any other letter is an error
                        default:
                            panic(FAULT);
                    }
                    break;

                case 3:     // read hex character entity
                    switch (chtyp(ch)) {
                        case 'A':
                        case 'a':
                        case 'd':
                            // letters which are not hex digits are caught by
                            // the number conversion below
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                                // NOTE(review): same upper limit as for the
                                // decimal form above.
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }

        return str;
    }
2064 
2065     /**
2066      * Resoves a parameter entity.
2067      *
2068      * This method resolves a parameter entity references. It is also reports
2069      * external entities to the application.
2070      *
2071      * @param flag The '-' instruct the method to do not set up surrounding
2072      * spaces [#4.4.8].
2073      * @exception Exception is parser specific exception form panic method.
2074      * @exception IOException
2075      */
2076     @SuppressWarnings("fallthrough")
2077     private void pent(char flag) throws Exception {
2078         char ch;
2079         int idx = mBuffIdx + 1;
2080         Input inp = null;
2081         String str = null;
2082         bappend('%');
2083         if (mPh != PH_DTD) // the DTD internal subset
2084         {
2085             return;         // Not Recognized [#4.4.1]
2086         }               //              Read entity name
2087         bname(false);
2088         str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2089         if (getch() != ';') {
2090             panic(FAULT);
2091         }
2092         inp = mPEnt.get(str);
2093         //              Restore the buffer offset
2094         mBuffIdx = idx - 1;
2095         if (inp != null) {
2096             if (inp.chars == null) {
2097                 //              External parameter entity
2098                 InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
2099                 if (is != null) {
2100                     if (flag != '-') {
2101                         bappend(' ');  // tail space
2102                     }
2103                     push(new Input(BUFFSIZE_READER));
2104                     // BUG: there is no leading space! [#4.4.8]
2105                     setinp(is);
2106                     mInp.pubid = inp.pubid;
2107                     mInp.sysid = inp.sysid;
2108                 } else {
2109                     //          Unresolved external parameter entity
2110                     skippedEnt("%" + str);
2111                 }
2112             } else {
2113                 //              Internal parameter entity
2114                 if (flag == '-') {
2115                     //          No surrounding spaces
2116                     inp.chIdx = 1;
2117                 } else {
2118                     //          Insert surrounding spaces
2119                     bappend(' ');  // tail space
2120                     inp.chIdx = 0;
2121                 }
2122                 push(inp);
2123             }
2124         } else {
2125             //          Unknown parameter entity
2126             skippedEnt("%" + str);
2127         }
2128     }
2129 
2130     /**
2131      * Recognizes and handles a namespace declaration.
2132      *
2133      * This method identifies a type of namespace declaration if any and puts
2134      * new mapping on top of prefix stack.
2135      *
2136      * @param name The attribute qualified name (<code>name.value</code> is a
2137      * <code>String</code> object which represents the attribute prefix).
2138      * @param value The attribute value.
2139      * @return <code>true</code> if a namespace declaration is recognized.
2140      */
2141     private boolean isdecl(Pair name, String value) {
2142         if (name.chars[0] == 0) {
2143             if ("xmlns".equals(name.name) == true) {
2144                 //              New default namespace declaration
2145                 mPref = pair(mPref);
2146                 mPref.list = mElm;  // prefix owner element
2147                 mPref.value = value;
2148                 mPref.name = "";
2149                 mPref.chars = NONS;
2150                 mElm.num++;  // namespace counter
2151                 return true;
2152             }
2153         } else {
2154             if (name.eqpref(XMLNS) == true) {
2155                 //              New prefix declaration
2156                 int len = name.name.length();
2157                 mPref = pair(mPref);
2158                 mPref.list = mElm;  // prefix owner element
2159                 mPref.value = value;
2160                 mPref.name = name.name;
2161                 mPref.chars = new char[len + 1];
2162                 mPref.chars[0] = (char) (len + 1);
2163                 name.name.getChars(0, len, mPref.chars, 1);
2164                 mElm.num++;  // namespace counter
2165                 return true;
2166             }
2167         }
2168         return false;
2169     }
2170 
2171     /**
2172      * Resolves a prefix.
2173      *
2174      * @return The namespace assigned to the prefix.
2175      * @exception Exception When mapping for specified prefix is not found.
2176      */
2177     private String rslv(char[] qname)
2178             throws Exception {
2179         for (Pair pref = mPref; pref != null; pref = pref.next) {
2180             if (pref.eqpref(qname) == true) {
2181                 return pref.value;
2182             }
2183         }
2184         if (qname[0] == 1) {  // QNames like ':local'
2185             for (Pair pref = mPref; pref != null; pref = pref.next) {
2186                 if (pref.chars[0] == 0) {
2187                     return pref.value;
2188                 }
2189             }
2190         }
2191         panic(FAULT);
2192         return null;
2193     }
2194 
2195     /**
2196      * Skips xml white space characters.
2197      *
2198      * This method skips white space characters (' ', '\t', '\n', '\r') and
2199      * looks ahead not white space character.
2200      *
2201      * @return The first not white space look ahead character.
2202      * @exception IOException
2203      */
2204     protected char wsskip()
2205             throws IOException {
2206         char ch;
2207         while (true) {
2208             //          Read next character
2209             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2210             if (ch < 0x80) {
2211                 if (nmttyp[ch] != 3) // [ \t\n\r]
2212                 {
2213                     break;
2214                 }
2215             } else {
2216                 break;
2217             }
2218         }
2219         mChIdx--;  // bkch();
2220         return ch;
2221     }
2222 
    /**
     * Reports document type.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity or <code>null</code>.
     * @param sysid The system identifier of the entity or <code>null</code>.
     * @exception SAXException if the implementation fails to handle the
     * notification.
     */
    protected abstract void docType(String name, String pubid, String sysid)
            throws SAXException;
2232 
    /**
     * Reports a comment.
     *
     * The comment parser passes its own character buffer as
     * <code>text</code>, so only the first <code>length</code> characters are
     * meaningful.
     *
     * @param text The comment text starting from first character.
     * @param length The number of characters in comment.
     */
    protected abstract void comm(char[] text, int length);
2240 
    /**
     * Reports a processing instruction.
     *
     * @param target The processing instruction target name.
     * @param body The processing instruction body text.
     * @exception Exception if the implementation fails to handle the
     * notification.
     */
    protected abstract void pi(String target, String body)
            throws Exception;
2249 
    /**
     * Reports new namespace prefix. The Namespace prefix (
     * <code>mPref.name</code>) being declared and the Namespace URI (
     * <code>mPref.value</code>) the prefix is mapped to. An empty string is
     * used for the default element namespace, which has no prefix.
     *
     * @exception Exception if the implementation fails to handle the
     * notification.
     */
    protected abstract void newPrefix()
            throws Exception;
2258 
    /**
     * Reports a skipped entity name.
     *
     * This is an abstract hook implemented by the concrete parser.
     *
     * @param name The entity name.
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void skippedEnt(String name)
            throws Exception;
2266 
    /**
     * Returns an <code>InputSource</code> for the specified entity or
     * <code>null</code>.
     *
     * This is an abstract hook implemented by the concrete parser.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity.
     * @param sysid The system identifier of the entity.
     * @return The input source for the entity, or <code>null</code>.
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract InputSource resolveEnt(
            String name, String pubid, String sysid)
            throws Exception;
2279 
    /**
     * Reports a notation declaration.
     *
     * This is an abstract hook implemented by the concrete parser.
     *
     * @param name The notation's name.
     * @param pubid The notation's public identifier, or null if none was given.
     * @param sysid The notation's system identifier, or null if none was given.
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void notDecl(String name, String pubid, String sysid)
            throws Exception;
2289 
    /**
     * Reports an unparsed entity declaration.
     *
     * This is an abstract hook implemented by the concrete parser.
     *
     * @param name The unparsed entity's name.
     * @param pubid The entity's public identifier, or null if none was given.
     * @param sysid The entity's system identifier.
     * @param notation The name of the associated notation.
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void unparsedEntDecl(
            String name, String pubid, String sysid, String notation)
            throws Exception;
2301 
    /**
     * Notifies the handler about a fatal parsing error.
     *
     * The parsing routines of this class call this method (typically with
     * {@link #FAULT} as the message) whenever they meet input they cannot
     * accept.
     *
     * NOTE(review): several callers keep executing statements placed after the
     * call, so they appear to rely on an implementation always throwing;
     * confirm against the concrete parsers.
     *
     * @param msg The problem description message.
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void panic(String msg)
            throws Exception;
2309 
    /**
     * Reads a qualified xml name.
     *
     * This is a low level routine which leaves a qName in the buffer. The
     * characters of a qualified name are stored as an array of characters. The
     * first (chars[0]) character is the index of the colon character which
     * separates the prefix from the local name. If the index is zero, the name
     * does not contain a separator or the parser works in the namespace
     * unaware mode. The length of the qualified name is the length of the
     * array minus one.
     *
     * Name characters are not stored one by one. The loop only counts them
     * (<code>bchidx</code>) and the pending run is copied from
     * <code>mChars</code> to <code>mBuff</code> in bulk by <code>bcopy</code>,
     * either when the character buffer is exhausted or when the name ends.
     *
     * @param ns The true value turns namespace conformance on.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    private void bname(boolean ns)
            throws Exception {
        char ch;
        char type;
        mBuffIdx++;  // allocate a char for colon offset
        int bqname = mBuffIdx;      // buffer index of the colon offset slot
        int bcolon = bqname;        // buffer index of the colon; equals bqname while no colon was accepted
        int bchidx = bqname + 1;    // buffer index the next name character will occupy
        int bstart = bchidx;        // buffer index where the pending (not yet copied) run starts
        int cstart = mChIdx;        // index in mChars where the pending run starts
        //              States 0/1 read the prefix, states 2/3 read the suffix.
        //              In namespace unaware mode the whole name is a suffix.
        short st = (short) ((ns == true) ? 0 : 2);
        while (true) {
            //          Read next character
            if (mChIdx >= mChLen) {
                //      The character buffer is used up: save the pending run
                //      first, then call getch() for more input (defined
                //      elsewhere; presumably it refills mChars) and push the
                //      character it consumed back.
                bcopy(cstart, bstart);
                getch();
                mChIdx--;  // bkch();
                //      A new pending run starts at the current positions.
                cstart = mChIdx;
                bstart = bchidx;
            }
            ch = mChars[mChIdx++];
            //          Characters at or above 0x80 are not looked up in nmttyp
            //          and keep the name start class 0.
            type = (char) 0;  // [X]
            if (ch < 0x80) {
                type = (char) nmttyp[ch];
            } else if (ch == EOS) {
                panic(FAULT);
            }
            //          Parse QName
            switch (st) {
                case 0:     // read the first char of the prefix
                case 2:     // read the first char of the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                            bchidx++;  // append char to the buffer
                            st++;      // (st == 0)? 1: 3;
                            break;

                        case 1:  // [:]
                            //  The colon is pushed back so that it is read
                            //  again, and accounted for, in state 1 or 3.
                            mChIdx--;  // bkch();
                            st++;      // (st == 0)? 1: 3;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 1:     // read the prefix
                case 3:     // read the suffix
                    switch (type) {
                        case 0:  // [aA_X]
                        case 2:  // [.-d]
                            bchidx++;  // append char to the buffer
                            break;

                        case 1:  // [:]
                            bchidx++;  // append char to the buffer
                            if (ns == true) {
                                if (bcolon != bqname) {
                                    panic(FAULT);  // it must be only one colon
                                }
                                //      Remember where the colon is stored.
                                bcolon = bchidx - 1;
                                if (st == 1) {
                                    //  The prefix is complete; the next
                                    //  character has to start the suffix.
                                    st = 2;
                                }
                            }
                            break;

                        default:
                            //  Any other character ends the name: push it
                            //  back, save the pending run and store the colon
                            //  offset (zero when no colon was accepted) in the
                            //  slot allocated at the start.
                            mChIdx--;  // bkch();
                            bcopy(cstart, bstart);
                            mBuff[bqname] = (char) (bcolon - bqname);
                            return;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
    }
2405 
2406     /**
2407      * Reads a nmtoken.
2408      *
2409      * This is low level routine which leaves a nmtoken in the buffer.
2410      *
2411      * @exception Exception is parser specific exception form panic method.
2412      * @exception IOException
2413      */
2414     @SuppressWarnings("fallthrough")
2415     private void bntok() throws Exception {
2416         char ch;
2417         mBuffIdx = -1;
2418         bappend((char) 0);  // default offset to the colon char
2419         while (true) {
2420             ch = getch();
2421             switch (chtyp(ch)) {
2422                 case 'a':
2423                 case 'A':
2424                 case 'd':
2425                 case '.':
2426                 case ':':
2427                 case '-':
2428                 case '_':
2429                 case 'X':
2430                     bappend(ch);
2431                     break;
2432 
2433                 case 'Z':
2434                     panic(FAULT);
2435 
2436                 default:
2437                     bkch();
2438                     return;
2439             }
2440         }
2441     }
2442 
2443     /**
2444      * Recognizes a keyword.
2445      *
2446      * This is low level routine which recognizes one of keywords in the buffer.
2447      * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
2448      * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
2449      * Q IMPLIED - I FIXED - F
2450      *
2451      * @return an id of a keyword or '?'.
2452      * @exception Exception is parser specific exception form panic method.
2453      * @exception IOException
2454      */
2455     private char bkeyword()
2456             throws Exception {
2457         String str = new String(mBuff, 1, mBuffIdx);
2458         switch (str.length()) {
2459             case 2:  // ID
2460                 return ("ID".equals(str) == true) ? 'i' : '?';
2461 
2462             case 5:  // IDREF, CDATA, FIXED
2463                 switch (mBuff[1]) {
2464                     case 'I':
2465                         return ("IDREF".equals(str) == true) ? 'r' : '?';
2466                     case 'C':
2467                         return ("CDATA".equals(str) == true) ? 'c' : '?';
2468                     case 'F':
2469                         return ("FIXED".equals(str) == true) ? 'F' : '?';
2470                     default:
2471                         break;
2472                 }
2473                 break;
2474 
2475             case 6:  // IDREFS, ENTITY
2476                 switch (mBuff[1]) {
2477                     case 'I':
2478                         return ("IDREFS".equals(str) == true) ? 'R' : '?';
2479                     case 'E':
2480                         return ("ENTITY".equals(str) == true) ? 'n' : '?';
2481                     default:
2482                         break;
2483                 }
2484                 break;
2485 
2486             case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2487                 switch (mBuff[1]) {
2488                     case 'I':
2489                         return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2490                     case 'N':
2491                         return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2492                     case 'A':
2493                         return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2494                     case 'E':
2495                         return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2496                     default:
2497                         break;
2498                 }
2499                 break;
2500 
2501             case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2502                 switch (mBuff[2]) {
2503                     case 'N':
2504                         return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2505                     case 'M':
2506                         return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2507                     case 'O':
2508                         return ("NOTATION".equals(str) == true) ? 'o' : '?';
2509                     case 'E':
2510                         return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2511                     default:
2512                         break;
2513                 }
2514                 break;
2515 
2516             default:
2517                 break;
2518         }
2519         return '?';
2520     }
2521 
    /**
     * Reads a single or double quoted string into the buffer.
     *
     * The buffer is restarted with a zero colon offset in its first slot and
     * the string content is appended after it. White space in front of the
     * opening quote is skipped. This method resolves entities inside a string
     * unless the parser parses DTD; in the DTD context parameter entity
     * references are processed instead.
     *
     * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
     * not an attribute value; 'd' - in DTD context.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private void bqstr(char flag) throws Exception {
        Input inp = mInp;  // remember the original input
        mBuffIdx = -1;
        bappend((char) 0);  // default offset to the colon char
        char ch;
        //              The loop ends when the state becomes negative, i.e.
        //              when the closing quote has been read.
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // read a single or double quote
                    switch (ch) {
                        case ' ':
                        case '\n':
                        case '\r':
                        case '\t':
                            //  skip white space before the opening quote
                            break;

                        case '\'':
                            st = 2;  // read a single quoted string
                            break;

                        case '\"':
                            st = 3;  // read a double quoted string
                            break;

                        default:
                            panic(FAULT);
                            break;
                    }
                    break;

                case 2:     // read a single quoted string
                case 3:     // read a double quoted string
                    switch (ch) {
                        case '\'':
                            //  A quote closes the string only if it matches
                            //  the opening one and is read from the input
                            //  which was current on entry; otherwise it is
                            //  ordinary content.
                            if ((st == 2) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '\"':
                            //  Same rule as for the single quote above.
                            if ((st == 3) && (mInp == inp)) {
                                st = -1;
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '&':
                            //  Outside of the DTD context the reference is
                            //  handed to ent(); in the DTD context the
                            //  ampersand is kept as is.
                            if (flag != 'd') {
                                ent(flag);
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '%':
                            //  Only in the DTD context the percent sign is
                            //  handed to pent(); otherwise it is plain text.
                            if (flag == 'd') {
                                pent('-');
                            } else {
                                bappend(ch);
                            }
                            break;

                        case '<':
                            //  '<' is accepted only when the string is not an
                            //  attribute value ('-') or in the DTD context.
                            if ((flag == '-') || (flag == 'd')) {
                                bappend(ch);
                            } else {
                                panic(FAULT);
                            }
                            break;

                        case EOS:               // EOS before single/double quote
                            panic(FAULT);
                            //  No break here: if panic returns, control
                            //  continues with the branches below.

                        case '\r':     // EOL processing [#2.11 & #3.3.3]
                            //  Unless normalization is off (' ') or the input
                            //  has a next element on the input stack, a "\r"
                            //  or "\r\n" pair is replaced by a single '\n'.
                            if (flag != ' ' && mInp.next == null) {
                                if (getch() != '\n') {
                                    bkch();
                                }
                                ch = '\n';
                            }
                            //  No break here: the character is appended by
                            //  the default branch.
                        default:
                            bappend(ch, flag);
                            break;
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }
        //              There is maximum one space at the end of the string in
        //              i-mode (non CDATA normalization) and it has to be removed.
        if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
            mBuffIdx -= 1;
        }
    }
2633 
    /**
     * Reports characters and empties the parser's buffer. This method is called
     * only if the parser is going to return control to the main loop. This
     * means that this method may use the parser buffer to report white space
     * without copying characters to a temporary buffer.
     *
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void bflash()
            throws Exception;
2642 
    /**
     * Reports white space characters and empties the parser's buffer. This
     * method is called only if the parser is going to return control to the
     * main loop. This means that this method may use the parser buffer to
     * report white space without copying characters to a temporary buffer.
     *
     * @exception Exception is a parser specific exception; the concrete type
     * is chosen by the implementation.
     */
    protected abstract void bflash_ws()
            throws Exception;
2651 
2652     /**
2653      * Appends a character to parser's buffer with normalization.
2654      *
2655      * @param ch The character to append to the buffer.
2656      * @param mode The normalization mode.
2657      */
2658     private void bappend(char ch, char mode) {
2659         //              This implements attribute value normalization as
2660         //              described in the XML specification [#3.3.3].
2661         switch (mode) {
2662             case 'i':  // non CDATA normalization
2663                 switch (ch) {
2664                     case ' ':
2665                     case '\n':
2666                     case '\r':
2667                     case '\t':
2668                         if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
2669                             bappend(' ');
2670                         }
2671                         return;
2672 
2673                     default:
2674                         break;
2675                 }
2676                 break;
2677 
2678             case 'c':  // CDATA normalization
2679                 switch (ch) {
2680                     case '\n':
2681                     case '\r':
2682                     case '\t':
2683                         ch = ' ';
2684                         break;
2685 
2686                     default:
2687                         break;
2688                 }
2689                 break;
2690 
2691             default:  // no normalization
2692                 break;
2693         }
2694         mBuffIdx++;
2695         if (mBuffIdx < mBuff.length) {
2696             mBuff[mBuffIdx] = ch;
2697         } else {
2698             mBuffIdx--;
2699             bappend(ch);
2700         }
2701     }
2702 
2703     /**
2704      * Appends a character to parser's buffer.
2705      *
2706      * @param ch The character to append to the buffer.
2707      */
2708     private void bappend(char ch) {
2709         try {
2710             mBuff[++mBuffIdx] = ch;
2711         } catch (Exception exp) {
2712             //          Double the buffer size
2713             char buff[] = new char[mBuff.length << 1];
2714             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2715             mBuff = buff;
2716             mBuff[mBuffIdx] = ch;
2717         }
2718     }
2719 
2720     /**
2721      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2722      * parser's buffer (mBuff).
2723      *
2724      * @param cidx The character buffer (mChars) start index.
2725      * @param bidx The parser buffer (mBuff) start index.
2726      */
2727     private void bcopy(int cidx, int bidx) {
2728         int length = mChIdx - cidx;
2729         if ((bidx + length + 1) >= mBuff.length) {
2730             //          Expand the buffer
2731             char buff[] = new char[mBuff.length + length];
2732             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2733             mBuff = buff;
2734         }
2735         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2736         mBuffIdx += length;
2737     }
2738 
2739     /**
2740      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2741      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe
2742      * 0x100 is a built-in entity replacement character.
2743      *
2744      * @param ch the next character of an entity name.
2745      */
2746     @SuppressWarnings("fallthrough")
2747     private void eappend(char ch) {
2748         switch (mESt) {
2749             case 0x100:  // "l" or "g" or "a" or "q"
2750                 switch (ch) {
2751                     case 'l':
2752                         mESt = 0x101;
2753                         break;
2754                     case 'g':
2755                         mESt = 0x102;
2756                         break;
2757                     case 'a':
2758                         mESt = 0x103;
2759                         break;
2760                     case 'q':
2761                         mESt = 0x107;
2762                         break;
2763                     default:
2764                         mESt = 0x200;
2765                         break;
2766                 }
2767                 break;
2768 
2769             case 0x101:  // "lt"
2770                 mESt = (ch == 't') ? '<' : (char) 0x200;
2771                 break;
2772 
2773             case 0x102:  // "gt"
2774                 mESt = (ch == 't') ? '>' : (char) 0x200;
2775                 break;
2776 
2777             case 0x103:  // "am" or "ap"
2778                 switch (ch) {
2779                     case 'm':
2780                         mESt = 0x104;
2781                         break;
2782                     case 'p':
2783                         mESt = 0x105;
2784                         break;
2785                     default:
2786                         mESt = 0x200;
2787                         break;
2788                 }
2789                 break;
2790 
2791             case 0x104:  // "amp"
2792                 mESt = (ch == 'p') ? '&' : (char) 0x200;
2793                 break;
2794 
2795             case 0x105:  // "apo"
2796                 mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2797                 break;
2798 
2799             case 0x106:  // "apos"
2800                 mESt = (ch == 's') ? '\'' : (char) 0x200;
2801                 break;
2802 
2803             case 0x107:  // "qu"
2804                 mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2805                 break;
2806 
2807             case 0x108:  // "quo"
2808                 mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2809                 break;
2810 
2811             case 0x109:  // "quot"
2812                 mESt = (ch == 't') ? '\"' : (char) 0x200;
2813                 break;
2814 
2815             case '<':   // "lt"
2816             case '>':   // "gt"
2817             case '&':   // "amp"
2818             case '\'':  // "apos"
2819             case '\"':  // "quot"
2820                 mESt = 0x200;
2821             default:
2822                 break;
2823         }
2824     }
2825 
2826     /**
2827      * Sets up a new input source on the top of the input stack. Note, the first
2828      * byte returned by the entity's byte stream has to be the first byte in the
2829      * entity. However, the parser does not expect the byte order mask in both
2830      * cases when encoding is provided by the input source.
2831      *
2832      * @param is A new input source to set up.
2833      * @exception IOException If any IO errors occur.
2834      * @exception Exception is parser specific exception form panic method.
2835      */
2836     protected void setinp(InputSource is)
2837             throws Exception {
2838         Reader reader = null;
2839         mChIdx = 0;
2840         mChLen = 0;
2841         mChars = mInp.chars;
2842         mInp.src = null;
2843         if (mPh < PH_DOC_START) {
2844             mIsSAlone = false;  // default [#2.9]
2845         }
2846         mIsSAloneSet = false;
2847         if (is.getCharacterStream() != null) {
2848             //          Ignore encoding in the xml text decl.
2849             reader = is.getCharacterStream();
2850             xml(reader);
2851         } else if (is.getByteStream() != null) {
2852             String expenc;
2853             if (is.getEncoding() != null) {
2854                 //              Ignore encoding in the xml text decl.
2855                 expenc = is.getEncoding().toUpperCase();
2856                 if (expenc.equals("UTF-16")) {
2857                     reader = bom(is.getByteStream(), 'U');  // UTF-16 [#4.3.3]
2858                 } else {
2859                     reader = enc(expenc, is.getByteStream());
2860                 }
2861                 xml(reader);
2862             } else {
2863                 //              Get encoding from BOM or the xml text decl.
2864                 reader = bom(is.getByteStream(), ' ');
2865                 /**
2866                  * [#4.3.3] requires BOM for UTF-16, however, it's not uncommon
2867                  * that it may be missing. A mature technique exists in Xerces
2868                  * to further check for possible UTF-16 encoding
2869                  */
2870                 if (reader == null) {
2871                     reader = utf16(is.getByteStream());
2872                 }
2873 
2874                 if (reader == null) {
2875                     //          Encoding is defined by the xml text decl.
2876                     reader = enc("UTF-8", is.getByteStream());
2877                     expenc = xml(reader);
2878                     if (!expenc.equals("UTF-8")) {
2879                         if (expenc.startsWith("UTF-16")) {
2880                             panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
2881                         }
2882                         reader = enc(expenc, is.getByteStream());
2883                     }
2884                 } else {
2885                     //          Encoding is defined by the BOM.
2886                     xml(reader);
2887                 }
2888             }
2889         } else {
2890             //          There is no support for public/system identifiers.
2891             panic(FAULT);
2892         }
2893         mInp.src = reader;
2894         mInp.pubid = is.getPublicId();
2895         mInp.sysid = is.getSystemId();
2896     }
2897 
2898     /**
2899      * Determines the entity encoding.
2900      *
2901      * This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the
2902      * first byte returned by the entity's byte stream has to be the first byte
2903      * in the entity. Also, there is no support for UCS-4.
2904      *
2905      * @param is A byte stream of the entity.
2906      * @param hint An encoding hint, character U means UTF-16.
2907      * @return a reader constructed from the BOM or UTF-8 by default.
2908      * @exception Exception is parser specific exception form panic method.
2909      * @exception IOException
2910      */
2911     private Reader bom(InputStream is, char hint)
2912             throws Exception {
2913         int val = is.read();
2914         switch (val) {
2915             case 0xef:     // UTF-8
2916                 if (hint == 'U') // must be UTF-16
2917                 {
2918                     panic(FAULT);
2919                 }
2920                 if (is.read() != 0xbb) {
2921                     panic(FAULT);
2922                 }
2923                 if (is.read() != 0xbf) {
2924                     panic(FAULT);
2925                 }
2926                 return new ReaderUTF8(is);
2927 
2928             case 0xfe:     // UTF-16, big-endian
2929                 if (is.read() != 0xff) {
2930                     panic(FAULT);
2931                 }
2932                 return new ReaderUTF16(is, 'b');
2933 
2934             case 0xff:     // UTF-16, little-endian
2935                 if (is.read() != 0xfe) {
2936                     panic(FAULT);
2937                 }
2938                 return new ReaderUTF16(is, 'l');
2939 
2940             case -1:
2941                 mChars[mChIdx++] = EOS;
2942                 return new ReaderUTF8(is);
2943 
2944             default:
2945                 if (hint == 'U') // must be UTF-16
2946                 {
2947                     panic(FAULT);
2948                 }
2949                 //              Read the rest of UTF-8 character
2950                 switch (val & 0xf0) {
2951                     case 0xc0:
2952                     case 0xd0:
2953                         mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f));
2954                         break;
2955 
2956                     case 0xe0:
2957                         mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2958                                 | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2959                         break;
2960 
2961                     case 0xf0:  // UCS-4 character
2962                         throw new UnsupportedEncodingException();
2963 
2964                     default:
2965                         mChars[mChIdx++] = (char) val;
2966                         break;
2967                 }
2968                 return null;
2969         }
2970     }
2971 
2972 
2973     /**
2974      * Using a mature technique from Xerces, this method checks further after
2975      * the bom method above to see if the encoding is UTF-16
2976      *
2977      * @param is A byte stream of the entity.
2978      * @return a reader, may be null
2979      * @exception Exception is parser specific exception form panic method.
2980      * @exception IOException
2981      */
2982     private Reader utf16(InputStream is)
2983             throws Exception {
2984         if (mChIdx != 0) {
2985             //The bom method has read ONE byte into the buffer.
2986             byte b0 = (byte)mChars[0];
2987             if (b0 == 0x00 || b0 == 0x3C) {
2988                 int b1 = is.read();
2989                 int b2 = is.read();
2990                 int b3 = is.read();
2991                 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2992                     // UTF-16, big-endian, no BOM
2993                     mChars[0] = (char)(b1);
2994                     mChars[mChIdx++] = (char)(b3);
2995                     return new ReaderUTF16(is, 'b');
2996                 } else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2997                     // UTF-16, little-endian, no BOM
2998                     mChars[0] = (char)(b0);
2999                     mChars[mChIdx++] = (char)(b2);
3000                     return new ReaderUTF16(is, 'l');
3001                 } else {
3002                     /**not every InputStream supports reset, so we have to remember
3003                      * the state for further parsing
3004                     **/
3005                     mChars[0] = (char)(b0);
3006                     mChars[mChIdx++] = (char)(b1);
3007                     mChars[mChIdx++] = (char)(b2);
3008                     mChars[mChIdx++] = (char)(b3);
3009                 }
3010 
3011             }
3012         }
3013         return null;
3014     }
3015     /**
3016      * Parses the xml text declaration.
3017      *
3018      * This method gets encoding from the xml text declaration [#4.3.1] if any.
3019      * The method assumes the buffer (mChars) is big enough to accommodate whole
3020      * xml text declaration.
3021      *
3022      * @param reader is entity reader.
3023      * @return The xml text declaration encoding or default UTF-8 encoding.
3024      * @exception Exception is parser specific exception form panic method.
3025      * @exception IOException
3026      */
3027     private String xml(Reader reader)
3028             throws Exception {
3029         String str = null;
3030         String enc = "UTF-8";
3031         char ch;
3032         int val;
3033         short st = 0;
3034         int byteRead =  mChIdx; //number of bytes read prior to entering this method
3035 
3036         while (st >= 0 && mChIdx < mChars.length) {
3037             if (st < byteRead) {
3038                 ch = mChars[st];
3039             } else {
3040                 ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3041                 mChars[mChIdx++] = ch;
3042             }
3043 
3044             switch (st) {
3045                 case 0:     // read '<' of xml declaration
3046                     switch (ch) {
3047                         case '<':
3048                             st = 1;
3049                             break;
3050 
3051                         case 0xfeff:    // the byte order mask
3052                             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3053                             mChars[mChIdx - 1] = ch;
3054                             st = (short) ((ch == '<') ? 1 : -1);
3055                             break;
3056 
3057                         default:
3058                             st = -1;
3059                             break;
3060                     }
3061                     break;
3062 
3063                 case 1:     // read '?' of xml declaration [#4.3.1]
3064                     st = (short) ((ch == '?') ? 2 : -1);
3065                     break;
3066 
3067                 case 2:     // read 'x' of xml declaration [#4.3.1]
3068                     st = (short) ((ch == 'x') ? 3 : -1);
3069                     break;
3070 
3071                 case 3:     // read 'm' of xml declaration [#4.3.1]
3072                     st = (short) ((ch == 'm') ? 4 : -1);
3073                     break;
3074 
3075                 case 4:     // read 'l' of xml declaration [#4.3.1]
3076                     st = (short) ((ch == 'l') ? 5 : -1);
3077                     break;
3078 
3079                 case 5:     // read white space after 'xml'
3080                     switch (ch) {
3081                         case ' ':
3082                         case '\t':
3083                         case '\r':
3084                         case '\n':
3085                             st = 6;
3086                             break;
3087 
3088                         default:
3089                             st = -1;
3090                             break;
3091                     }
3092                     break;
3093 
3094                 case 6:     // read content of xml declaration
3095                     switch (ch) {
3096                         case '?':
3097                             st = 7;
3098                             break;
3099 
3100                         case EOS:
3101                             st = -2;
3102                             break;
3103 
3104                         default:
3105                             break;
3106                     }
3107                     break;
3108 
3109                 case 7:     // read '>' after '?' of xml declaration
3110                     switch (ch) {
3111                         case '>':
3112                         case EOS:
3113                             st = -2;
3114                             break;
3115 
3116                         default:
3117                             st = 6;
3118                             break;
3119                     }
3120                     break;
3121 
3122                 default:
3123                     panic(FAULT);
3124                     break;
3125             }
3126         }
3127         mChLen = mChIdx;
3128         mChIdx = 0;
3129         //              If there is no xml text declaration, the encoding is default.
3130         if (st == -1) {
3131             return enc;
3132         }
3133         mChIdx = 5;  // the first white space after "<?xml"
3134         //              Parse the xml text declaration
3135         for (st = 0; st >= 0;) {
3136             ch = getch();
3137             switch (st) {
3138                 case 0:     // skip spaces after the xml declaration name
3139                     if (chtyp(ch) != ' ') {
3140                         bkch();
3141                         st = 1;
3142                     }
3143                     break;
3144 
3145                 case 1:     // read xml declaration version
3146                 case 2:     // read xml declaration encoding or standalone
3147                 case 3:     // read xml declaration standalone
3148                     switch (chtyp(ch)) {
3149                         case 'a':
3150                         case 'A':
3151                         case '_':
3152                             bkch();
3153                             str = name(false).toLowerCase();
3154                             if ("version".equals(str) == true) {
3155                                 if (st != 1) {
3156                                     panic(FAULT);
3157                                 }
3158                                 if ("1.0".equals(eqstr('=')) != true) {
3159                                     panic(FAULT);
3160                                 }
3161                                 mInp.xmlver = 0x0100;
3162                                 st = 2;
3163                             } else if ("encoding".equals(str) == true) {
3164                                 if (st != 2) {
3165                                     panic(FAULT);
3166                                 }
3167                                 mInp.xmlenc = eqstr('=').toUpperCase();
3168                                 enc = mInp.xmlenc;
3169                                 st = 3;
3170                             } else if ("standalone".equals(str) == true) {
3171                                 if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
3172                                 {
3173                                     panic(FAULT);
3174                                 }
3175                                 str = eqstr('=').toLowerCase();
3176                                 //              Check the 'standalone' value and use it [#5.1]
3177                                 if (str.equals("yes") == true) {
3178                                     mIsSAlone = true;
3179                                 } else if (str.equals("no") == true) {
3180                                     mIsSAlone = false;
3181                                 } else {
3182                                     panic(FAULT);
3183                                 }
3184                                 mIsSAloneSet = true;
3185                                 st = 4;
3186                             } else {
3187                                 panic(FAULT);
3188                             }
3189                             break;
3190 
3191                         case ' ':
3192                             break;
3193 
3194                         case '?':
3195                             if (st == 1) {
3196                                 panic(FAULT);
3197                             }
3198                             bkch();
3199                             st = 4;
3200                             break;
3201 
3202                         default:
3203                             panic(FAULT);
3204                     }
3205                     break;
3206 
3207                 case 4:     // end of xml declaration
3208                     switch (chtyp(ch)) {
3209                         case '?':
3210                             if (getch() != '>') {
3211                                 panic(FAULT);
3212                             }
3213                             if (mPh <= PH_DOC_START) {
3214                                 mPh = PH_MISC_DTD;  // misc before DTD
3215                             }
3216                             st = -1;
3217                             break;
3218 
3219                         case ' ':
3220                             break;
3221 
3222                         default:
3223                             panic(FAULT);
3224                     }
3225                     break;
3226 
3227                 default:
3228                     panic(FAULT);
3229             }
3230         }
3231         return enc;
3232     }
3233 
3234     /**
3235      * Sets up the document reader.
3236      *
3237      * @param name an encoding name.
3238      * @param is the document byte input stream.
3239      * @return a reader constructed from encoding name and input stream.
3240      * @exception UnsupportedEncodingException
3241      */
3242     private Reader enc(String name, InputStream is)
3243             throws UnsupportedEncodingException {
3244         //              DO NOT CLOSE current reader if any!
3245         if (name.equals("UTF-8")) {
3246             return new ReaderUTF8(is);
3247         } else if (name.equals("UTF-16LE")) {
3248             return new ReaderUTF16(is, 'l');
3249         } else if (name.equals("UTF-16BE")) {
3250             return new ReaderUTF16(is, 'b');
3251         } else {
3252             return new InputStreamReader(is, name);
3253         }
3254     }
3255 
3256     /**
3257      * Sets up current input on the top of the input stack.
3258      *
3259      * @param inp A new input to set up.
3260      */
3261     protected void push(Input inp) {
3262         mInp.chLen = mChLen;
3263         mInp.chIdx = mChIdx;
3264         inp.next = mInp;
3265         mInp = inp;
3266         mChars = inp.chars;
3267         mChLen = inp.chLen;
3268         mChIdx = inp.chIdx;
3269     }
3270 
3271     /**
3272      * Restores previous input on the top of the input stack.
3273      */
3274     protected void pop() {
3275         if (mInp.src != null) {
3276             try {
3277                 mInp.src.close();
3278             } catch (IOException ioe) {
3279             }
3280             mInp.src = null;
3281         }
3282         mInp = mInp.next;
3283         if (mInp != null) {
3284             mChars = mInp.chars;
3285             mChLen = mInp.chLen;
3286             mChIdx = mInp.chIdx;
3287         } else {
3288             mChars = null;
3289             mChLen = 0;
3290             mChIdx = 0;
3291         }
3292     }
3293 
3294     /**
3295      * Maps a character to its type.
3296      *
3297      * Possible character type values are:
3298      * <ul>
3299      * <li>' ' - for any kind of whitespace character;</li>
3300      * <li>'a' - for any lower case alphabetical character value;</li>
3301      * <li>'A' - for any upper case alphabetical character value;</li>
3302      * <li>'d' - for any decimal digit character value;</li>
3303      * <li>'z' - for any character less than ' ' except '\t', '\n', '\r';</li>
3304      * <li>'X' - for any not ASCII character;</li>
3305      * <li>'Z' - for EOS character.</li>
3306      * </ul>
3307      * An ASCII (7 bit) character which does not fall in any category
3308      * listed above is mapped to itself.
3309      *
3310      * @param ch The character to map.
3311      * @return The type of character.
3312      */
3313     protected char chtyp(char ch) {
3314         if (ch < 0x80) {
3315             return (char) asctyp[ch];
3316         }
3317         return (ch != EOS) ? 'X' : 'Z';
3318     }
3319 
3320     /**
3321      * Retrieves the next character in the document.
3322      *
3323      * @return The next character in the document.
3324      */
3325     protected char getch()
3326             throws IOException {
3327         if (mChIdx >= mChLen) {
3328             if (mInp.src == null) {
3329                 pop();  // remove internal entity
3330                 return getch();
3331             }
3332             //          Read new portion of the document characters
3333             int Num = mInp.src.read(mChars, 0, mChars.length);
3334             if (Num < 0) {
3335                 if (mInp != mDoc) {
3336                     pop();  // restore the previous input
3337                     return getch();
3338                 } else {
3339                     mChars[0] = EOS;
3340                     mChLen = 1;
3341                 }
3342             } else {
3343                 mChLen = Num;
3344             }
3345             mChIdx = 0;
3346         }
3347         return mChars[mChIdx++];
3348     }
3349 
3350     /**
3351      * Puts back the last read character.
3352      *
3353      * This method <strong>MUST NOT</strong> be called more than once after each
3354      * call of {@link #getch getch} method.
3355      */
3356     protected void bkch()
3357             throws Exception {
3358         if (mChIdx <= 0) {
3359             panic(FAULT);
3360         }
3361         mChIdx--;
3362     }
3363 
3364     /**
3365      * Sets the current character.
3366      *
3367      * @param ch The character to set.
3368      */
3369     protected void setch(char ch) {
3370         mChars[mChIdx] = ch;
3371     }
3372 
3373     /**
3374      * Finds a pair in the pair chain by a qualified name.
3375      *
3376      * @param chain The first element of the chain of pairs.
3377      * @param qname The qualified name.
3378      * @return A pair with the specified qualified name or null.
3379      */
3380     protected Pair find(Pair chain, char[] qname) {
3381         for (Pair pair = chain; pair != null; pair = pair.next) {
3382             if (pair.eqname(qname) == true) {
3383                 return pair;
3384             }
3385         }
3386         return null;
3387     }
3388 
3389     /**
3390      * Provides an instance of a pair.
3391      *
3392      * @param next The reference to a next pair.
3393      * @return An instance of a pair.
3394      */
3395     protected Pair pair(Pair next) {
3396         Pair pair;
3397 
3398         if (mDltd != null) {
3399             pair = mDltd;
3400             mDltd = pair.next;
3401         } else {
3402             pair = new Pair();
3403         }
3404         pair.next = next;
3405 
3406         return pair;
3407     }
3408 
3409     /**
3410      * Deletes an instance of a pair.
3411      *
3412      * @param pair The pair to delete.
3413      * @return A reference to the next pair in a chain.
3414      */
3415     protected Pair del(Pair pair) {
3416         Pair next = pair.next;
3417 
3418         pair.name = null;
3419         pair.value = null;
3420         pair.chars = null;
3421         pair.list = null;
3422         pair.next = mDltd;
3423         mDltd = pair;
3424 
3425         return next;
3426     }
3427 }