New src/share/classes/jdk/internal/util/xml/impl/Parser.java

   1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.internal.util.xml.impl;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.InputStreamReader;
  31 import java.io.Reader;
  32 import java.io.UnsupportedEncodingException;
  33 import java.util.HashMap;
  34 import java.util.Map;
  35 import jdk.internal.org.xml.sax.InputSource;
  36 import jdk.internal.org.xml.sax.SAXException;
  37 
  38 /**
  39  * XML non-validating parser engine.
  40  */
  41 public abstract class Parser {
  42 
  43     public final static String FAULT = "";  // (empty) message passed to panic()
  44     protected final static int BUFFSIZE_READER = 512;  // size given to new Input(...)
  45     protected final static int BUFFSIZE_PARSER = 128;  // length of mBuff set by constructor
  46     /**
  47      * The end of stream character.
  48      */
  49     public final static char EOS = 0xffff;
  50     private Pair mNoNS; // there is no namespace
  51     private Pair mXml;  // the xml namespace
  52     private Map<String, Input> mEnt;  // the entities look up table
  53     private Map<String, Input> mPEnt; // the parameter entities look up table
  54     protected boolean mIsSAlone;     // xml decl standalone flag
  55     protected boolean mIsSAloneSet;  // standalone is explicitly set
  56     protected boolean mIsNSAware;    // if true - namespace aware mode
  57     protected int mPh;  // current phase of document processing (one of PH_*)
  58     protected final static int PH_BEFORE_DOC = -1;  // before parsing
  59     protected final static int PH_DOC_START = 0;   // document start
  60     protected final static int PH_MISC_DTD = 1;   // misc before DTD
  61     protected final static int PH_DTD = 2;   // DTD
  62     protected final static int PH_DTD_MISC = 3;   // misc after DTD
  63     protected final static int PH_DOCELM = 4;   // document's element
  64     protected final static int PH_DOCELM_MISC = 5;   // misc after element
  65     protected final static int PH_AFTER_DOC = 6;   // after parsing
  66     protected int mEvt;  // current event type (one of EV_*)
  67     protected final static int EV_NULL = 0;   // unknown
  68     protected final static int EV_ELM = 1;   // empty element
  69     protected final static int EV_ELMS = 2;   // start element
  70     protected final static int EV_ELME = 3;   // end element
  71     protected final static int EV_TEXT = 4;   // textual content
  72     protected final static int EV_WSPC = 5;   // white space content
  73     protected final static int EV_PI = 6;   // processing instruction
  74     protected final static int EV_CDAT = 7;   // character data
  75     protected final static int EV_COMM = 8;   // comment
  76     protected final static int EV_DTD = 9;   // document type definition
  77     protected final static int EV_ENT = 10;  // skipped entity
  78     private char mESt; // built-in entity recognizer state
  79     // mESt values:
  80     //   0x100   : the initial state
  81     //   > 0x100 : unrecognized name
  82     //   < 0x100 : replacement character
  83     protected char[] mBuff;       // parser buffer
  84     protected int mBuffIdx;    // index of the last char (-1 when the buffer is clean)
  85     protected Pair mPref;       // stack of prefixes
  86     protected Pair mElm;        // stack of elements
  87     // mAttL.chars - element qname
  88     // mAttL.next  - next element
  89     // mAttL.list  - list of attributes defined on this element
  90     // mAttL.list.chars - attribute qname
  91     // mAttL.list.id    - a char representing the attribute's type, see below
  92     // mAttL.list.next  - next attribute defined on the element
  93     // mAttL.list.list  - default value structure or null
  94     // mAttL.list.list.chars - "name='value' " chars array for Input
  95     //
  96     // Attribute type character values:
  97     // 'i' - "ID"
  98     // 'r' - "IDREF"
  99     // 'R' - "IDREFS"
 100     // 'n' - "ENTITY"
 101     // 'N' - "ENTITIES"
 102     // 't' - "NMTOKEN"
 103     // 'T' - "NMTOKENS"
 104     // 'u' - enumeration type
 105     // 'o' - "NOTATION"
 106     // 'c' - "CDATA"
 107     // see also: bkeyword() and atype()
 108     //
 109     protected Pair mAttL;       // list of defined attrs by element name
 110     protected Input mDoc;        // document entity
 111     protected Input mInp;        // stack of entities
 112     private char[] mChars;      // reading buffer
 113     private int mChLen;      // current capacity
 114     private int mChIdx;      // index to the next char
 115     protected Attrs mAttrs;      // attributes of the curr. element
 116     private String[] mItems;      // attributes array of the curr. element
 117     private char mAttrIdx;    // attributes counter/index
 118     private String mUnent;  // unresolved entity name
 119     private Pair mDltd;   // deleted objects for reuse
 120     /**
 121      * Default prefixes, as char arrays filled in by the static initializer below.
 122      */
 123     private final static char NONS[];
 124     private final static char XML[];
 125     private final static char XMLNS[];
 126 
 127     static {  // fills in the default prefix arrays NONS, XML and XMLNS
 128         NONS = new char[1];
 129         NONS[0] = (char) 0;
 130         // NOTE(review): element 0 of XML and XMLNS equals the array length; presumably a header used by the name routines - confirm
 131         XML = new char[4];
 132         XML[0] = (char) 4;
 133         XML[1] = 'x';
 134         XML[2] = 'm';
 135         XML[3] = 'l';
 136 
 137         XMLNS = new char[6];
 138         XMLNS[0] = (char) 6;
 139         XMLNS[1] = 'x';
 140         XMLNS[2] = 'm';
 141         XMLNS[3] = 'l';
 142         XMLNS[4] = 'n';
 143         XMLNS[5] = 's';
 144     }
 145     /**
 146      * ASCII character type array.
 147      *
 148      * This array maps an ASCII (7 bit) character to the character type.<br />
 149      * Possible character type values are:<br /> - ' ' for any kind of white
 150      * space character;<br /> - 'a' for any lower case alphabetical character
 151      * value;<br /> - 'A' for any upper case alphabetical character value;<br />
 152      * - 'd' for any decimal digit character value;<br /> - 'z' for any
 153      * character less than ' ' except '\t', '\n', '\r';<br /> An ASCII (7 bit)
 154      * character which does not fall in any category listed above is mapped to
 155      * itself.
 156      */
 157     private static final byte asctyp[];
 158     /**
 159      * NMTOKEN character type array.
 160      *
 161      * This array maps an ASCII (7 bit) character to the character type.<br />
 162      * Possible character type values are:<br /> - 0 for underscore ('_') or any
 163      * lower and upper case alphabetical character value;<br /> - 1 for colon
 164      * (':') character;<br /> - 2 for dash ('-') and dot ('.') or any decimal
 165      * digit character value;<br /> - 3 for any kind of white space
 166      * character.<br /> An ASCII (7 bit) character which does not fall in any
 167      * category listed above is mapped to 0xff.
 168      */
 169     private static final byte nmttyp[];
 170 
 171     /**
 172      * Static constructor.
 173      *
 174      * Fills in the ASCII character type array {@link #asctyp asctyp} and the
 175      * NMTOKEN character type array {@link #nmttyp nmttyp}, both 0x80 entries long.
 176      */
 177     static {
 178         short i = 0;
 179 
 180         asctyp = new byte[0x80];
 181         while (i < ' ') {  // everything below ' ' defaults to 'z'
 182             asctyp[i++] = (byte) 'z';
 183         }
 184         asctyp['\t'] = (byte) ' ';  // ...except the three white space controls
 185         asctyp['\r'] = (byte) ' ';
 186         asctyp['\n'] = (byte) ' ';
 187         while (i < '0') {  // ' ' up to '/' map to themselves
 188             asctyp[i] = (byte) i++;
 189         }
 190         while (i <= '9') {  // decimal digits
 191             asctyp[i++] = (byte) 'd';
 192         }
 193         while (i < 'A') {  // ':' up to '@' map to themselves
 194             asctyp[i] = (byte) i++;
 195         }
 196         while (i <= 'Z') {  // upper case letters
 197             asctyp[i++] = (byte) 'A';
 198         }
 199         while (i < 'a') {  // '[' up to '`' map to themselves
 200             asctyp[i] = (byte) i++;
 201         }
 202         while (i <= 'z') {  // lower case letters
 203             asctyp[i++] = (byte) 'a';
 204         }
 205         while (i < 0x80) {  // '{' up to 0x7f map to themselves
 206             asctyp[i] = (byte) i++;
 207         }
 208         // nmttyp: write the 0xff default first; the named characters are overridden at the end
 209         nmttyp = new byte[0x80];
 210         for (i = 0; i < '0'; i++) {
 211             nmttyp[i] = (byte) 0xff;
 212         }
 213         while (i <= '9') {
 214             nmttyp[i++] = (byte) 2;  // digits
 215         }
 216         while (i < 'A') {
 217             nmttyp[i++] = (byte) 0xff;
 218         }
 219         // skipped upper case alphabetical characters are already 0
 220         for (i = '['; i < 'a'; i++) {
 221             nmttyp[i] = (byte) 0xff;
 222         }
 223         // skipped lower case alphabetical characters are already 0
 224         for (i = '{'; i < 0x80; i++) {
 225             nmttyp[i] = (byte) 0xff;
 226         }
 227         nmttyp['_'] = 0;  // from here on: overrides of the 0xff defaults written above
 228         nmttyp[':'] = 1;
 229         nmttyp['.'] = 2;
 230         nmttyp['-'] = 2;
 231         nmttyp[' '] = 3;
 232         nmttyp['\t'] = 3;
 233         nmttyp['\r'] = 3;
 234         nmttyp['\n'] = 3;
 235     }
 236 
 237     /**
 238      * Constructor. Sets phase PH_BEFORE_DOC, allocates mBuff and mAttrs, and seeds mPref with "" and "xml".
 239      */
 240     protected Parser() {
 241         mPh = PH_BEFORE_DOC;  // before parsing
 242 
 243         //              Initialize the parser
 244         mBuff = new char[BUFFSIZE_PARSER];
 245         mAttrs = new Attrs();
 246 
 247         //              Default namespace
 248         mPref = pair(mPref);  // mPref has not been assigned yet, so null is passed here
 249         mPref.name = "";
 250         mPref.value = "";
 251         mPref.chars = NONS;
 252         mNoNS = mPref;  // no namespace
 253         //              XML namespace
 254         mPref = pair(mPref);  // new entry created from the no-namespace entry
 255         mPref.name = "xml";
 256         mPref.value = "http://www.w3.org/XML/1998/namespace";
 257         mPref.chars = XML;
 258         mXml = mPref;  // XML namespace
 259     }
 260 
 261     /**
 262      * Initializes parser's internals. Note, current input (mInp) has to be set
 263      * before this method is called: it is dereferenced and becomes the document entity.
 264      */
 265     protected void init() {
 266         mUnent = null;  // no unresolved entity pending
 267         mElm = null;  // empty element stack
 268         mPref = mXml;  // prefix stack back to the entry saved by the constructor
 269         mAttL = null;  // no attribute declarations yet
 270         mPEnt = new HashMap<String, Input>();  // fresh parameter entity table
 271         mEnt = new HashMap<String, Input>();  // fresh general entity table
 272         mDoc = mInp;          // current input is document entity
 273         mChars = mInp.chars;    // use document entity buffer
 274         mPh = PH_DOC_START;  // the beginning of the document
 275     }
 276 
 277     /**
 278      * Cleans up parser internal resources and leaves the parser in the PH_AFTER_DOC phase.
 279      */
 280     protected void cleanup() {
 281         //              Default attributes
 282         while (mAttL != null) {
 283             while (mAttL.list != null) {
 284                 if (mAttL.list.list != null) {
 285                     del(mAttL.list.list);  // release the default value structure first
 286                 }
 287                 mAttL.list = del(mAttL.list);  // del() result becomes the new list head
 288             }
 289             mAttL = del(mAttL);
 290         }
 291         //              Element stack
 292         while (mElm != null) {
 293             mElm = del(mElm);
 294         }
 295         //              Namespace prefixes
 296         while (mPref != mXml) {  // stop at the xml entry; it is kept
 297             mPref = del(mPref);
 298         }
 299         //              Inputs
 300         while (mInp != null) {
 301             pop();
 302         }
 303         //              Document reader
 304         if ((mDoc != null) && (mDoc.src != null)) {
 305             try {
 306                 mDoc.src.close();
 307             } catch (IOException ioe) {  // best effort: a failure to close is ignored
 308             }
 309         }
 310         mPEnt = null;
 311         mEnt = null;
 312         mDoc = null;
 313         mPh = PH_AFTER_DOC;  // after document processing
 314     }
 315 
 316     /**
 317      * Processes a portion of document. This method returns one of EV_*
 318      * constants as an identifier of the portion of document that has been read.
 319      *
 320      * @return Identifier of processed document portion.
 321      * @exception Exception is parser specific exception from panic method.
 322      * @exception IOException
 323      */
 324     protected int step()
 325             throws Exception {
 326         mEvt = EV_NULL;
 327         int st = 0;  // local state: 0 - markup dispatch, 1 - white space, 2 - text content
 328         while (mEvt == EV_NULL) {  // loop until some branch sets an event
 329             char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();  // take from mChars while chars remain, else getch()
 330             switch (st) {
 331                 case 0:     // all sorts of markup (dispatcher)
 332                     if (ch != '<') {
 333                         bkch();
 334                         mBuffIdx = -1;  // clean parser buffer
 335                         st = 1;
 336                         break;
 337                     }
 338                     switch (getch()) {
 339                         case '/':  // the end of the element content
 340                             mEvt = EV_ELME;
 341                             if (mElm == null) {  // end tag with an empty element stack
 342                                 panic(FAULT);
 343                             }
 344                             //          Check element's open/close tags balance
 345                             mBuffIdx = -1;  // clean parser buffer
 346                             bname(mIsNSAware);
 347                             char[] chars = mElm.chars;
 348                             if (chars.length == (mBuffIdx + 1)) {
 349                                 for (char i = 1; i <= mBuffIdx; i += 1) {  // element 0 is not compared
 350                                     if (chars[i] != mBuff[i]) {
 351                                         panic(FAULT);
 352                                     }
 353                                 }
 354                             } else {
 355                                 panic(FAULT);
 356                             }
 357                             //          Skip white spaces before '>'
 358                             if (wsskip() != '>') {
 359                                 panic(FAULT);
 360                             }
 361                             getch();  // read '>'
 362                             break;
 363 
 364                         case '!':  // a comment or a CDATA
 365                             ch = getch();
 366                             bkch();  // only peek: the char is pushed back before dispatching
 367                             switch (ch) {
 368                                 case '-':  // must be a comment
 369                                     mEvt = EV_COMM;
 370                                     comm();
 371                                     break;
 372 
 373                                 case '[':  // must be a CDATA section
 374                                     mEvt = EV_CDAT;
 375                                     cdat();
 376                                     break;
 377 
 378                                 default:   // must be 'DOCTYPE'
 379                                     mEvt = EV_DTD;
 380                                     dtd();
 381                                     break;
 382                             }
 383                             break;
 384 
 385                         case '?':  // processing instruction
 386                             mEvt = EV_PI;
 387                             pi();
 388                             break;
 389 
 390                         default:  // must be the first char of an xml name
 391                             bkch();
 392                             //          Read an element name and put it on top of the
 393                             //          element stack
 394                             mElm = pair(mElm);  // add new element to the stack
 395                             mElm.chars = qname(mIsNSAware);
 396                             mElm.name = mElm.local();
 397                             mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
 398                             mElm.num = 0;     // namespace counter
 399                             //          Find the list of defined attributes of the current
 400                             //          element
 401                             Pair elm = find(mAttL, mElm.chars);
 402                             mElm.list = (elm != null) ? elm.list : null;
 403                             //          Read attributes till the end of the element tag
 404                             mAttrIdx = 0;
 405                             Pair att = pair(null);
 406                             att.num = 0;  // clear attribute's flags
 407                             attr(att);     // get all attributes inc. defaults
 408                             del(att);
 409                             mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
 410                             //          Skip white spaces before '>'
 411                             switch (wsskip()) {
 412                                 case '>':
 413                                     getch();  // read '>'
 414                                     mEvt = EV_ELMS;
 415                                     break;
 416 
 417                                 case '/':
 418                                     getch();  // read '/'
 419                                     if (getch() != '>') // read '>'
 420                                     {
 421                                         panic(FAULT);
 422                                     }
 423                                     mEvt = EV_ELM;
 424                                     break;
 425 
 426                                 default:
 427                                     panic(FAULT);
 428                             }
 429                             break;
 430                     }
 431                     break;
 432 
 433                 case 1:     // read white space
 434                     switch (ch) {
 435                         case ' ':
 436                         case '\t':
 437                         case '\n':
 438                             bappend(ch);
 439                             break;
 440 
 441                         case '\r':              // EOL processing [#2.11]
 442                             if (getch() != '\n') {
 443                                 bkch();
 444                             }
 445                             bappend('\n');  // both "\r\n" and a lone '\r' are appended as '\n'
 446                             break;
 447 
 448                         case '<':
 449                             mEvt = EV_WSPC;
 450                             bkch();
 451                             bflash_ws();
 452                             break;
 453 
 454                         default:  // not white space: re-read this char as text content
 455                             bkch();
 456                             st = 2;
 457                             break;
 458                     }
 459                     break;
 460 
 461                 case 2:     // read the text content of the element
 462                     switch (ch) {
 463                         case '&':
 464                             if (mUnent == null) {
 465                                 //              There was no unresolved entity on previous step.
 466                                 if ((mUnent = ent('x')) != null) {
 467                                     mEvt = EV_TEXT;
 468                                     bkch();      // move back to ';' after entity name
 469                                     setch('&');  // parser must be back on next step
 470                                     bflash();
 471                                 }
 472                             } else {
 473                                 //              There was unresolved entity on previous step.
 474                                 mEvt = EV_ENT;
 475                                 skippedEnt(mUnent);
 476                                 mUnent = null;
 477                             }
 478                             break;
 479 
 480                         case '<':
 481                             mEvt = EV_TEXT;
 482                             bkch();
 483                             bflash();
 484                             break;
 485 
 486                         case '\r':  // EOL processing [#2.11]
 487                             if (getch() != '\n') {
 488                                 bkch();
 489                             }
 490                             bappend('\n');
 491                             break;
 492 
 493                         case EOS:
 494                             panic(FAULT);
 495                             // NOTE(review): no break - control falls into default unless panic() throws
 496                         default:
 497                             bappend(ch);
 498                             break;
 499                     }
 500                     break;
 501 
 502                 default:
 503                     panic(FAULT);
 504             }
 505         }
 506 
 507         return mEvt;
 508     }
 509 
 510     /**
 511      * Parses the document type declaration. Reports it through docType() and, when resolveEnt()
 512      * yields a source and mIsSAlone is false, parses the external subset via dtdsub().
 513      * @exception Exception is parser specific exception from panic method.
 514      * @exception IOException
 515      */
 516     private void dtd()
 517             throws Exception {
 518         char ch;
 519         String str = null;  // NOTE(review): not referenced anywhere else in this method
 520         String name = null;
 521         Pair psid = null;  // result of pubsys(); stays null when there is no external id
 522         // read 'DOCTYPE'
 523         if ("DOCTYPE".equals(name(false)) != true) {
 524             panic(FAULT);
 525         }
 526         mPh = PH_DTD;  // DTD
 527         for (short st = 0; st >= 0;) {  // st is set to -1 at the end of the DTD
 528             ch = getch();
 529             switch (st) {
 530                 case 0:     // read the document type name
 531                     if (chtyp(ch) != ' ') {
 532                         bkch();
 533                         name = name(mIsNSAware);
 534                         wsskip();
 535                         st = 1;  // read 'PUBLIC' or 'SYSTEM'
 536                     }
 537                     break;
 538 
 539                 case 1:     // read 'PUBLIC' or 'SYSTEM'
 540                     switch (chtyp(ch)) {
 541                         case 'A':
 542                             bkch();
 543                             psid = pubsys(' ');
 544                             st = 2;  // skip spaces before internal subset
 545                             docType(name, psid.name, psid.value);
 546                             break;
 547 
 548                         case '[':
 549                             bkch();
 550                             st = 2;    // skip spaces before internal subset
 551                             docType(name, null, null);
 552                             break;
 553 
 554                         case '>':
 555                             bkch();
 556                             st = 3;    // skip spaces after internal subset
 557                             docType(name, null, null);
 558                             break;
 559 
 560                         default:
 561                             panic(FAULT);
 562                     }
 563                     break;
 564 
 565                 case 2:     // skip spaces before internal subset
 566                     switch (chtyp(ch)) {
 567                         case '[':
 568                             //          Process internal subset
 569                             dtdsub();
 570                             st = 3;  // skip spaces after internal subset
 571                             break;
 572 
 573                         case '>':
 574                             //          There is no internal subset
 575                             bkch();
 576                             st = 3;  // skip spaces after internal subset
 577                             break;
 578 
 579                         case ' ':
 580                             // skip white spaces
 581                             break;
 582 
 583                         default:
 584                             panic(FAULT);
 585                     }
 586                     break;
 587 
 588                 case 3:     // skip spaces after internal subset
 589                     switch (chtyp(ch)) {
 590                         case '>':
 591                             if (psid != null) {
 592                                 //              Report the DTD external subset
 593                                 InputSource is = resolveEnt(name, psid.name, psid.value);
 594                                 if (is != null) {
 595                                     if (mIsSAlone == false) {
 596                                         //              Set the end of DTD external subset char
 597                                         bkch();
 598                                         setch(']');
 599                                         //              Set the DTD external subset InputSource
 600                                         push(new Input(BUFFSIZE_READER));
 601                                         setinp(is);
 602                                         mInp.pubid = psid.name;
 603                                         mInp.sysid = psid.value;
 604                                         //              Parse the DTD external subset
 605                                         dtdsub();
 606                                     } else {
 607                                         //              Unresolved DTD external subset
 608                                         skippedEnt("[dtd]");
 609                                         //              Release reader and stream
 610                                         if (is.getCharacterStream() != null) {
 611                                             try {
 612                                                 is.getCharacterStream().close();
 613                                             } catch (IOException ioe) {  // close failure is ignored
 614                                             }
 615                                         }
 616                                         if (is.getByteStream() != null) {
 617                                             try {
 618                                                 is.getByteStream().close();
 619                                             } catch (IOException ioe) {  // close failure is ignored
 620                                             }
 621                                         }
 622                                     }
 623                                 } else {
 624                                     //          Unresolved DTD external subset
 625                                     skippedEnt("[dtd]");
 626                                 }
 627                                 del(psid);
 628                             }
 629                             st = -1;  // end of DTD
 630                             break;
 631 
 632                         case ' ':
 633                             // skip white spaces
 634                             break;
 635 
 636                         default:
 637                             panic(FAULT);
 638                     }
 639                     break;
 640 
 641                 default:
 642                     panic(FAULT);
 643             }
 644         }
 645     }
 646 
 647     /**
 648      * Parses the document type declaration subset: markup declarations, comments,
 649      * processing instructions and parameter entity references, until ']' is read.
 650      * @exception Exception is parser specific exception from panic method.
 651      * @exception IOException
 652      */
 653     private void dtdsub()
 654             throws Exception {
 655         char ch;
 656         for (short st = 0; st >= 0;) {  // st is set to -1 at the end of the subset
 657             ch = getch();
 658             switch (st) {
 659                 case 0:     // skip white spaces before a declaration
 660                     switch (chtyp(ch)) {
 661                         case '<':
 662                             ch = getch();
 663                             switch (ch) {
 664                                 case '?':
 665                                     pi();
 666                                     break;
 667 
 668                                 case '!':
 669                                     ch = getch();
 670                                     bkch();  // only peek: the char is pushed back
 671                                     if (ch == '-') {
 672                                         comm();
 673                                         break;
 674                                     }
 675                                     //          A markup or an entity declaration
 676                                     bntok();
 677                                     switch (bkeyword()) {
 678                                         case 'n':
 679                                             dtdent();
 680                                             break;
 681 
 682                                         case 'a':
 683                                             dtdattl();    // parse attributes declaration
 684                                             break;
 685 
 686                                         case 'e':
 687                                             dtdelm();     // parse element declaration
 688                                             break;
 689 
 690                                         case 'o':
 691                                             dtdnot();     // parse notation declaration
 692                                             break;
 693 
 694                                         default:
 695                                             panic(FAULT); // unsupported markup declaration
 696                                             break;
 697                                     }
 698                                     st = 1;  // read the end of declaration
 699                                     break;
 700 
 701                                 default:
 702                                     panic(FAULT);
 703                                     break;
 704                             }
 705                             break;
 706 
 707                         case '%':
 708                             //          A parameter entity reference
 709                             pent(' ');
 710                             break;
 711 
 712                         case ']':
 713                             //          End of DTD subset
 714                             st = -1;
 715                             break;
 716 
 717                         case ' ':
 718                             //          Skip white spaces
 719                             break;
 720 
 721                         case 'Z':
 722                             //          End of stream
 723                             if (getch() != ']') {
 724                                 panic(FAULT);
 725                             }
 726                             st = -1;
 727                             break;
 728 
 729                         default:
 730                             panic(FAULT);
 731                     }
 732                     break;
 733 
 734                 case 1:     // read the end of declaration
 735                     switch (ch) {
 736                         case '>':   // end of the declaration reached
 737                             st = 0; // skip white spaces before a declaration
 738                             break;
 739 
 740                         case ' ':
 741                         case '\n':
 742                         case '\r':
 743                         case '\t':
 744                             //          Skip white spaces
 745                             break;
 746 
 747                         default:
 748                             panic(FAULT);
 749                             break;
 750                     }
 751                     break;
 752 
 753                 default:
 754                     panic(FAULT);
 755             }
 756         }
 757     }
 758 
 759     /**
 760      * Parses an entity declaration and stores the entity in the general
 761      * (<code>mEnt</code>) or parameter (<code>mPEnt</code>) entity look up
 762      * table. A name already present in the table is not replaced [#4.2].
 763      * An NDATA entity is passed to <code>unparsedEntDecl</code> instead.
 764      *
 765      * @exception Exception is parser specific exception from panic method.
 766      * @exception IOException
 767      */
 768     private void dtdent()
 769             throws Exception {
 770         String str = null;  // entity name, the key in mEnt / mPEnt
 771         char[] val = null;  // characters of an internal entity value
 772         Input inp = null;   // entry to store in the look up table
 773         Pair ids = null;    // external identifier: name - pubid, value - sysid
 774         char ch;
 775         for (short st = 0; st >= 0;) {  // a negative state ends the declaration
 776             ch = getch();
 777             switch (st) {
 778                 case 0:     // skip white spaces before entity name
 779                     switch (chtyp(ch)) {
 780                         case ' ':
 781                             //          Skip white spaces
 782                             break;
 783
 784                         case '%':
 785                             //          Parameter entity declaration or reference.
 786                             ch = getch();  // the character which follows '%'
 787                             bkch();        // presumably un-reads it (look-ahead) - confirm
 788                             if (chtyp(ch) == ' ') {
 789                                 //              White space after '%': parameter entity declaration.
 790                                 wsskip();
 791                                 str = name(false);
 792                                 switch (chtyp(wsskip())) {
 793                                     case 'A':
 794                                         //              Read the external identifier
 795                                         ids = pubsys(' ');
 796                                         if (wsskip() == '>') {
 797                                             //          External parsed entity
 798                                             if (mPEnt.containsKey(str) == false) {      // [#4.2]
 799                                                 inp = new Input();
 800                                                 inp.pubid = ids.name;
 801                                                 inp.sysid = ids.value;
 802                                                 mPEnt.put(str, inp);
 803                                             }
 804                                         } else {
 805                                             panic(FAULT);
 806                                         }
 807                                         del(ids);
 808                                         st = -1;  // the end of declaration
 809                                         break;
 810
 811                                     case '\"':
 812                                     case '\'':
 813                                         //              Read the parameter entity value
 814                                         bqstr('d');
 815                                         //              Copy mBuff[1..mBuffIdx] to val[1..], val[0] is set below
 816                                         val = new char[mBuffIdx + 1];
 817                                         System.arraycopy(mBuff, 1, val, 1, val.length - 1);
 818                                         //              Leading space [#4.4.8]; NOTE(review): no trailing space is added here
 819                                         val[0] = ' ';
 820                                         //              Add the entity to the entity look up table
 821                                         if (mPEnt.containsKey(str) == false) {  // [#4.2]
 822                                             inp = new Input(val);
 823                                             inp.pubid = mInp.pubid;    // identifiers, encoding and version
 824                                             inp.sysid = mInp.sysid;    // are copied from mInp
 825                                             inp.xmlenc = mInp.xmlenc;
 826                                             inp.xmlver = mInp.xmlver;
 827                                             mPEnt.put(str, inp);
 828                                         }
 829                                         st = -1;  // the end of declaration
 830                                         break;
 831
 832                                     default:
 833                                         panic(FAULT);
 834                                         break;
 835                                 }
 836                             } else {
 837                                 //              Parameter entity reference.
 838                                 pent(' ');
 839                             }
 840                             break;
 841
 842                         default:
 843                             bkch();
 844                             str = name(false);  // name of a general entity
 845                             st = 1;  // read entity declaration value
 846                             break;
 847                     }
 848                     break;
 849
 850                 case 1:     // read entity declaration value
 851                     switch (chtyp(ch)) {
 852                         case '\"':  // internal entity
 853                         case '\'':
 854                             bkch();
 855                             bqstr('d');  // read a string into the buffer
 856                             if (mEnt.get(str) == null) {
 857                                 //              Create general entity value from mBuff[1..mBuffIdx]
 858                                 val = new char[mBuffIdx];
 859                                 System.arraycopy(mBuff, 1, val, 0, val.length);
 860                                 //              Add the entity to the entity look up table
 861                                 if (mEnt.containsKey(str) == false) {   // [#4.2] NOTE(review): looks redundant with the get() test above
 862                                     inp = new Input(val);
 863                                     inp.pubid = mInp.pubid;    // identifiers, encoding and version
 864                                     inp.sysid = mInp.sysid;    // are copied from mInp
 865                                     inp.xmlenc = mInp.xmlenc;
 866                                     inp.xmlver = mInp.xmlver;
 867                                     mEnt.put(str, inp);
 868                                 }
 869                             }
 870                             st = -1;  // the end of declaration
 871                             break;
 872
 873                         case 'A':  // external entity
 874                             bkch();
 875                             ids = pubsys(' ');
 876                             switch (wsskip()) {
 877                                 case '>':  // external parsed entity
 878                                     if (mEnt.containsKey(str) == false) {  // [#4.2]
 879                                         inp = new Input();
 880                                         inp.pubid = ids.name;
 881                                         inp.sysid = ids.value;
 882                                         mEnt.put(str, inp);
 883                                     }
 884                                     break;
 885
 886                                 case 'N':  // external general unparsed entity
 887                                     if ("NDATA".equals(name(false)) == true) {
 888                                         wsskip();
 889                                         unparsedEntDecl(str, ids.name, ids.value, name(false));  // last argument: the name read after NDATA
 890                                         break;
 891                                     }  // not NDATA: falls through to the panic below
 892                                 default:
 893                                     panic(FAULT);
 894                                     break;
 895                             }
 896                             del(ids);
 897                             st = -1;  // the end of declaration
 898                             break;
 899
 900                         case ' ':
 901                             //          Skip white spaces
 902                             break;
 903
 904                         default:
 905                             panic(FAULT);
 906                             break;
 907                     }
 908                     break;
 909
 910                 default:
 911                     panic(FAULT);
 912             }
 913         }
 914     }
 915 
 916     /**
 917      * Skips an element declaration.
 918      *
 919      * The element name is read and everything which follows it in front of
 920      * the closing angle bracket is discarded; the content model is not
 921      * interpreted. The end of stream inside of the declaration is a fault.
 922      *
 923      * @exception Exception is parser specific exception from panic method.
 924      * @exception IOException
 925      */
 926     private void dtdelm()
 927             throws Exception {
 928         //              Stub implementation: the declaration is consumed
 929         //              without being analysed.
 930         wsskip();
 931         name(mIsNSAware);
 932
 933         //              Discard characters in front of the closing bracket
 934         char next = getch();
 935         while (next != '>') {
 936             if (next == EOS) {
 937                 //      End of stream inside of the declaration
 938                 panic(FAULT);
 939             }
 940             next = getch();
 941         }
 942
 943         //              The closing angle bracket has been read; back up one
 944         //              character before the return.
 945         bkch();
 946     }
 947 
 948     /**
 949      * Parses an attribute list declaration and records it on <code>mAttL</code>.
 950      *
 951      * This method parses the declaration up to the closing angle bracket.
 952      *
 953      * @exception Exception is parser specific exception from panic method.
 954      * @exception IOException
 955      */
 956     private void dtdattl()
 957             throws Exception {
 958         char elmqn[] = null;  // qualified name of the element
 959         Pair elm = null;      // entry of the element on mAttL
 960         char ch;
 961         for (short st = 0; st >= 0;) {
 962             ch = getch();
 963             switch (st) {
 964                 case 0:     // read the element name
 965                     switch (chtyp(ch)) {
 966                         case 'a':
 967                         case 'A':
 968                         case '_':
 969                         case 'X':
 970                         case ':':
 971                             bkch();
 972                             //          Get the element from the list or add a new one.
 973                             elmqn = qname(mIsNSAware);
 974                             elm = find(mAttL, elmqn);
 975                             if (elm == null) {
 976                                 elm = pair(mAttL);
 977                                 elm.chars = elmqn;
 978                                 mAttL = elm;  // mAttL now refers to the new entry
 979                             }
 980                             st = 1;  // read an attribute declaration
 981                             break;
 982
 983                         case ' ':
 984                             break;  // skip white spaces
 985
 986                         case '%':
 987                             pent(' ');  // parameter entity reference
 988                             break;
 989
 990                         default:
 991                             panic(FAULT);
 992                             break;
 993                     }
 994                     break;
 995
 996                 case 1:     // read an attribute declaration
 997                     switch (chtyp(ch)) {
 998                         case 'a':
 999                         case 'A':
1000                         case '_':
1001                         case 'X':
1002                         case ':':
1003                             bkch();
1004                             dtdatt(elm);  // one attribute declaration of the element
1005                             if (wsskip() == '>') {  // '>' ends the attribute list
1006                                 return;
1007                             }
1008                             break;  // otherwise stay in state 1 for the next attribute
1009
1010                         case ' ':
1011                             break;  // skip white spaces
1012
1013                         case '%':
1014                             pent(' ');  // parameter entity reference
1015                             break;
1016
1017                         default:
1018                             panic(FAULT);
1019                             break;
1020                     }
1021                     break;
1022
1023                 default:
1024                     panic(FAULT);
1025                     break;
1026             }
1027         }
1028     }
1029 
1030     /**
1031      * Parses an attribute declaration.
1032      *
1033      * The attribute uses the following fields of Pair object: chars -
1034      * characters of qualified name; id - the type identifier of the
1035      * attribute; list - a pair which holds the default value (chars field).
1036      *
1037      * @param elm An object which represents all defined attributes on an
1038      * element.
1039      * @exception Exception is parser specific exception from panic method.
1040      * @exception IOException
1041      */
1042     private void dtdatt(Pair elm)
1043             throws Exception {
1044         char attqn[] = null;  // qualified name of the attribute
1045         Pair att = null;      // the attribute declaration being read
1046         char ch;
1047         for (short st = 0; st >= 0;) {  // a negative state ends the declaration
1048             ch = getch();
1049             switch (st) {
1050                 case 0:     // the attribute name
1051                     switch (chtyp(ch)) {
1052                         case 'a':
1053                         case 'A':
1054                         case '_':
1055                         case 'X':
1056                         case ':':
1057                             bkch();
1058                             //          Get the attribute from the list or add a new one.
1059                             attqn = qname(mIsNSAware);
1060                             att = find(elm.list, attqn);
1061                             if (att == null) {
1062                                 //              New attribute declaration
1063                                 att = pair(elm.list);
1064                                 att.chars = attqn;
1065                                 elm.list = att;
1066                             } else {
1067                                 //              Do not override the attribute declaration [#3.3]
1068                                 att = pair(null);  // read into a pair which is not put on elm.list
1069                                 att.chars = attqn;
1070                                 att.id = 'c';
1071                             }
1072                             wsskip();
1073                             st = 1;
1074                             break;
1075
1076                         case '%':
1077                             pent(' ');  // parameter entity reference
1078                             break;
1079
1080                         case ' ':
1081                             break;  // skip white spaces
1082
1083                         default:
1084                             panic(FAULT);
1085                             break;
1086                     }
1087                     break;
1088
1089                 case 1:     // the attribute type
1090                     switch (chtyp(ch)) {
1091                         case '(':
1092                             att.id = 'u';  // enumeration type
1093                             st = 2;        // read the first element of the list
1094                             break;
1095
1096                         case '%':
1097                             pent(' ');  // parameter entity reference
1098                             break;
1099
1100                         case ' ':
1101                             break;  // skip white spaces
1102
1103                         default:
1104                             bkch();
1105                             bntok();  // read type id
1106                             att.id = bkeyword();
1107                             switch (att.id) {
1108                                 case 'o':   // NOTATION
1109                                     if (wsskip() != '(') {
1110                                         panic(FAULT);
1111                                     }
1112                                     ch = getch();  // presumably consumes the '(' seen by wsskip - confirm
1113                                     st = 2;  // read the first element of the list
1114                                     break;
1115
1116                                 case 'i':     // ID
1117                                 case 'r':     // IDREF
1118                                 case 'R':     // IDREFS
1119                                 case 'n':     // ENTITY
1120                                 case 'N':     // ENTITIES
1121                                 case 't':     // NMTOKEN
1122                                 case 'T':     // NMTOKENS
1123                                 case 'c':     // CDATA
1124                                     wsskip();
1125                                     st = 4;  // read default declaration
1126                                     break;
1127
1128                                 default:
1129                                     panic(FAULT);
1130                                     break;
1131                             }
1132                             break;
1133                     }
1134                     break;
1135
1136                 case 2:     // read the first element of the list
1137                     switch (chtyp(ch)) {
1138                         case 'a':
1139                         case 'A':
1140                         case 'd':
1141                         case '.':
1142                         case ':':
1143                         case '-':
1144                         case '_':
1145                         case 'X':
1146                             bkch();
1147                             switch (att.id) {
1148                                 case 'u':  // enumeration type
1149                                     bntok();
1150                                     break;
1151
1152                                 case 'o':  // NOTATION
1153                                     mBuffIdx = -1;  // reset the buffer index before bname
1154                                     bname(false);
1155                                     break;
1156
1157                                 default:
1158                                     panic(FAULT);
1159                                     break;
1160                             }
1161                             wsskip();
1162                             st = 3;  // read next element of the list
1163                             break;
1164
1165                         case '%':
1166                             pent(' ');  // parameter entity reference
1167                             break;
1168
1169                         case ' ':
1170                             break;  // skip white spaces
1171
1172                         default:
1173                             panic(FAULT);
1174                             break;
1175                     }
1176                     break;
1177
1178                 case 3:     // read next element of the list
1179                     switch (ch) {
1180                         case ')':  // the end of the list
1181                             wsskip();
1182                             st = 4;  // read default declaration
1183                             break;
1184
1185                         case '|':  // one more element follows, the state is not changed
1186                             wsskip();
1187                             switch (att.id) {
1188                                 case 'u':  // enumeration type
1189                                     bntok();
1190                                     break;
1191
1192                                 case 'o':  // NOTATION
1193                                     mBuffIdx = -1;  // reset the buffer index before bname
1194                                     bname(false);
1195                                     break;
1196
1197                                 default:
1198                                     panic(FAULT);
1199                                     break;
1200                             }
1201                             wsskip();
1202                             break;
1203
1204                         case '%':
1205                             pent(' ');  // parameter entity reference
1206                             break;
1207
1208                         default:
1209                             panic(FAULT);
1210                             break;
1211                     }
1212                     break;
1213
1214                 case 4:     // read default declaration
1215                     switch (ch) {
1216                         case '#':
1217                             bntok();
1218                             switch (bkeyword()) {
1219                                 case 'F':  // FIXED
1220                                     switch (wsskip()) {
1221                                         case '\"':
1222                                         case '\'':
1223                                             st = 5;  // read the default value
1224                                             break;
1225
1226                                         case EOS:
1227                                             panic(FAULT);  // NOTE(review): no break; presumably panic always throws
1228
1229                                         default:
1230                                             st = -1;  // no quoted value follows: the declaration ends here
1231                                             break;
1232                                     }
1233                                     break;
1234
1235                                 case 'Q':  // REQUIRED
1236                                 case 'I':  // IMPLIED
1237                                     st = -1;  // no default value, att.list is not set
1238                                     break;
1239
1240                                 default:
1241                                     panic(FAULT);
1242                                     break;
1243                             }
1244                             break;
1245
1246                         case '\"':
1247                         case '\'':
1248                             bkch();
1249                             st = 5;  // read the default value
1250                             break;
1251
1252                         case ' ':
1253                         case '\n':
1254                         case '\r':
1255                         case '\t':
1256                             break;  // skip white spaces
1257
1258                         case '%':
1259                             pent(' ');  // parameter entity reference
1260                             break;
1261
1262                         default:
1263                             bkch();
1264                             st = -1;  // anything else ends the attribute declaration
1265                             break;
1266                     }
1267                     break;
1268
1269                 case 5:     // read the default value
1270                     switch (ch) {
1271                         case '\"':
1272                         case '\'':
1273                             bkch();
1274                             bqstr('d');  // the value in the mBuff now
1275                             att.list = pair(null);
1276                             //          Create a string like "attqname='value' " (ch is the quote character)
1277                             att.list.chars = new char[att.chars.length + mBuffIdx + 3];
1278                             System.arraycopy(  // the name: att.chars without the element at index 0
1279                                     att.chars, 1, att.list.chars, 0, att.chars.length - 1);
1280                             att.list.chars[att.chars.length - 1] = '=';
1281                             att.list.chars[att.chars.length] = ch;
1282                             System.arraycopy(  // the value: mBuff[1..mBuffIdx]
1283                                     mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
1284                             att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
1285                             att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1286                             st = -1;
1287                             break;
1288
1289                         default:
1290                             panic(FAULT);
1291                             break;
1292                     }
1293                     break;
1294
1295                 default:
1296                     panic(FAULT);
1297                     break;
1298             }
1299         }
1300     }
1301 
1302     /**
1303      * Parses a notation declaration.
1304      *
1305      * This method parses the declaration up to the closing angle bracket.
1306      *
1307      * @exception Exception is parser specific exception form panic method.
1308      * @exception IOException
1309      */
1310     private void dtdnot()
1311             throws Exception {
1312         wsskip();
1313         String name = name(false);
1314         wsskip();
1315         Pair ids = pubsys('N');
1316         notDecl(name, ids.name, ids.value);
1317         del(ids);
1318     }
1319 
1320     /**
1321      * Parses an attribute.
1322      *
1323      * This recursive method is responsible for prefix addition
1324      * (<code>mPref</code>) on the way down. The end of the element's start
1325      * tag triggers the return process. On its way back the method resolves
1326      * prefixes and stores the attributes in <code>mItems</code>, at a
1327      * stride of eight slots per attribute.
1328      *
1329      * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
1330      * declared in DTD (attribute declaration had been read); 0x2 - attribute's
1331      * default value is used.</p>
1332      *
1333      * @param att An object which represents current attribute.
1334      * @exception Exception is parser specific exception from panic method.
1335      * @exception IOException
1336      */
1337     private void attr(Pair att)
1338             throws Exception {
1339         switch (wsskip()) {
1340             case '/':
1341             case '>':
1342                 if ((att.num & 0x2) == 0) {  // all attributes have been read
1343                     att.num |= 0x2;  // set default attribute flag
1344                     Input inp = mInp;  // remembered to detect a push below
1345                     //          Go through all attributes defined on current element.
1346                     for (Pair def = mElm.list; def != null; def = def.next) {
1347                         if (def.list == null) // no default value
1348                         {
1349                             continue;
1350                         }
1351                         //              Look the declared attribute up in the chain which starts at
1352                         //              att.next; when it is not found push its default text as input.
1353                         Pair act = find(att.next, def.chars);
1354                         if (act == null) {
1355                             push(new Input(def.list.chars));
1356                         }
1357                     }
1358                     if (mInp != inp) {  // defaults have been added
1359                         attr(att);  // read the pushed defaults as ordinary attributes
1360                         return;
1361                     }
1362                 }
1363                 //              Ensure the attribute string array capacity
1364                 mAttrs.setLength(mAttrIdx);
1365                 mItems = mAttrs.mItems;
1366                 return;
1367
1368             case EOS:
1369                 panic(FAULT);  // NOTE(review): no break; presumably panic always throws
1370
1371             default:
1372                 //              Read the attribute name and value
1373                 att.chars = qname(mIsNSAware);
1374                 att.name = att.local();
1375                 String type = atype(att);  // sets attribute's type on att.id
1376                 wsskip();
1377                 if (getch() != '=') {
1378                     panic(FAULT);
1379                 }
1380                 bqstr((char) att.id);   // read the value with normalization.
1381                 String val = new String(mBuff, 1, mBuffIdx);
1382                 Pair next = pair(att);
1383                 next.num = (att.num & ~0x1);  // inherit attribute flags
1384                 //              Put a namespace declaration on top of the prefix stack
1385                 if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
1386                     //          An ordinary attribute
1387                     mAttrIdx++;
1388                     attr(next);     // recursive call to parse the next attribute
1389                     mAttrIdx--;
1390                     //          Add the attribute to the attributes string array
1391                     char idx = (char) (mAttrIdx << 3);  // first slot of this attribute
1392                     mItems[idx + 1] = att.qname();  // attr qname
1393                     mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
1394                     mItems[idx + 3] = val;          // attr value
1395                     mItems[idx + 4] = type;         // attr type
1396                     switch (att.num & 0x3) {
1397                         case 0x0:
1398                             mItems[idx + 5] = null;
1399                             break;
1400
1401                         case 0x1:  // declared attribute
1402                             mItems[idx + 5] = "d";
1403                             break;
1404
1405                         default:  // 0x2, 0x3 - default attribute always declared
1406                             mItems[idx + 5] = "D";
1407                             break;
1408                     }
1409                     //          Resolve the prefix if any and report the attribute
1410                     //          NOTE: The attribute does not accept the default namespace.
1411                     mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
1412                 } else {
1413                     //          A namespace declaration. mPref.name contains prefix and
1414                     //          mPref.value contains namespace URI set by isdecl method.
1415                     //          Report a start of the new mapping
1416                     newPrefix();
1417                     //          Recursive call to parse the next attribute
1418                     attr(next);
1419                     //          NOTE: The namespace declaration is not reported.
1420                 }
1421                 del(next);
1422                 break;
1423         }
1424     }
1425 
1426     /**
1427      * Retrieves attribute type.
1428      *
1429      * The kind of normalization for the attribute value is stored in the
1430      * <code>id</code> field of the attribute and the name of the attribute
1431      * type is returned. An attribute without a declaration on the current
1432      * element is reported as CDATA.
1433      *
1434      * @param att An object which represents current attribute.
1435      * @return The name of the attribute type.
1436      * @exception Exception is parser specific exception from panic method.
1437      */
1438     private String atype(Pair att)
1439             throws Exception {
1440         // CDATA-type normalization by default [#3.3.3]
1441         att.id = 'c';
1442
1443         // Look for a declaration of the attribute on the current element
1444         Pair decl = null;
1445         if (mElm.list != null) {
1446             decl = find(mElm.list, att.chars);
1447         }
1448         if (decl == null) {
1449             return "CDATA";
1450         }
1451
1452         att.num |= 0x1;  // attribute is declared
1453
1454         // Non-CDATA normalization except when the attribute type is CDATA.
1455         att.id = 'i';
1456
1457         // Translate the declared type identifier into the type name
1458         switch (decl.id) {
1459             case 'i':
1460                 return "ID";
1461             case 'r':
1462                 return "IDREF";
1463             case 'R':
1464                 return "IDREFS";
1465             case 'n':
1466                 return "ENTITY";
1467             case 'N':
1468                 return "ENTITIES";
1469             case 'T':
1470                 return "NMTOKENS";
1471             case 'o':
1472                 return "NOTATION";
1473
1474             case 't':
1475             case 'u':  // the enumeration type is reported as NMTOKEN too
1476                 return "NMTOKEN";
1477
1478             case 'c':
1479                 att.id = 'c';  // back to CDATA-type normalization
1480                 return "CDATA";
1481
1482             default:
1483                 panic(FAULT);
1484                 return null;
1485         }
1486     }
1487 
1488     /**
1489      * Parses a comment.
1490      *
1491      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1492      * with first &apos;-&apos; after &apos;&lt;!&apos;.
1493      *
1494      * @exception Exception is parser specific exception form panic method.
1495      */
1496     private void comm()
1497             throws Exception {
1498         if (mPh == PH_DOC_START) {
1499             mPh = PH_MISC_DTD;  // misc before DTD
1500         }               // '<!' has been already read by dispetcher.
1501         char ch;
1502         mBuffIdx = -1;
1503         for (short st = 0; st >= 0;) {
1504             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
1505             if (ch == EOS) {
1506                 panic(FAULT);
1507             }
1508             switch (st) {
1509                 case 0:     // first '-' of the comment open
1510                     if (ch == '-') {
1511                         st = 1;
1512                     } else {
1513                         panic(FAULT);
1514                     }
1515                     break;
1516 
1517                 case 1:     // secind '-' of the comment open
1518                     if (ch == '-') {
1519                         st = 2;
1520                     } else {
1521                         panic(FAULT);
1522                     }
1523                     break;
1524 
1525                 case 2:     // skip the comment body
1526                     switch (ch) {
1527                         case '-':
1528                             st = 3;
1529                             break;
1530 
1531                         default:
1532                             bappend(ch);
1533                             break;
1534                     }
1535                     break;
1536 
1537                 case 3:     // second '-' of the comment close
1538                     switch (ch) {
1539                         case '-':
1540                             st = 4;
1541                             break;
1542 
1543                         default:
1544                             bappend('-');
1545                             bappend(ch);
1546                             st = 2;
1547                             break;
1548                     }
1549                     break;
1550 
1551                 case 4:     // '>' of the comment close
1552                     if (ch == '>') {
1553                         comm(mBuff, mBuffIdx + 1);
1554                         st = -1;
1555                         break;
1556                     }
1557                 // else - panic [#2.5 compatibility note]
1558 
1559                 default:
1560                     panic(FAULT);
1561             }
1562         }
1563     }
1564 
1565     /**
1566      * Parses a processing instruction.
1567      *
1568      * The &apos;&lt;?&apos; is read in dispatcher so the method starts with
1569      * first character of PI target name after &apos;&lt;?&apos;.
1570      *
1571      * @exception Exception is parser specific exception form panic method.
1572      * @exception IOException
1573      */
1574     private void pi()
1575             throws Exception {
1576         // '<?' has been already read by dispetcher.
1577         char ch;
1578         String str = null;
1579         mBuffIdx = -1;
1580         for (short st = 0; st >= 0;) {
1581             ch = getch();
1582             if (ch == EOS) {
1583                 panic(FAULT);
1584             }
1585             switch (st) {
1586                 case 0:     // read the PI target name
1587                     switch (chtyp(ch)) {
1588                         case 'a':
1589                         case 'A':
1590                         case '_':
1591                         case ':':
1592                         case 'X':
1593                             bkch();
1594                             str = name(false);
1595                             //          PI target name may not be empty string [#2.6]
1596                             //          PI target name 'XML' is reserved [#2.6]
1597                             if ((str.length() == 0)
1598                                     || (mXml.name.equals(str.toLowerCase()) == true)) {
1599                                 panic(FAULT);
1600                             }
1601                             //          This is processing instruction
1602                             if (mPh == PH_DOC_START) // the begining of the document
1603                             {
1604                                 mPh = PH_MISC_DTD;    // misc before DTD
1605                             }
1606                             wsskip();  // skip spaces after the PI target name
1607                             st = 1;    // accumulate the PI body
1608                             mBuffIdx = -1;
1609                             break;
1610 
1611                         default:
1612                             panic(FAULT);
1613                     }
1614                     break;
1615 
1616                 case 1:     // accumulate the PI body
1617                     switch (ch) {
1618                         case '?':
1619                             st = 2;  // end of the PI body
1620                             break;
1621 
1622                         default:
1623                             bappend(ch);
1624                             break;
1625                     }
1626                     break;
1627 
1628                 case 2:     // end of the PI body
1629                     switch (ch) {
1630                         case '>':
1631                             //          PI has been read.
1632                             pi(str, new String(mBuff, 0, mBuffIdx + 1));
1633                             st = -1;
1634                             break;
1635 
1636                         case '?':
1637                             bappend('?');
1638                             break;
1639 
1640                         default:
1641                             bappend('?');
1642                             bappend(ch);
1643                             st = 1;  // accumulate the PI body
1644                             break;
1645                     }
1646                     break;
1647 
1648                 default:
1649                     panic(FAULT);
1650             }
1651         }
1652     }
1653 
1654     /**
1655      * Parses a character data.
1656      *
1657      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1658      * with first &apos;[&apos; after &apos;&lt;!&apos;.
1659      *
1660      * @exception Exception is parser specific exception form panic method.
1661      * @exception IOException
1662      */
1663     private void cdat()
1664             throws Exception {
1665         // '<!' has been already read by dispetcher.
1666         char ch;
1667         mBuffIdx = -1;
1668         for (short st = 0; st >= 0;) {
1669             ch = getch();
1670             switch (st) {
1671                 case 0:     // the first '[' of the CDATA open
1672                     if (ch == '[') {
1673                         st = 1;
1674                     } else {
1675                         panic(FAULT);
1676                     }
1677                     break;
1678 
1679                 case 1:     // read "CDATA"
1680                     if (chtyp(ch) == 'A') {
1681                         bappend(ch);
1682                     } else {
1683                         if ("CDATA".equals(
1684                                 new String(mBuff, 0, mBuffIdx + 1)) != true) {
1685                             panic(FAULT);
1686                         }
1687                         bkch();
1688                         st = 2;
1689                     }
1690                     break;
1691 
1692                 case 2:     // the second '[' of the CDATA open
1693                     if (ch != '[') {
1694                         panic(FAULT);
1695                     }
1696                     mBuffIdx = -1;
1697                     st = 3;
1698                     break;
1699 
1700                 case 3:     // read data before the first ']'
1701                     if (ch != ']') {
1702                         bappend(ch);
1703                     } else {
1704                         st = 4;
1705                     }
1706                     break;
1707 
1708                 case 4:     // read the second ']' or continue to read the data
1709                     if (ch != ']') {
1710                         bappend(']');
1711                         bappend(ch);
1712                         st = 3;
1713                     } else {
1714                         st = 5;
1715                     }
1716                     break;
1717 
1718                 case 5:     // read '>' or continue to read the data
1719                     switch (ch) {
1720                         case ']':
1721                             bappend(']');
1722                             break;
1723 
1724                         case '>':
1725                             bflash();
1726                             st = -1;
1727                             break;
1728 
1729                         default:
1730                             bappend(']');
1731                             bappend(']');
1732                             bappend(ch);
1733                             st = 3;
1734                             break;
1735                     }
1736                     break;
1737 
1738                 default:
1739                     panic(FAULT);
1740             }
1741         }
1742     }
1743 
1744     /**
1745      * Reads a xml name.
1746      *
1747      * The xml name must conform "Namespaces in XML" specification. Therefore
1748      * the ':' character is not allowed in the name. This method should be used
1749      * for PI and entity names which may not have a namespace according to the
1750      * specification mentioned above.
1751      *
1752      * @param ns The true value turns namespace conformance on.
1753      * @return The name has been read.
1754      * @exception Exception When incorrect character appear in the name.
1755      * @exception IOException
1756      */
1757     protected String name(boolean ns)
1758             throws Exception {
1759         mBuffIdx = -1;
1760         bname(ns);
1761         return new String(mBuff, 1, mBuffIdx);
1762     }
1763 
1764     /**
1765      * Reads a qualified xml name.
1766      *
1767      * The characters of a qualified name is an array of characters. The first
1768      * (chars[0]) character is the index of the colon character which separates
1769      * the prefix from the local name. If the index is zero, the name does not
1770      * contain separator or the parser works in the namespace unaware mode. The
1771      * length of qualified name is the length of the array minus one.
1772      *
1773      * @param ns The true value turns namespace conformance on.
1774      * @return The characters of a qualified name.
1775      * @exception Exception When incorrect character appear in the name.
1776      * @exception IOException
1777      */
1778     protected char[] qname(boolean ns)
1779             throws Exception {
1780         mBuffIdx = -1;
1781         bname(ns);
1782         char chars[] = new char[mBuffIdx + 1];
1783         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1784         return chars;
1785     }
1786 
1787     /**
1788      * Reads the public or/and system identifiers.
1789      *
1790      * @param inp The input object.
1791      * @exception Exception is parser specific exception form panic method.
1792      * @exception IOException
1793      */
1794     private void pubsys(Input inp)
1795             throws Exception {
1796         Pair pair = pubsys(' ');
1797         inp.pubid = pair.name;
1798         inp.sysid = pair.value;
1799         del(pair);
1800     }
1801 
1802     /**
1803      * Reads the public or/and system identifiers.
1804      *
1805      * @param flag The 'N' allows public id be without system id.
1806      * @return The public or/and system identifiers pair.
1807      * @exception Exception is parser specific exception form panic method.
1808      * @exception IOException
1809      */
1810     private Pair pubsys(char flag)
1811             throws Exception {
1812         Pair ids = pair(null);
1813         String str = name(false);
1814         if ("PUBLIC".equals(str) == true) {
1815             bqstr('i');  // non-CDATA normalization [#4.2.2]
1816             ids.name = new String(mBuff, 1, mBuffIdx);
1817             switch (wsskip()) {
1818                 case '\"':
1819                 case '\'':
1820                     bqstr(' ');
1821                     ids.value = new String(mBuff, 1, mBuffIdx);
1822                     break;
1823 
1824                 case EOS:
1825                     panic(FAULT);
1826 
1827                 default:
1828                     if (flag != 'N') // [#4.7]
1829                     {
1830                         panic(FAULT);
1831                     }
1832                     ids.value = null;
1833                     break;
1834             }
1835             return ids;
1836         } else if ("SYSTEM".equals(str) == true) {
1837             ids.name = null;
1838             bqstr(' ');
1839             ids.value = new String(mBuff, 1, mBuffIdx);
1840             return ids;
1841         }
1842         panic(FAULT);
1843         return null;
1844     }
1845 
1846     /**
1847      * Reads an attribute value.
1848      *
1849      * The grammar which this method can read is:<br />
1850      * <code>eqstr := S &quot;=&quot; qstr</code><br />
1851      * <code>qstr  := S (&quot;'&quot; string &quot;'&quot;) |
1852      *  ('&quot;' string '&quot;')</code><br /> This method resolves entities
1853      * inside a string unless the parser parses DTD.
1854      *
1855      * @param flag The '=' character forces the method to accept the '='
1856      * character before quoted string and read the following string as not an
1857      * attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
1858      * '-' - not an attribute value; 'd' - in DTD context.
1859      * @return The content of the quoted strign as a string.
1860      * @exception Exception is parser specific exception form panic method.
1861      * @exception IOException
1862      */
1863     protected String eqstr(char flag)
1864             throws Exception {
1865         if (flag == '=') {
1866             wsskip();
1867             if (getch() != '=') {
1868                 panic(FAULT);
1869             }
1870         }
1871         bqstr((flag == '=') ? '-' : flag);
1872         return new String(mBuff, 1, mBuffIdx);
1873     }
1874 
    /**
     * Resolves an entity.
     *
     * This method resolves built-in and character entity references. It
     * also reports external entities to the application.
     *
     * The method is called after the '&amp;' has been read. The characters of
     * the reference are collected in the parser buffer behind the current
     * content and the buffer offset is restored once the reference is
     * resolved.
     *
     * @param flag The 'x' character forces the method to report a skipped
     * entity; 'i' character - indicates non-CDATA normalization.
     * @return Name of unresolved entity or <code>null</code> if entity had been
     * resolved successfully.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException may be raised while reading the input.
     */
    private String ent(char flag)
            throws Exception {
        char ch;
        // Buffer position used for the '&'; the reference text is read back
        // from idx + 1 (presumably bappend stores at mBuffIdx + 1)
        int idx = mBuffIdx + 1;
        Input inp = null;
        String str = null;
        mESt = 0x100;  // reset the built-in entity recognizer
        bappend('&');
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // the first character of the entity name
                case 1:     // read built-in entity name
                    switch (chtyp(ch)) {
                        case 'd':
                        case '.':
                        case '-':
                            //          These characters may not start a name
                            if (st != 1) {
                                panic(FAULT);
                            }
                            //          falls through to the name characters
                        case 'a':
                        case 'A':
                        case '_':
                        case 'X':
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ':':
                            //          No colon in a name when namespace aware
                            if (mIsNSAware != false) {
                                panic(FAULT);
                            }
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ';':
                            if (mESt < 0x100) {
                                //              The entity is a built-in entity
                                mBuffIdx = idx - 1;
                                bappend(mESt);
                                st = -1;
                                break;
                            } else if (mPh == PH_DTD) {
                                //              In DTD entity declaration has to resolve character
                                //              entities and include "as is" others. [#4.4.7]
                                bappend(';');
                                st = -1;
                                break;
                            }
                            //          Convert an entity name to a string
                            str = new String(mBuff, idx + 1, mBuffIdx - idx);
                            inp = (Input) mEnt.get(str);
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (inp != null) {
                                if (inp.chars == null) {
                                    //          External entity
                                    InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                    if (is != null) {
                                        push(new Input(BUFFSIZE_READER));
                                        setinp(is);
                                        mInp.pubid = inp.pubid;
                                        mInp.sysid = inp.sysid;
                                        str = null;  // the entity is resolved
                                    } else {
                                        //              Unresolved external entity
                                        if (flag != 'x') {
                                            panic(FAULT);  // unknown entity within markup
                                        }
                                        //              str is name of unresolved entity
                                    }
                                } else {
                                    //          Internal entity
                                    push(inp);
                                    str = null;  // the entity is resolved
                                }
                            } else {
                                //              Unknown or general unparsed entity
                                if (flag != 'x') {
                                    panic(FAULT);  // unknown entity within markup
                                }
                                //              str is name of unresolved entity
                            }
                            st = -1;
                            break;

                        case '#':
                            //          '#' is valid only right after the '&'
                            if (st != 0) {
                                panic(FAULT);
                            }
                            st = 2;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 2:     // read character entity
                    switch (chtyp(ch)) {
                        case 'd':
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                                //      NOTE(review): values of 0xffff and above are
                                //      rejected, so characters outside of the BMP can
                                //      not be written as a reference - confirm this
                                //      limitation is intended.
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        case 'a':
                            //          If the entity buffer is empty and ch == 'x'
                            if ((mBuffIdx == idx) && (ch == 'x')) {
                                st = 3;
                                break;
                            }
                            //          any other letter falls through to the panic
                        default:
                            panic(FAULT);
                    }
                    break;

                case 3:     // read hex character entity
                    switch (chtyp(ch)) {
                        case 'A':
                        case 'a':
                        case 'd':
                            //          Letters which are not hex digits are caught
                            //          by the NumberFormatException handler below
                            bappend(ch);
                            break;

                        case ';':
                            //          Convert the character entity to a character
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                                //      Same upper limit as for the decimal form
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            //          Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }

        return str;
    }
2068 
2069     /**
2070      * Resoves a parameter entity.
2071      *
2072      * This method resolves a parameter entity references. It is also reports
2073      * external entities to the application.
2074      *
2075      * @param flag The '-' instruct the method to do not set up surrounding
2076      * spaces [#4.4.8].
2077      * @exception Exception is parser specific exception form panic method.
2078      * @exception IOException
2079      */
2080     private void pent(char flag)
2081             throws Exception {
2082         char ch;
2083         int idx = mBuffIdx + 1;
2084         Input inp = null;
2085         String str = null;
2086         bappend('%');
2087         if (mPh != PH_DTD) // the DTD internal subset
2088         {
2089             return;         // Not Recognized [#4.4.1]
2090         }               //              Read entity name
2091         bname(false);
2092         str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2093         if (getch() != ';') {
2094             panic(FAULT);
2095         }
2096         inp = (Input) mPEnt.get(str);
2097         //              Restore the buffer offset
2098         mBuffIdx = idx - 1;
2099         if (inp != null) {
2100             if (inp.chars == null) {
2101                 //              External parameter entity
2102                 InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
2103                 if (is != null) {
2104                     if (flag != '-') {
2105                         bappend(' ');  // tail space
2106                     }
2107                     push(new Input(BUFFSIZE_READER));
2108                     // BUG: there is no leading space! [#4.4.8]
2109                     setinp(is);
2110                     mInp.pubid = inp.pubid;
2111                     mInp.sysid = inp.sysid;
2112                 } else {
2113                     //          Unresolved external parameter entity
2114                     skippedEnt("%" + str);
2115                 }
2116             } else {
2117                 //              Internal parameter entity
2118                 if (flag == '-') {
2119                     //          No surrounding spaces
2120                     inp.chIdx = 1;
2121                 } else {
2122                     //          Insert surrounding spaces
2123                     bappend(' ');  // tail space
2124                     inp.chIdx = 0;
2125                 }
2126                 push(inp);
2127             }
2128         } else {
2129             //          Unknown parameter entity
2130             skippedEnt("%" + str);
2131         }
2132     }
2133 
2134     /**
2135      * Recognizes and handles a namespace declaration.
2136      *
2137      * This method identifies a type of namespace declaration if any and puts
2138      * new mapping on top of prefix stack.
2139      *
2140      * @param name The attribute qualified name (<code>name.value</code> is a
2141      * <code>String</code> object which represents the attribute prefix).
2142      * @param value The attribute value.
2143      * @return <code>true</code> if a namespace declaration is recognized.
2144      */
2145     private boolean isdecl(Pair name, String value) {
2146         if (name.chars[0] == 0) {
2147             if ("xmlns".equals(name.name) == true) {
2148                 //              New default namespace declaration
2149                 mPref = pair(mPref);
2150                 mPref.list = mElm;  // prefix owner element
2151                 mPref.value = value;
2152                 mPref.name = "";
2153                 mPref.chars = NONS;
2154                 mElm.num++;  // namespace counter
2155                 return true;
2156             }
2157         } else {
2158             if (name.eqpref(XMLNS) == true) {
2159                 //              New prefix declaration
2160                 int len = name.name.length();
2161                 mPref = pair(mPref);
2162                 mPref.list = mElm;  // prefix owner element
2163                 mPref.value = value;
2164                 mPref.name = name.name;
2165                 mPref.chars = new char[len + 1];
2166                 mPref.chars[0] = (char) (len + 1);
2167                 name.name.getChars(0, len, mPref.chars, 1);
2168                 mElm.num++;  // namespace counter
2169                 return true;
2170             }
2171         }
2172         return false;
2173     }
2174 
2175     /**
2176      * Resolves a prefix.
2177      *
2178      * @return The namespace assigned to the prefix.
2179      * @exception Exception When mapping for specified prefix is not found.
2180      */
2181     private String rslv(char[] qname)
2182             throws Exception {
2183         for (Pair pref = mPref; pref != null; pref = pref.next) {
2184             if (pref.eqpref(qname) == true) {
2185                 return pref.value;
2186             }
2187         }
2188         if (qname[0] == 1) {  // QNames like ':local'
2189             for (Pair pref = mPref; pref != null; pref = pref.next) {
2190                 if (pref.chars[0] == 0) {
2191                     return pref.value;
2192                 }
2193             }
2194         }
2195         panic(FAULT);
2196         return null;
2197     }
2198 
2199     /**
2200      * Skips xml white space characters.
2201      *
2202      * This method skips white space characters (' ', '\t', '\n', '\r') and
2203      * looks ahead not white space character.
2204      *
2205      * @return The first not white space look ahead character.
2206      * @exception IOException
2207      */
2208     protected char wsskip()
2209             throws IOException {
2210         char ch;
2211         while (true) {
2212             //          Read next character
2213             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2214             if (ch < 0x80) {
2215                 if (nmttyp[ch] != 3) // [ \t\n\r]
2216                 {
2217                     break;
2218                 }
2219             } else {
2220                 break;
2221             }
2222         }
2223         mChIdx--;  // bkch();
2224         return ch;
2225     }
2226 
    /**
     * Reports document type.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity or <code>null</code>.
     * @param sysid The system identifier of the entity or <code>null</code>.
     * @exception SAXException may be thrown by the implementation.
     */
    protected abstract void docType(String name, String pubid, String sysid)
            throws SAXException;
2236 
    /**
     * Reports a comment.
     *
     * @param text The comment text starting from first character.
     * @param length The number of characters in comment.
     */
    protected abstract void comm(char[] text, int length);
2244 
    /**
     * Reports a processing instruction.
     *
     * @param target The processing instruction target name.
     * @param body The processing instruction body text.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void pi(String target, String body)
            throws Exception;
2253 
    /**
     * Reports new namespace prefix.
     *
     * The Namespace prefix being declared is <code>mPref.name</code> and the
     * Namespace URI the prefix is mapped to is <code>mPref.value</code>. An
     * empty string is used for the default element namespace, which has no
     * prefix.
     *
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void newPrefix()
            throws Exception;
2262 
2263     /**
2264      * Reports the name of a skipped entity.
2265      *
2266      * @param name The entity name.
2267      */
2268     protected abstract void skippedEnt(String name)
2269             throws Exception;
2270 
2271     /**
2272      * Returns an <code>InputSource</code> for the specified entity or
2273      * <code>null</code>.
2274      *
2275      * @param name The name of the entity.
2276      * @param pubid The public identifier of the entity.
2277      * @param sysid The system identifier of the entity.
2278      * @return The input source of the entity or <code>null</code>.
2279      */
2280     protected abstract InputSource resolveEnt(
2281             String name, String pubid, String sysid)
2282             throws Exception;
2283 
2284     /**
2285      * Reports a notation declaration.
2286      *
2287      * @param name The notation's name.
2288      * @param pubid The notation's public identifier, or null if none was given.
2289      * @param sysid The notation's system identifier, or null if none was given.
2290      */
2291     protected abstract void notDecl(String name, String pubid, String sysid)
2292             throws Exception;
2293 
2294     /**
2295      * Reports an unparsed entity declaration.
2296      *
2297      * @param name The unparsed entity's name.
2298      * @param pubid The entity's public identifier, or null if none was given.
2299      * @param sysid The entity's system identifier.
2300      * @param notation The name of the associated notation.
2301      */
2302     protected abstract void unparsedEntDecl(
2303             String name, String pubid, String sysid, String notation)
2304             throws Exception;
2305 
2306     /**
2307      * Notifies the handler about a fatal parsing error.
2308      *
2309      * @param msg The problem description message.
2310      */
2311     protected abstract void panic(String msg)
2312             throws Exception;  // NOTE(review): callers look to rely on this always throwing - confirm
2313 
2314     /**
2315      * Reads a qualified xml name.
2316      *
2317      * This is a low level routine which leaves a qName in the buffer. A
2318      * qualified name is stored as an array of characters. The first (chars[0])
2319      * character is the index of the colon character which separates the
2320      * prefix from the local name. If the index is zero, the name does not
2321      * contain a separator or the parser works in namespace unaware mode. The
2322      * length of the qualified name is the length of the array minus one.
2323      *
2324      * @param ns The true value turns namespace conformance on.
2325      * @exception Exception is parser specific exception from panic method.
2326      * @exception IOException
2327      */
2328     private void bname(boolean ns)
2329             throws Exception {
2330         char ch;
2331         char type;
2332         mBuffIdx++;  // allocate a char for colon offset
2333         int bqname = mBuffIdx;  // buffer slot which receives the colon offset
2334         int bcolon = bqname;  // stays equal to bqname until a colon is found
2335         int bchidx = bqname + 1;  // buffer index for the next name character
2336         int bstart = bchidx;  // buffer index the next bcopy() writes to
2337         int cstart = mChIdx;  // mChars index the next bcopy() reads from
2338         short st = (short) ((ns == true) ? 0 : 2);  // 0 - prefix, 2 - suffix
2339         while (true) {
2340             //          Read next character (pending ones are saved before getch())
2341             if (mChIdx >= mChLen) {
2342                 bcopy(cstart, bstart);
2343                 getch();
2344                 mChIdx--;  // bkch();
2345                 cstart = mChIdx;
2346                 bstart = bchidx;
2347             }
2348             ch = mChars[mChIdx++];
2349             type = (char) 0;  // [X]
2350             if (ch < 0x80) {
2351                 type = (char) nmttyp[ch];
2352             } else if (ch == EOS) {
2353                 panic(FAULT);
2354             }
2355             //          Parse QName; the characters stay in mChars until bcopy()
2356             switch (st) {
2357                 case 0:     // read the first char of the prefix
2358                 case 2:     // read the first char of the suffix
2359                     switch (type) {
2360                         case 0:  // [aA_X]
2361                             bchidx++;  // append char to the buffer
2362                             st++;      // (st == 0)? 1: 3;
2363                             break;
2364 
2365                         case 1:  // [:]
2366                             mChIdx--;  // bkch(); the colon is read again in state 1 or 3
2367                             st++;      // (st == 0)? 1: 3;
2368                             break;
2369 
2370                         default:
2371                             panic(FAULT);
2372                     }
2373                     break;
2374 
2375                 case 1:     // read the prefix
2376                 case 3:     // read the suffix
2377                     switch (type) {
2378                         case 0:  // [aA_X]
2379                         case 2:  // [.-d]
2380                             bchidx++;  // append char to the buffer
2381                             break;
2382 
2383                         case 1:  // [:]
2384                             bchidx++;  // append char to the buffer
2385                             if (ns == true) {
2386                                 if (bcolon != bqname) {
2387                                     panic(FAULT);  // it must be only one colon
2388                                 }
2389                                 bcolon = bchidx - 1;  // buffer index of the colon
2390                                 if (st == 1) {
2391                                     st = 2;
2392                                 }
2393                             }
2394                             break;
2395 
2396                         default:
2397                             mChIdx--;  // bkch();
2398                             bcopy(cstart, bstart);
2399                             mBuff[bqname] = (char) (bcolon - bqname);  // 0 if no colon
2400                             return;
2401                     }
2402                     break;
2403 
2404                 default:
2405                     panic(FAULT);
2406             }
2407         }
2408     }
2409 
2410     /**
2411      * Reads a nmtoken.
2412      *
2413      * This is low level routine which leaves a nmtoken in the buffer.
2414      *
2415      * @exception Exception is parser specific exception form panic method.
2416      * @exception IOException
2417      */
2418     private void bntok()
2419             throws Exception {
2420         char ch;
2421         mBuffIdx = -1;
2422         bappend((char) 0);  // default offset to the colon char
2423         while (true) {
2424             ch = getch();
2425             switch (chtyp(ch)) {
2426                 case 'a':
2427                 case 'A':
2428                 case 'd':
2429                 case '.':
2430                 case ':':
2431                 case '-':
2432                 case '_':
2433                 case 'X':
2434                     bappend(ch);
2435                     break;
2436 
2437                 case 'Z':
2438                     panic(FAULT);
2439 
2440                 default:
2441                     bkch();
2442                     return;
2443             }
2444         }
2445     }
2446 
2447     /**
2448      * Recognizes a keyword.
2449      *
2450      * This is low level routine which recognizes one of keywords in the buffer.
2451      * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
2452      * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
2453      * Q IMPLIED - I FIXED - F
2454      *
2455      * @return an id of a keyword or '?'.
2456      * @exception Exception is parser specific exception form panic method.
2457      * @exception IOException
2458      */
2459     private char bkeyword()
2460             throws Exception {
2461         String str = new String(mBuff, 1, mBuffIdx);
2462         switch (str.length()) {
2463             case 2:  // ID
2464                 return ("ID".equals(str) == true) ? 'i' : '?';
2465 
2466             case 5:  // IDREF, CDATA, FIXED
2467                 switch (mBuff[1]) {
2468                     case 'I':
2469                         return ("IDREF".equals(str) == true) ? 'r' : '?';
2470                     case 'C':
2471                         return ("CDATA".equals(str) == true) ? 'c' : '?';
2472                     case 'F':
2473                         return ("FIXED".equals(str) == true) ? 'F' : '?';
2474                     default:
2475                         break;
2476                 }
2477                 break;
2478 
2479             case 6:  // IDREFS, ENTITY
2480                 switch (mBuff[1]) {
2481                     case 'I':
2482                         return ("IDREFS".equals(str) == true) ? 'R' : '?';
2483                     case 'E':
2484                         return ("ENTITY".equals(str) == true) ? 'n' : '?';
2485                     default:
2486                         break;
2487                 }
2488                 break;
2489 
2490             case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2491                 switch (mBuff[1]) {
2492                     case 'I':
2493                         return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2494                     case 'N':
2495                         return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2496                     case 'A':
2497                         return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2498                     case 'E':
2499                         return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2500                     default:
2501                         break;
2502                 }
2503                 break;
2504 
2505             case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2506                 switch (mBuff[2]) {
2507                     case 'N':
2508                         return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2509                     case 'M':
2510                         return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2511                     case 'O':
2512                         return ("NOTATION".equals(str) == true) ? 'o' : '?';
2513                     case 'E':
2514                         return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2515                     default:
2516                         break;
2517                 }
2518                 break;
2519 
2520             default:
2521                 break;
2522         }
2523         return '?';
2524     }
2525 
2526     /**
2527      * Reads a single or double quoted string into the buffer.
2528      *
2529      * This method resolves entities inside a string unless the parser parses
2530      * DTD ('&' goes to ent() unless flag is 'd', '%' to pent() only if it is).
2531      *
2532      * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
2533      * not an attribute value; 'd' - in DTD context.
2534      * @exception Exception is parser specific exception from panic method.
2535      * @exception IOException
2536      */
2537     private void bqstr(char flag)
2538             throws Exception {
2539         Input inp = mInp;  // remember the original input
2540         mBuffIdx = -1;
2541         bappend((char) 0);  // default offset to the colon char
2542         char ch;
2543         for (short st = 0; st >= 0;) {  // a negative state ends the loop
2544             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2545             switch (st) {
2546                 case 0:     // read a single or double quote
2547                     switch (ch) {
2548                         case ' ':
2549                         case '\n':
2550                         case '\r':
2551                         case '\t':
2552                             break;  // white space before the quote is skipped
2553 
2554                         case '\'':
2555                             st = 2;  // read a single quoted string
2556                             break;
2557 
2558                         case '\"':
2559                             st = 3;  // read a double quoted string
2560                             break;
2561 
2562                         default:
2563                             panic(FAULT);
2564                             break;
2565                     }
2566                     break;
2567 
2568                 case 2:     // read a single quoted string
2569                 case 3:     // read a double quoted string
2570                     switch (ch) {
2571                         case '\'':
2572                             if ((st == 2) && (mInp == inp)) {  // closes only in the original input
2573                                 st = -1;
2574                             } else {
2575                                 bappend(ch);
2576                             }
2577                             break;
2578 
2579                         case '\"':
2580                             if ((st == 3) && (mInp == inp)) {  // closes only in the original input
2581                                 st = -1;
2582                             } else {
2583                                 bappend(ch);
2584                             }
2585                             break;
2586 
2587                         case '&':
2588                             if (flag != 'd') {
2589                                 ent(flag);
2590                             } else {
2591                                 bappend(ch);
2592                             }
2593                             break;
2594 
2595                         case '%':
2596                             if (flag == 'd') {
2597                                 pent('-');
2598                             } else {
2599                                 bappend(ch);
2600                             }
2601                             break;
2602 
2603                         case '<':
2604                             if ((flag == '-') || (flag == 'd')) {
2605                                 bappend(ch);
2606                             } else {
2607                                 panic(FAULT);
2608                             }
2609                             break;
2610 
2611                         case EOS:               // EOS before single/double quote
2612                             panic(FAULT);  // NOTE(review): no break - falls through if panic returns
2613 
2614                         case '\r':     // EOL processing [#2.11 & #3.3.3]
2615                             if (flag != ' ' && mInp.next == null) {
2616                                 if (getch() != '\n') {
2617                                     bkch();
2618                                 }
2619                                 ch = '\n';  // "\r\n" and a lone '\r' both become '\n'
2620                             }
2621                         default:  // '\r' continues here, there is no break above
2622                             bappend(ch, flag);
2623                             break;
2624                     }
2625                     break;
2626 
2627                 default:
2628                     panic(FAULT);
2629             }
2630         }
2631         //              There is maximum one space at the end of the string in
2632         //              i-mode (non CDATA normalization) and it has to be removed.
2633         if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
2634             mBuffIdx -= 1;
2635         }
2636     }
2637 
2638     /**
2639      * Reports characters and empties the parser's buffer. This method is called
2640      * only if parser is going to return control to the main loop. This means
2641      * that this method may use parser buffer to report white space without
2642      * copying characters to temporary buffer.
2643      */
2644     protected abstract void bflash()
2645             throws Exception;
2646 
2647     /**
2648      * Reports white space characters and empties the parser's buffer. This
2649      * method is called only if parser is going to return control to the main
2650      * loop. This means that this method may use parser buffer to report white
2651      * space without copying characters to temporary buffer.
2652      */
2653     protected abstract void bflash_ws()
2654             throws Exception;
2655 
2656     /**
2657      * Appends a character to parser's buffer with normalization.
2658      *
2659      * @param ch The character to append to the buffer.
2660      * @param mode The normalization mode.
2661      */
2662     private void bappend(char ch, char mode) {
2663         //              This implements attribute value normalization as 
2664         //              described in the XML specification [#3.3.3].
2665         switch (mode) {
2666             case 'i':  // non CDATA normalization
2667                 switch (ch) {
2668                     case ' ':
2669                     case '\n':
2670                     case '\r':
2671                     case '\t':
2672                         if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
2673                             bappend(' ');
2674                         }
2675                         return;
2676 
2677                     default:
2678                         break;
2679                 }
2680                 break;
2681 
2682             case 'c':  // CDATA normalization
2683                 switch (ch) {
2684                     case '\n':
2685                     case '\r':
2686                     case '\t':
2687                         ch = ' ';
2688                         break;
2689 
2690                     default:
2691                         break;
2692                 }
2693                 break;
2694 
2695             default:  // no normalization
2696                 break;
2697         }
2698         mBuffIdx++;
2699         if (mBuffIdx < mBuff.length) {
2700             mBuff[mBuffIdx] = ch;
2701         } else {
2702             mBuffIdx--;
2703             bappend(ch);
2704         }
2705     }
2706 
2707     /**
2708      * Appends a character to parser's buffer. The buffer size is doubled
2709      * when there is no room left for the character.
2710      *
2711      * @param ch The character to append to the buffer.
2712      */
2713     private void bappend(char ch) {
2714         mBuffIdx++;
2715         if (mBuffIdx >= mBuff.length) {
2716             //          Double the buffer size (checked here, not via an exception)
2717             char buff[] = new char[mBuff.length << 1];
2718             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2719             mBuff = buff;
2720         }
2721         mBuff[mBuffIdx] = ch;
2722     }
2723 
2724     /**
2725      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2726      * parser's buffer (mBuff), which is expanded to fit the copy when needed.
2727      *
2728      * @param cidx The character buffer (mChars) start index.
2729      * @param bidx The parser buffer (mBuff) start index.
2730      */
2731     private void bcopy(int cidx, int bidx) {
2732         int length = mChIdx - cidx;
2733         if ((bidx + length + 1) >= mBuff.length) {
2734             //          Expand the buffer; cover bidx even if it is past the old end
2735             char buff[] = new char[Math.max(mBuff.length, bidx + 1) + length];
2736             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2737             mBuff = buff;
2738         }
2739         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2740         mBuffIdx += length;  // the parser buffer index moves past the copied chars
2741     }
2742 
2743     /**
2744      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2745      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe
2746      * 0x100 is a built-in entity replacement character.
2747      *
2748      * @param ch the next character of an entity name.
2749      */
2750     private void eappend(char ch) {
2751         switch (mESt) {
2752             case 0x100:  // "l" or "g" or "a" or "q"
2753                 switch (ch) {
2754                     case 'l':
2755                         mESt = 0x101;
2756                         break;
2757                     case 'g':
2758                         mESt = 0x102;
2759                         break;
2760                     case 'a':
2761                         mESt = 0x103;
2762                         break;
2763                     case 'q':
2764                         mESt = 0x107;
2765                         break;
2766                     default:
2767                         mESt = 0x200;
2768                         break;
2769                 }
2770                 break;
2771 
2772             case 0x101:  // "lt"
2773                 mESt = (ch == 't') ? '<' : (char) 0x200;
2774                 break;
2775 
2776             case 0x102:  // "gt"
2777                 mESt = (ch == 't') ? '>' : (char) 0x200;
2778                 break;
2779 
2780             case 0x103:  // "am" or "ap"
2781                 switch (ch) {
2782                     case 'm':
2783                         mESt = 0x104;
2784                         break;
2785                     case 'p':
2786                         mESt = 0x105;
2787                         break;
2788                     default:
2789                         mESt = 0x200;
2790                         break;
2791                 }
2792                 break;
2793 
2794             case 0x104:  // "amp"
2795                 mESt = (ch == 'p') ? '&' : (char) 0x200;
2796                 break;
2797 
2798             case 0x105:  // "apo"
2799                 mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2800                 break;
2801 
2802             case 0x106:  // "apos"
2803                 mESt = (ch == 's') ? '\'' : (char) 0x200;
2804                 break;
2805 
2806             case 0x107:  // "qu"
2807                 mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2808                 break;
2809 
2810             case 0x108:  // "quo"
2811                 mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2812                 break;
2813 
2814             case 0x109:  // "quot"
2815                 mESt = (ch == 't') ? '\"' : (char) 0x200;
2816                 break;
2817 
2818             case '<':   // "lt"
2819             case '>':   // "gt"
2820             case '&':   // "amp"
2821             case '\'':  // "apos"
2822             case '\"':  // "quot"
2823                 mESt = 0x200;
2824             default:
2825                 break;
2826         }
2827     }
2828 
2829     /**
2830      * Sets up a new input source on the top of the input stack. Note, the first
2831      * byte returned by the entity's byte stream has to be the first byte in the
2832      * entity. However, the parser does not expect the byte order mark in both
2833      * cases when encoding is provided by the input source.
2834      *
2835      * @param is A new input source to set up.
2836      * @exception IOException If any IO errors occur.
2837      * @exception Exception is parser specific exception from panic method.
2838      */
2839     protected void setinp(InputSource is)
2840             throws Exception {
2841         Reader reader = null;
2842         mChIdx = 0;
2843         mChLen = 0;
2844         mChars = mInp.chars;
2845         mInp.src = null;
2846         if (mPh < PH_DOC_START) {
2847             mIsSAlone = false;  // default [#2.9]
2848         }
2849         mIsSAloneSet = false;
2850         if (is.getCharacterStream() != null) {
2851             //          Ignore encoding in the xml text decl.
2852             reader = is.getCharacterStream();
2853             xml(reader);
2854         } else if (is.getByteStream() != null) {
2855             String expenc;
2856             if (is.getEncoding() != null) {
2857                 //              Ignore encoding in the xml text decl. (no locale rules)
2858                 expenc = is.getEncoding().toUpperCase(java.util.Locale.ROOT);
2859                 if (expenc.equals("UTF-16")) {
2860                     reader = bom(is.getByteStream(), 'U');  // UTF-16 [#4.3.3]
2861                 } else {
2862                     reader = enc(expenc, is.getByteStream());
2863                 }
2864                 xml(reader);
2865             } else {
2866                 //              Get encoding from BOM or the xml text decl.
2867                 reader = bom(is.getByteStream(), ' ');
2868                 if (reader == null) {
2869                     //          Encoding is defined by the xml text decl.
2870                     reader = enc("UTF-8", is.getByteStream());
2871                     expenc = xml(reader);
2872                     if (expenc.startsWith("UTF-16")) {
2873                         panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
2874                     }
2875                     reader = enc(expenc, is.getByteStream());
2876                 } else {
2877                     //          Encoding is defined by the BOM.
2878                     xml(reader);
2879                 }
2880             }
2881         } else {
2882             //          There is no support for public/system identifiers.
2883             panic(FAULT);
2884         }
2885         mInp.src = reader;
2886         mInp.pubid = is.getPublicId();
2887         mInp.sysid = is.getSystemId();
2888     }
2889 
2890     /**
2891      * Determines the entity encoding.
2892      *
2893      * This method gets encoding from Byte Order Mark [#4.3.3] if any; without a
2894      * BOM the first character is decoded as UTF-8 and left in mChars. The first
2895      * byte of the stream has to be the first byte in the entity. No UCS-4 support.
2896      *
2897      * @param is A byte stream of the entity.
2898      * @param hint An encoding hint, character U means UTF-16.
2899      * @return a BOM based reader (UTF-8 at end of stream), null if no BOM.
2900      * @exception Exception is parser specific exception from panic method.
2901      * @exception IOException
2902      */
2903     private Reader bom(InputStream is, char hint)
2904             throws Exception {
2905         int val = is.read();
2906         switch (val) {
2907             case 0xef:     // UTF-8
2908                 if (hint == 'U') // must be UTF-16
2909                 {
2910                     panic(FAULT);
2911                 }
2912                 if (is.read() != 0xbb) {
2913                     panic(FAULT);
2914                 }
2915                 if (is.read() != 0xbf) {
2916                     panic(FAULT);
2917                 }
2918                 return new ReaderUTF8(is);
2919 
2920             case 0xfe:     // UTF-16, big-endian
2921                 if (is.read() != 0xff) {
2922                     panic(FAULT);
2923                 }
2924                 return new ReaderUTF16(is, 'b');
2925 
2926             case 0xff:     // UTF-16, little-endian
2927                 if (is.read() != 0xfe) {
2928                     panic(FAULT);
2929                 }
2930                 return new ReaderUTF16(is, 'l');
2931 
2932             case -1:  // the stream has no bytes at all
2933                 mChars[mChIdx++] = EOS;
2934                 return new ReaderUTF8(is);
2935 
2936             default:
2937                 if (hint == 'U') // must be UTF-16
2938                 {
2939                     panic(FAULT);
2940                 }
2941                 //              Read the rest of UTF-8 character (NOTE(review): end of stream is not checked)
2942                 switch (val & 0xf0) {
2943                     case 0xc0:
2944                     case 0xd0:  // two byte sequence
2945                         mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f));
2946                         break;
2947 
2948                     case 0xe0:  // three byte sequence
2949                         mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2950                                 | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2951                         break;
2952 
2953                     case 0xf0:  // UCS-4 character
2954                         throw new UnsupportedEncodingException();
2955 
2956                     default:  // the byte is taken as the character itself
2957                         mChars[mChIdx++] = (char) val;
2958                         break;
2959                 }
2960                 return null;  // no BOM; the caller has to choose the reader
2961         }
2962     }
2963 
2964     /**
2965      * Parses the xml text declaration.
2966      *
2967      * This method gets encoding from the xml text declaration [#4.3.1] if any.
2968      * The method assumes the buffer (mChars) is big enough to accomodate whole
2969      * xml text declaration.
2970      *
2971      * @param reader is entity reader.
2972      * @return The xml text declaration encoding or default UTF-8 encoding.
2973      * @exception Exception is parser specific exception form panic method.
2974      * @exception IOException
2975      */
2976     private String xml(Reader reader)
2977             throws Exception {
2978         String str = null;
2979         String enc = "UTF-8";
2980         char ch;
2981         int val;
2982         short st;
2983         //              Read the xml text declaration into the buffer
2984         if (mChIdx != 0) {
2985             //          The bom method have read ONE char into the buffer. 
2986             st = (short) ((mChars[0] == '<') ? 1 : -1);
2987         } else {
2988             st = 0;
2989         }
2990         while (st >= 0 && mChIdx < mChars.length) {
2991             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
2992             mChars[mChIdx++] = ch;
2993             switch (st) {
2994                 case 0:     // read '<' of xml declaration
2995                     switch (ch) {
2996                         case '<':
2997                             st = 1;
2998                             break;
2999 
3000                         case 0xfeff:    // the byte order mask
3001                             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
3002                             mChars[mChIdx - 1] = ch;
3003                             st = (short) ((ch == '<') ? 1 : -1);
3004                             break;
3005 
3006                         default:
3007                             st = -1;
3008                             break;
3009                     }
3010                     break;
3011 
3012                 case 1:     // read '?' of xml declaration [#4.3.1]
3013                     st = (short) ((ch == '?') ? 2 : -1);
3014                     break;
3015 
3016                 case 2:     // read 'x' of xml declaration [#4.3.1]
3017                     st = (short) ((ch == 'x') ? 3 : -1);
3018                     break;
3019 
3020                 case 3:     // read 'm' of xml declaration [#4.3.1]
3021                     st = (short) ((ch == 'm') ? 4 : -1);
3022                     break;
3023 
3024                 case 4:     // read 'l' of xml declaration [#4.3.1]
3025                     st = (short) ((ch == 'l') ? 5 : -1);
3026                     break;
3027 
3028                 case 5:     // read white space after 'xml'
3029                     switch (ch) {
3030                         case ' ':
3031                         case '\t':
3032                         case '\r':
3033                         case '\n':
3034                             st = 6;
3035                             break;
3036 
3037                         default:
3038                             st = -1;
3039                             break;
3040                     }
3041                     break;
3042 
3043                 case 6:     // read content of xml declaration
3044                     switch (ch) {
3045                         case '?':
3046                             st = 7;
3047                             break;
3048 
3049                         case EOS:
3050                             st = -2;
3051                             break;
3052 
3053                         default:
3054                             break;
3055                     }
3056                     break;
3057 
3058                 case 7:     // read '>' after '?' of xml declaration
3059                     switch (ch) {
3060                         case '>':
3061                         case EOS:
3062                             st = -2;
3063                             break;
3064 
3065                         default:
3066                             st = 6;
3067                             break;
3068                     }
3069                     break;
3070 
3071                 default:
3072                     panic(FAULT);
3073                     break;
3074             }
3075         }
3076         mChLen = mChIdx;
3077         mChIdx = 0;
3078         //              If there is no xml text declaration, the encoding is default.
3079         if (st == -1) {
3080             return enc;
3081         }
3082         mChIdx = 5;  // the first white space after "<?xml"
3083         //              Parse the xml text declaration
3084         for (st = 0; st >= 0;) {
3085             ch = getch();
3086             switch (st) {
3087                 case 0:     // skip spaces after the xml declaration name
3088                     if (chtyp(ch) != ' ') {
3089                         bkch();
3090                         st = 1;
3091                     }
3092                     break;
3093 
3094                 case 1:     // read xml declaration version
3095                 case 2:     // read xml declaration encoding or standalone
3096                 case 3:     // read xml declaration standalone
3097                     switch (chtyp(ch)) {
3098                         case 'a':
3099                         case 'A':
3100                         case '_':
3101                             bkch();
3102                             str = name(false).toLowerCase();
3103                             if ("version".equals(str) == true) {
3104                                 if (st != 1) {
3105                                     panic(FAULT);
3106                                 }
3107                                 if ("1.0".equals(eqstr('=')) != true) {
3108                                     panic(FAULT);
3109                                 }
3110                                 mInp.xmlver = 0x0100;
3111                                 st = 2;
3112                             } else if ("encoding".equals(str) == true) {
3113                                 if (st != 2) {
3114                                     panic(FAULT);
3115                                 }
3116                                 mInp.xmlenc = eqstr('=').toUpperCase();
3117                                 enc = mInp.xmlenc;
3118                                 st = 3;
3119                             } else if ("standalone".equals(str) == true) {
3120                                 if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
3121                                 {
3122                                     panic(FAULT);
3123                                 }
3124                                 str = eqstr('=').toLowerCase();
3125                                 //              Check the 'standalone' value and use it [#5.1]
3126                                 if (str.equals("yes") == true) {
3127                                     mIsSAlone = true;
3128                                 } else if (str.equals("no") == true) {
3129                                     mIsSAlone = false;
3130                                 } else {
3131                                     panic(FAULT);
3132                                 }
3133                                 mIsSAloneSet = true;
3134                                 st = 4;
3135                             } else {
3136                                 panic(FAULT);
3137                             }
3138                             break;
3139 
3140                         case ' ':
3141                             break;
3142 
3143                         case '?':
3144                             if (st == 1) {
3145                                 panic(FAULT);
3146                             }
3147                             bkch();
3148                             st = 4;
3149                             break;
3150 
3151                         default:
3152                             panic(FAULT);
3153                     }
3154                     break;
3155 
3156                 case 4:     // end of xml declaration
3157                     switch (chtyp(ch)) {
3158                         case '?':
3159                             if (getch() != '>') {
3160                                 panic(FAULT);
3161                             }
3162                             if (mPh <= PH_DOC_START) {
3163                                 mPh = PH_MISC_DTD;  // misc before DTD
3164                             }
3165                             st = -1;
3166                             break;
3167 
3168                         case ' ':
3169                             break;
3170 
3171                         default:
3172                             panic(FAULT);
3173                     }
3174                     break;
3175 
3176                 default:
3177                     panic(FAULT);
3178             }
3179         }
3180         return enc;
3181     }
3182 
3183     /**
3184      * Sets up the document reader.
3185      *
3186      * @param name an encoding name.
3187      * @param is the document byte input stream.
3188      * @return a reader constructed from encoding name and input stream.
3189      * @exception UnsupportedEncodingException
3190      */
3191     private Reader enc(String name, InputStream is)
3192             throws UnsupportedEncodingException {
3193         //              DO NOT CLOSE current reader if any! 
3194         if (name.equals("UTF-8")) {
3195             return new ReaderUTF8(is);
3196         } else if (name.equals("UTF-16LE")) {
3197             return new ReaderUTF16(is, 'l');
3198         } else if (name.equals("UTF-16BE")) {
3199             return new ReaderUTF16(is, 'b');
3200         } else {
3201             return new InputStreamReader(is, name);
3202         }
3203     }
3204 
3205     /**
3206      * Makes the given input current by placing it on top of the input stack.
3207      *
3208      * @param inp The input to become the current one.
3209      */
3210     protected void push(Input inp) {
3211         mInp.chIdx = mChIdx;  // remember the buffer state of the input
3212         mInp.chLen = mChLen;  // which is about to be suspended
3213         inp.next = mInp;
3214         mInp = inp;
3215         mChIdx = inp.chIdx;   // continue with the buffer of the new input
3216         mChLen = inp.chLen;
3217         mChars = inp.chars;
3218     }
3219 
3220     /**
3221      * Removes the current input from the input stack and resumes the previous one.
3222      */
3223     protected void pop() {
3224         if (mInp.src != null) {
3225             try {
3226                 mInp.src.close();
3227             } catch (IOException ignored) {  // a failed close is not reported
3228             }
3229             mInp.src = null;
3230         }
3231         mInp = mInp.next;
3232         if (mInp == null) {
3233             mChars = null;  // the input stack is empty now
3234             mChIdx = 0;
3235             mChLen = 0;
3236         } else {
3237             mChars = mInp.chars;
3238             mChIdx = mInp.chIdx;
3239             mChLen = mInp.chLen;
3240         }
3241     }
3242 
3243     /**
3244      * Maps a character to its type.
3245      *
3246      * Possible character type values are:<br /> - ' ' for any kind of white
3247      * space character;<br /> - 'a' for any lower case alphabetical character
3248      * value;<br /> - 'A' for any upper case alphabetical character value;<br />
3249      * - 'd' for any decimal digit character value;<br /> - 'z' for any
3250      * character less than ' ' except '\t', '\n', '\r';<br /> - 'X' for any
3251      * non-ASCII character;<br /> - 'Z' for the EOS character.<br /> An ASCII
3252      * (7 bit) character which does not fall in any category listed above is
3253      * mapped to itself.
3254      *
3255      * @param ch The character to map.
3256      * @return The type of the character.
3257      */
3258     protected char chtyp(char ch) {
3259         if (ch >= 0x80) {
3260             return (ch == EOS) ? 'Z' : 'X';
3261         }
3262         return (char) asctyp[ch];  // 7 bit characters use the lookup table
3263     }
3264 
3265     /**
3266      * Retrives the next character in the document.
3267      *
3268      * @return The next character in the document.
3269      */
3270     protected char getch()
3271             throws IOException {
3272         if (mChIdx >= mChLen) {
3273             if (mInp.src == null) {
3274                 pop();  // remove internal entity
3275                 return getch();
3276             }
3277             //          Read new portion of the document characters
3278             int Num = mInp.src.read(mChars, 0, mChars.length);
3279             if (Num < 0) {
3280                 if (mInp != mDoc) {
3281                     pop();  // restore the previous input
3282                     return getch();
3283                 } else {
3284                     mChars[0] = EOS;
3285                     mChLen = 1;
3286                 }
3287             } else {
3288                 mChLen = Num;
3289             }
3290             mChIdx = 0;
3291         }
3292         return mChars[mChIdx++];
3293     }
3294 
3295     /**
3296      * Steps back over the character most recently read.
3297      *
3298      * This method <strong>MUST NOT</strong> be called more than once after each
3299      * call of the {@link #getch getch} method.
3300      */
3301     protected void bkch()
3302             throws Exception {
3303         if (mChIdx < 1) {
3304             panic(FAULT);  // the buffer has no character to step back over
3305         }
3306         mChIdx -= 1;
3307     }
3308 
3309     /**
3310      * Overwrites the buffer character located at the current index.
3311      *
3312      * @param ch The character to store.
3313      */
3314     protected void setch(char ch) {
3315         this.mChars[this.mChIdx] = ch;
3316     }
3317 
3318     /**
3319      * Searches a chain of pairs for a pair with the given qualified name.
3320      *
3321      * @param chain The first element of the chain of pairs; may be null.
3322      * @param qname The qualified name to look for.
3323      * @return The first pair in the chain whose {@code eqname(qname)} check
3324      *         succeeds, or null if there is no such pair.
3325      */
3326     protected Pair find(Pair chain, char[] qname) {
3327         Pair cursor = chain;
3328         while ((cursor != null) && !cursor.eqname(qname)) {
3329             cursor = cursor.next;
3330         }
3331         return cursor;
3332     }
3333 
3334     /**
3335      * Provedes an instance of a pair.
3336      *
3337      * @param next The reference to a next pair.
3338      * @return An instance of a pair.
3339      */
3340     protected Pair pair(Pair next) {
3341         Pair pair;
3342 
3343         if (mDltd != null) {
3344             pair = mDltd;
3345             mDltd = pair.next;
3346         } else {
3347             pair = new Pair();
3348         }
3349         pair.next = next;
3350 
3351         return pair;
3352     }
3353 
3354     /**
3355      * Deletes an instance of a pair.
3356      *
3357      * @param pair The pair to delete.
3358      * @return A reference to the next pair in a chain.
3359      */
3360     protected Pair del(Pair pair) {
3361         Pair next = pair.next;
3362 
3363         pair.name = null;
3364         pair.value = null;
3365         pair.chars = null;
3366         pair.list = null;
3367         pair.next = mDltd;
3368         mDltd = pair;
3369 
3370         return next;
3371     }
3372 }