< prev index next >

src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java

Print this page




 103     // 'T' - "NMTOKENS"
 104     // 'u' - enumeration type
 105     // 'o' - "NOTATION"
 106     // 'c' - "CDATA"
 107     // see also: bkeyword() and atype()
 108     //
 109     protected Pair mAttL;       // list of defined attributes by element name
 110     protected Input mDoc;        // document entity
 111     protected Input mInp;        // stack of entities
 112     private char[] mChars;      // reading buffer
 113     private int mChLen;      // current capacity (NOTE(review): presumably of mChars - confirm)
 114     private int mChIdx;      // index to the next char
 115     protected Attrs mAttrs;      // attributes of the current element
 116     private String[] mItems;      // attributes array of the current element
 117     private char mAttrIdx;    // attributes counter/index
 118     private String mUnent;  // unresolved entity name
 119     private Pair mDltd;   // deleted objects for reuse
 120     /**
 121      * Default prefixes
 122      */
 123     private static final char NONS[];
 124     private static final char XML[];
 125     private static final char XMLNS[];
 126 
 127     static {
 128         NONS = new char[1];
 129         NONS[0] = (char) 0;
 130 
 131         XML = new char[4];
 132         XML[0] = (char) 4;
 133         XML[1] = 'x';
 134         XML[2] = 'm';
 135         XML[3] = 'l';
 136 
 137         XMLNS = new char[6];
 138         XMLNS[0] = (char) 6;
 139         XMLNS[1] = 'x';
 140         XMLNS[2] = 'm';
 141         XMLNS[3] = 'l';
 142         XMLNS[4] = 'n';
 143         XMLNS[5] = 's';
 144     }
 145     /**
 146      * ASCII character type array.
 147      *
 148      * This array maps an ASCII (7 bit) character to the character type.<br>
 149      * Possible character type values are:<br> - ' ' for any kind of white
 150      * space character;<br> - 'a' for any lower case alphabetical character
 151      * value;<br> - 'A' for any upper case alphabetical character value;<br>
 152      * - 'd' for any decimal digit character value;<br> - 'z' for any
 153      * character less than ' ' except '\t', '\n', '\r';<br> An ASCII (7 bit)
 154      * character which does not fall in any category listed above is mapped to
 155      * itself.
 156      */
 157     private static final byte asctyp[];
 158     /**
 159      * NMTOKEN character type array.
 160      *
 161      * This array maps an ASCII (7 bit) character to the character type.<br>
 162      * Possible character type values are:<br> - 0 for underscore ('_') or any
 163      * lower and upper case alphabetical character value;<br> - 1 for colon
 164      * (':') character;<br> - 2 for dash ('-') and dot ('.') or any decimal
 165      * digit character value;<br> - 3 for any kind of white space character<br>
 166      * An ASCII (7 bit) character which does not fall in any category listed
 167      * above is mapped to 0xff.
 168      */
 169     private static final byte nmttyp[];
 170 
 171     /**
 172      * Static constructor.
 173      *
 174      * Sets up the ASCII character type array which is used by
 175      * {@link #asctyp asctyp} method and NMTOKEN character type array.
 176      */
 177     static {
 178         short i = 0;
 179 
 180         asctyp = new byte[0x80];
 181         while (i < ' ') {
 182             asctyp[i++] = (byte) 'z';
 183         }
 184         asctyp['\t'] = (byte) ' ';
 185         asctyp['\r'] = (byte) ' ';
 186         asctyp['\n'] = (byte) ' ';
 187         while (i < '0') {
 188             asctyp[i] = (byte) i++;
 189         }


 935                     return;
 936 
 937                 case EOS:
 938                     panic(FAULT);
 939 
 940                 default:
 941                     break;
 942             }
 943         }
 944     }
 945 
 946     /**
 947      * Parses an attribute list declaration.
 948      *
 949      * This method parses the declaration up to the closing angle bracket.
 950      *
 951      * @exception Exception is a parser-specific exception from the panic method.
 952      * @exception IOException
 953      */
 954     private void dtdattl() throws Exception {
 955         char elmqn[] = null;
 956         Pair elm = null;
 957         char ch;
 958         for (short st = 0; st >= 0;) {
 959             ch = getch();
 960             switch (st) {
 961                 case 0:     // read the element name
 962                     switch (chtyp(ch)) {
 963                         case 'a':
 964                         case 'A':
 965                         case '_':
 966                         case 'X':
 967                         case ':':
 968                             bkch();
 969                             //          Get the element from the list or add a new one.
 970                             elmqn = qname(mIsNSAware);
 971                             elm = find(mAttL, elmqn);
 972                             if (elm == null) {
 973                                 elm = pair(mAttL);
 974                                 elm.chars = elmqn;
 975                                 mAttL = elm;


1021                     panic(FAULT);
1022                     break;
1023             }
1024         }
1025     }
1026 
1027     /**
1028      * Parses an attribute declaration.
1029      *
1030      * The attribute uses the following fields of Pair object: chars - characters
1031      * of qualified name id - the type identifier of the attribute list - a pair
1032      * which holds the default value (chars field)
1033      *
1034      * @param elm An object which represents all defined attributes on an
1035      * element.
1036      * @exception Exception is a parser-specific exception from the panic method.
1037      * @exception IOException
1038      */
1039     @SuppressWarnings("fallthrough")
1040     private void dtdatt(Pair elm) throws Exception {
1041         char attqn[] = null;
1042         Pair att = null;
1043         char ch;
1044         for (short st = 0; st >= 0;) {
1045             ch = getch();
1046             switch (st) {
1047                 case 0:     // the attribute name
1048                     switch (chtyp(ch)) {
1049                         case 'a':
1050                         case 'A':
1051                         case '_':
1052                         case 'X':
1053                         case ':':
1054                             bkch();
1055                             //          Get the attribute from the list or add a new one.
1056                             attqn = qname(mIsNSAware);
1057                             att = find(elm.list, attqn);
1058                             if (att == null) {
1059                                 //              New attribute declaration
1060                                 att = pair(elm.list);
1061                                 att.chars = attqn;


1757     }
1758 
1759     /**
1760      * Reads a qualified xml name.
1761      *
1762      * A qualified name is stored as an array of characters. The first
1763      * (chars[0]) character is the index of the colon character which separates
1764      * the prefix from the local name. If the index is zero, the name does not
1765      * contain separator or the parser works in the namespace unaware mode. The
1766      * length of qualified name is the length of the array minus one.
1767      *
1768      * @param ns The true value turns namespace conformance on.
1769      * @return The characters of a qualified name.
1770      * @exception Exception When incorrect character appear in the name.
1771      * @exception IOException
1772      */
1773     protected char[] qname(boolean ns)
1774             throws Exception {
1775         mBuffIdx = -1;
1776         bname(ns);
1777         char chars[] = new char[mBuffIdx + 1];
1778         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1779         return chars;
1780     }
1781 
1782     /**
1783      * Reads the public or/and system identifiers.
1784      *
1785      * @param inp The input object.
1786      * @exception Exception is a parser-specific exception from the panic method.
1787      * @exception IOException
1788      */
1789     private void pubsys(Input inp)
1790             throws Exception {
1791         Pair pair = pubsys(' ');
1792         inp.pubid = pair.name;
1793         inp.sysid = pair.value;
1794         del(pair);
1795     }
1796 
1797     /**


2693         }
2694         mBuffIdx++;
2695         if (mBuffIdx < mBuff.length) {
2696             mBuff[mBuffIdx] = ch;
2697         } else {
2698             mBuffIdx--;
2699             bappend(ch);
2700         }
2701     }
2702 
2703     /**
2704      * Appends a character to parser's buffer.
2705      *
2706      * @param ch The character to append to the buffer.
2707      */
2708     private void bappend(char ch) {
             // Advance the index first, as the former pre-increment store did.
2710         mBuffIdx++;
             // Test the capacity explicitly instead of catching the exception
             // raised by an out-of-range store: a broad catch would also hide
             // unrelated failures.
             if (mBuffIdx >= mBuff.length) {
2712             //          Double the buffer size; take the larger bound so the
                 //          new index always fits, even for an empty buffer.
2713             char[] buff = new char[Math.max(mBuff.length << 1, mBuffIdx + 1)];
2714             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2715             mBuff = buff;
             }
2716         mBuff[mBuffIdx] = ch;
2718     }
2719 
2720     /**
2721      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2722      * parser's buffer (mBuff).
2723      *
2724      * @param cidx The character buffer (mChars) start index.
2725      * @param bidx The parser buffer (mBuff) start index.
2726      */
2727     private void bcopy(int cidx, int bidx) {
2728         int length = mChIdx - cidx;
2729         if ((bidx + length + 1) >= mBuff.length) {
2730             //          Expand the buffer
2731             char buff[] = new char[mBuff.length + length];
2732             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2733             mBuff = buff;
2734         }
2735         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2736         mBuffIdx += length;
2737     }
2738 
2739     /**
2740      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2741      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state below
2742      * 0x100 is a built-in entity replacement character.
2743      *
2744      * @param ch the next character of an entity name.
2745      */
2746     @SuppressWarnings("fallthrough")
2747     private void eappend(char ch) {
2748         switch (mESt) {
2749             case 0x100:  // "l" or "g" or "a" or "q"
2750                 switch (ch) {
2751                     case 'l':




 103     // 'T' - "NMTOKENS"
 104     // 'u' - enumeration type
 105     // 'o' - "NOTATION"
 106     // 'c' - "CDATA"
 107     // see also: bkeyword() and atype()
 108     //
 109     protected Pair mAttL;       // list of defined attributes by element name
 110     protected Input mDoc;        // document entity
 111     protected Input mInp;        // stack of entities
 112     private char[] mChars;      // reading buffer
 113     private int mChLen;      // current capacity (NOTE(review): presumably of mChars - confirm)
 114     private int mChIdx;      // index to the next char
 115     protected Attrs mAttrs;      // attributes of the current element
 116     private String[] mItems;      // attributes array of the current element
 117     private char mAttrIdx;    // attributes counter/index
 118     private String mUnent;  // unresolved entity name
 119     private Pair mDltd;   // deleted objects for reuse
 120     /**
 121      * Default prefixes
 122      */
 123     private static final char[] NONS = {(char) 0};    // header cell only, no prefix characters
 124     private static final char[] XML = {(char) 4, 'x', 'm', 'l'};    // header cell 4, then "xml"
 125     private static final char[] XMLNS = {(char) 6, 'x', 'm', 'l', 'n', 's'};    // header cell 6, then "xmlns"
 145     /**
 146      * ASCII character type array.
 147      *
 148      * This array maps an ASCII (7 bit) character to the character type.<br>
 149      * Possible character type values are:<br> - ' ' for any kind of white
 150      * space character;<br> - 'a' for any lower case alphabetical character
 151      * value;<br> - 'A' for any upper case alphabetical character value;<br>
 152      * - 'd' for any decimal digit character value;<br> - 'z' for any
 153      * character less than ' ' except '\t', '\n', '\r';<br> An ASCII (7 bit)
 154      * character which does not fall in any category listed above is mapped to
 155      * itself.
 156      */
 157     private static final byte[] asctyp;
 158     /**
 159      * NMTOKEN character type array.
 160      *
 161      * This array maps an ASCII (7 bit) character to the character type.<br>
 162      * Possible character type values are:<br> - 0 for underscore ('_') or any
 163      * lower and upper case alphabetical character value;<br> - 1 for colon
 164      * (':') character;<br> - 2 for dash ('-') and dot ('.') or any decimal
 165      * digit character value;<br> - 3 for any kind of white space character<br>
 166      * An ASCII (7 bit) character which does not fall in any category listed
 167      * above is mapped to 0xff.
 168      */
 169     private static final byte[] nmttyp;
 170 
 171     /**
 172      * Static constructor.
 173      *
 174      * Sets up the ASCII character type array which is used by
 175      * {@link #asctyp asctyp} method and NMTOKEN character type array.
 176      */
 177     static {
 178         short i = 0;
 179 
 180         asctyp = new byte[0x80];
 181         while (i < ' ') {
 182             asctyp[i++] = (byte) 'z';
 183         }
 184         asctyp['\t'] = (byte) ' ';
 185         asctyp['\r'] = (byte) ' ';
 186         asctyp['\n'] = (byte) ' ';
 187         while (i < '0') {
 188             asctyp[i] = (byte) i++;
 189         }


 935                     return;
 936 
 937                 case EOS:
 938                     panic(FAULT);
 939 
 940                 default:
 941                     break;
 942             }
 943         }
 944     }
 945 
 946     /**
 947      * Parses an attribute list declaration.
 948      *
 949      * This method parses the declaration up to the closing angle bracket.
 950      *
 951      * @exception Exception is a parser-specific exception from the panic method.
 952      * @exception IOException
 953      */
 954     private void dtdattl() throws Exception {
 955         char[] elmqn = null;
 956         Pair elm = null;
 957         char ch;
 958         for (short st = 0; st >= 0;) {
 959             ch = getch();
 960             switch (st) {
 961                 case 0:     // read the element name
 962                     switch (chtyp(ch)) {
 963                         case 'a':
 964                         case 'A':
 965                         case '_':
 966                         case 'X':
 967                         case ':':
 968                             bkch();
 969                             //          Get the element from the list or add a new one.
 970                             elmqn = qname(mIsNSAware);
 971                             elm = find(mAttL, elmqn);
 972                             if (elm == null) {
 973                                 elm = pair(mAttL);
 974                                 elm.chars = elmqn;
 975                                 mAttL = elm;


1021                     panic(FAULT);
1022                     break;
1023             }
1024         }
1025     }
1026 
1027     /**
1028      * Parses an attribute declaration.
1029      *
1030      * The attribute uses the following fields of Pair object: chars - characters
1031      * of qualified name id - the type identifier of the attribute list - a pair
1032      * which holds the default value (chars field)
1033      *
1034      * @param elm An object which represents all defined attributes on an
1035      * element.
1036      * @exception Exception is a parser-specific exception from the panic method.
1037      * @exception IOException
1038      */
1039     @SuppressWarnings("fallthrough")
1040     private void dtdatt(Pair elm) throws Exception {
1041         char[] attqn = null;
1042         Pair att = null;
1043         char ch;
1044         for (short st = 0; st >= 0;) {
1045             ch = getch();
1046             switch (st) {
1047                 case 0:     // the attribute name
1048                     switch (chtyp(ch)) {
1049                         case 'a':
1050                         case 'A':
1051                         case '_':
1052                         case 'X':
1053                         case ':':
1054                             bkch();
1055                             //          Get the attribute from the list or add a new one.
1056                             attqn = qname(mIsNSAware);
1057                             att = find(elm.list, attqn);
1058                             if (att == null) {
1059                                 //              New attribute declaration
1060                                 att = pair(elm.list);
1061                                 att.chars = attqn;


1757     }
1758 
1759     /**
1760      * Reads a qualified xml name.
1761      *
1762      * A qualified name is stored as an array of characters. The first
1763      * (chars[0]) character is the index of the colon character which separates
1764      * the prefix from the local name. If the index is zero, the name does not
1765      * contain separator or the parser works in the namespace unaware mode. The
1766      * length of qualified name is the length of the array minus one.
1767      *
1768      * @param ns The true value turns namespace conformance on.
1769      * @return The characters of a qualified name.
1770      * @exception Exception When incorrect character appear in the name.
1771      * @exception IOException
1772      */
1773     protected char[] qname(boolean ns)
1774             throws Exception {
1775         mBuffIdx = -1;
1776         bname(ns);
1777         char[] chars = new char[mBuffIdx + 1];
1778         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1779         return chars;
1780     }
1781 
1782     /**
1783      * Reads the public or/and system identifiers.
1784      *
1785      * @param inp The input object.
1786      * @exception Exception is a parser-specific exception from the panic method.
1787      * @exception IOException
1788      */
1789     private void pubsys(Input inp)
1790             throws Exception {
1791         Pair pair = pubsys(' ');
1792         inp.pubid = pair.name;
1793         inp.sysid = pair.value;
1794         del(pair);
1795     }
1796 
1797     /**


2693         }
2694         mBuffIdx++;
2695         if (mBuffIdx < mBuff.length) {
2696             mBuff[mBuffIdx] = ch;
2697         } else {
2698             mBuffIdx--;
2699             bappend(ch);
2700         }
2701     }
2702 
2703     /**
2704      * Appends a character to parser's buffer.
2705      *
2706      * @param ch The character to append to the buffer.
2707      */
2708     private void bappend(char ch) {
             // Advance the index first, as the former pre-increment store did.
2710         mBuffIdx++;
             // Test the capacity explicitly instead of catching the exception
             // raised by an out-of-range store: a broad catch would also hide
             // unrelated failures.
             if (mBuffIdx >= mBuff.length) {
2712             //          Double the buffer size; take the larger bound so the
                 //          new index always fits, even for an empty buffer.
2713             char[] buff = new char[Math.max(mBuff.length << 1, mBuffIdx + 1)];
2714             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2715             mBuff = buff;
             }
2716         mBuff[mBuffIdx] = ch;
2718     }
2719 
2720     /**
2721      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2722      * parser's buffer (mBuff).
2723      *
2724      * @param cidx The character buffer (mChars) start index.
2725      * @param bidx The parser buffer (mBuff) start index.
2726      */
2727     private void bcopy(int cidx, int bidx) {
2728         int length = mChIdx - cidx;
2729         if ((bidx + length + 1) >= mBuff.length) {
2730             //          Expand the buffer
2731             char[] buff = new char[mBuff.length + length];
2732             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2733             mBuff = buff;
2734         }
2735         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2736         mBuffIdx += length;
2737     }
2738 
2739     /**
2740      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2741      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state below
2742      * 0x100 is a built-in entity replacement character.
2743      *
2744      * @param ch the next character of an entity name.
2745      */
2746     @SuppressWarnings("fallthrough")
2747     private void eappend(char ch) {
2748         switch (mESt) {
2749             case 0x100:  // "l" or "g" or "a" or "q"
2750                 switch (ch) {
2751                     case 'l':


< prev index next >