src/share/classes/javax/swing/text/html/parser/Parser.java

Print this page




 835                 return;
 836             }
 837         }
 838     }
 839 
 840     /**
 841      * Parse identifier. Uppercase characters are folded
 842      * to lowercase when lower is true. Returns falsed if
 843      * no identifier is found. [55] 346:17
 844      */
 845     boolean parseIdentifier(boolean lower) throws IOException {
 846         switch (ch) {
 847           case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 848           case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 849           case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 850           case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 851           case 'Y': case 'Z':
 852             if (lower) {
 853                 ch = 'a' + (ch - 'A');
 854             }

 855 
 856           case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 857           case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 858           case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 859           case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 860           case 'y': case 'z':
 861             break;
 862 
 863           default:
 864             return false;
 865         }
 866 
 867         while (true) {
 868             addString(ch);
 869 
 870             switch (ch = readCh()) {
 871               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 872               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 873               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 874               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 875               case 'Y': case 'Z':
 876                 if (lower) {
 877                     ch = 'a' + (ch - 'A');
 878                 }

 879 
 880               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 881               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 882               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 883               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 884               case 'y': case 'z':
 885 
 886               case '0': case '1': case '2': case '3': case '4':
 887               case '5': case '6': case '7': case '8': case '9':
 888 
 889               case '.': case '-':
 890 
 891               case '_': // not officially allowed
 892                 break;
 893 
 894               default:
 895                 return true;
 896             }
 897         }
 898     }


1197                 c = '\n';
1198                 break;
1199               default:
1200                 ch = readCh();
1201                 break;
1202             }
1203 
1204             // output character
1205             if (textpos == text.length) {
1206                 char newtext[] = new char[text.length + 128];
1207                 System.arraycopy(text, 0, newtext, 0, text.length);
1208                 text = newtext;
1209             }
1210             text[textpos++] = (char)c;
1211         }
1212     }
1213 
1214     /**
1215      * Parse attribute value. [33] 331:1
1216      */

1217     String parseAttributeValue(boolean lower) throws IOException {
1218         int delim = -1;
1219 
1220         // Check for a delimiter
1221         switch(ch) {
1222           case '\'':
1223           case '"':
1224             delim = ch;
1225             ch = readCh();
1226             break;
1227         }
1228 
1229         // Parse the rest of the value
1230         while (true) {
1231             int c = ch;
1232 
1233             switch (c) {
1234               case '\n':
1235                 ln++;
1236                 ch = readCh();


1241                 break;
1242 
1243               case '\r':
1244                 ln++;
1245 
1246                 if ((ch = readCh()) == '\n') {
1247                     ch = readCh();
1248                     crlfCount++;
1249                 }
1250                 else {
1251                     crCount++;
1252                 }
1253                 if (delim < 0) {
1254                     return getString(0);
1255                 }
1256                 break;
1257 
1258               case '\t':
1259                   if (delim < 0)
1260                       c = ' ';

1261               case ' ':
1262                 ch = readCh();
1263                 if (delim < 0) {
1264                     return getString(0);
1265                 }
1266                 break;
1267 
1268               case '>':
1269               case '<':
1270                 if (delim < 0) {
1271                     return getString(0);
1272                 }
1273                 ch = readCh();
1274                 break;
1275 
1276               case '\'':
1277               case '"':
1278                 ch = readCh();
1279                 if (c == delim) {
1280                     return getString(0);


1542         // ignore all data up to the close bracket '>'
1543         while (true) {
1544             skipSpace();
1545             switch (ch) {
1546               case '>':
1547               case -1:
1548                   ch = readCh();
1549                 return;
1550               case '<':
1551                   return;
1552               default:
1553                   ch = readCh();
1554 
1555             }
1556         }
1557     }
1558 
1559     /**
1560      * Parse a start or end tag.
1561      */

1562     void parseTag() throws IOException {
1563         Element elem;
1564         boolean net = false;
1565         boolean warned = false;
1566         boolean unknown = false;
1567 
1568         switch (ch = readCh()) {
1569           case '!':
1570             switch (ch = readCh()) {
1571               case '-':
1572                 // Parse comment. [92] 391:7
1573                 while (true) {
1574                     if (ch == '-') {
1575                         if (!strict || ((ch = readCh()) == '-')) {
1576                             ch = readCh();
1577                             if (!strict && ch == '-') {
1578                                 ch = readCh();
1579                             }
1580                             // send over any text you might see
1581                             // before parsing and sending the


1585                                 System.arraycopy(text, 0, newtext, 0, textpos);
1586                                 handleText(newtext);
1587                                 lastBlockStartPos = currentBlockStartPos;
1588                                 textpos = 0;
1589                             }
1590                             parseComment();
1591                             last = makeTag(dtd.getElement("comment"), true);
1592                             handleComment(getChars(0));
1593                             continue;
1594                         } else if (!warned) {
1595                             warned = true;
1596                             error("invalid.commentchar", "-");
1597                         }
1598                     }
1599                     skipSpace();
1600                     switch (ch) {
1601                       case '-':
1602                         continue;
1603                       case '>':
1604                         ch = readCh();

1605                       case -1:
1606                         return;
1607                       default:
1608                         ch = readCh();
1609                         if (!warned) {
1610                             warned = true;
1611                             error("invalid.commentchar",
1612                                   String.valueOf((char)ch));
1613                         }
1614                         break;
1615                     }
1616                 }
1617 
1618               default:
1619                 // deal with marked sections
1620                 StringBuffer strBuff = new StringBuffer();
1621                 while (true) {
1622                     strBuff.append((char)ch);
1623                     if (parseMarkupDeclarations(strBuff)) {
1624                         return;
1625                     }
1626                     switch(ch) {
1627                       case '>':
1628                         ch = readCh();

1629                       case -1:
1630                         error("invalid.markup");
1631                         return;
1632                       case '\n':
1633                         ln++;
1634                         ch = readCh();
1635                         lfCount++;
1636                         break;
1637                       case '\r':
1638                         ln++;
1639                         if ((ch = readCh()) == '\n') {
1640                             ch = readCh();
1641                             crlfCount++;
1642                         }
1643                         else {
1644                             crCount++;
1645                         }
1646                         break;
1647 
1648                       default:
1649                         ch = readCh();
1650                         break;
1651                     }
1652                 }
1653             }
1654 
1655           case '/':
1656             // parse end tag [19] 317:4
1657             switch (ch = readCh()) {
1658               case '>':
1659                 ch = readCh();

1660               case '<':
1661                 // empty end tag. either </> or </<
1662                 if (recent == null) {
1663                     error("invalid.shortend");
1664                     return;
1665                 }
1666                 elem = recent;
1667                 break;
1668 
1669               default:
1670                 if (!parseIdentifier(true)) {
1671                     error("expected.endtagname");
1672                     return;
1673                 }
1674                 skipSpace();
1675                 switch (ch) {
1676                   case '>':
1677                     ch = readCh();

1678                   case '<':
1679                     break;
1680 
1681                   default:
1682                     error("expected", "'>'");
1683                     while ((ch != -1) && (ch != '\n') && (ch != '>')) {
1684                         ch = readCh();
1685                     }
1686                     if (ch == '>') {
1687                         ch = readCh();
1688                     }
1689                     break;
1690                 }
1691                 String elemStr = getString(0);
1692                 if (!dtd.elementExists(elemStr)) {
1693                     error("end.unrecognized", elemStr);
1694                     // Ignore RE before end tag
1695                     if ((textpos > 0) && (text[textpos-1] == '\n')) {
1696                         textpos--;
1697                     }


1858 
1859             /* determine if this element is part of the dtd. */
1860 
1861             if (!dtd.elementExists(elemStr)) {
1862                 //              parseInvalidTag();
1863                 error("tag.unrecognized ", elemStr);
1864                 elem = dtd.getElement("unknown");
1865                 elem.name = elemStr;
1866                 unknown = true;
1867             } else {
1868                 elem = dtd.getElement(elemStr);
1869             }
1870         }
1871 
1872         // Parse attributes
1873         parseAttributeSpecificationList(elem);
1874 
1875         switch (ch) {
1876           case '/':
1877             net = true;

1878           case '>':
1879             ch = readCh();
1880             if (ch == '>' && net) {
1881                 ch = readCh();
1882             }
1883           case '<':
1884             break;
1885 
1886           default:
1887             error("expected", "'>'");
1888             break;
1889         }
1890 
1891         if (!strict) {
1892           if (elem.getName().equals("script")) {
1893             error("javascript.unsupported");
1894           }
1895         }
1896 
1897         // ignore RE after start tag




 835                 return;
 836             }
 837         }
 838     }
 839 
 840     /**
 841      * Parse identifier. Uppercase characters are folded
 842      * to lowercase when lower is true. Returns falsed if
 843      * no identifier is found. [55] 346:17
 844      */
 845     boolean parseIdentifier(boolean lower) throws IOException {
 846         switch (ch) {
 847           case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 848           case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 849           case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 850           case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 851           case 'Y': case 'Z':
 852             if (lower) {
 853                 ch = 'a' + (ch - 'A');
 854             }
 855             break;
 856 
 857           case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 858           case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 859           case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 860           case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 861           case 'y': case 'z':
 862             break;
 863 
 864           default:
 865             return false;
 866         }
 867 
 868         while (true) {
 869             addString(ch);
 870 
 871             switch (ch = readCh()) {
 872               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 873               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 874               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 875               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 876               case 'Y': case 'Z':
 877                 if (lower) {
 878                     ch = 'a' + (ch - 'A');
 879                 }
 880                 break;
 881 
 882               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 883               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 884               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 885               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 886               case 'y': case 'z':
 887 
 888               case '0': case '1': case '2': case '3': case '4':
 889               case '5': case '6': case '7': case '8': case '9':
 890 
 891               case '.': case '-':
 892 
 893               case '_': // not officially allowed
 894                 break;
 895 
 896               default:
 897                 return true;
 898             }
 899         }
 900     }


1199                 c = '\n';
1200                 break;
1201               default:
1202                 ch = readCh();
1203                 break;
1204             }
1205 
1206             // output character
1207             if (textpos == text.length) {
1208                 char newtext[] = new char[text.length + 128];
1209                 System.arraycopy(text, 0, newtext, 0, text.length);
1210                 text = newtext;
1211             }
1212             text[textpos++] = (char)c;
1213         }
1214     }
1215 
1216     /**
1217      * Parse attribute value. [33] 331:1
1218      */
1219     @SuppressWarnings("fallthrough")
1220     String parseAttributeValue(boolean lower) throws IOException {
1221         int delim = -1;
1222 
1223         // Check for a delimiter
1224         switch(ch) {
1225           case '\'':
1226           case '"':
1227             delim = ch;
1228             ch = readCh();
1229             break;
1230         }
1231 
1232         // Parse the rest of the value
1233         while (true) {
1234             int c = ch;
1235 
1236             switch (c) {
1237               case '\n':
1238                 ln++;
1239                 ch = readCh();


1244                 break;
1245 
1246               case '\r':
1247                 ln++;
1248 
1249                 if ((ch = readCh()) == '\n') {
1250                     ch = readCh();
1251                     crlfCount++;
1252                 }
1253                 else {
1254                     crCount++;
1255                 }
1256                 if (delim < 0) {
1257                     return getString(0);
1258                 }
1259                 break;
1260 
1261               case '\t':
1262                   if (delim < 0)
1263                       c = ' ';
1264                   // Fall through
1265               case ' ':
1266                 ch = readCh();
1267                 if (delim < 0) {
1268                     return getString(0);
1269                 }
1270                 break;
1271 
1272               case '>':
1273               case '<':
1274                 if (delim < 0) {
1275                     return getString(0);
1276                 }
1277                 ch = readCh();
1278                 break;
1279 
1280               case '\'':
1281               case '"':
1282                 ch = readCh();
1283                 if (c == delim) {
1284                     return getString(0);


1546         // ignore all data up to the close bracket '>'
1547         while (true) {
1548             skipSpace();
1549             switch (ch) {
1550               case '>':
1551               case -1:
1552                   ch = readCh();
1553                 return;
1554               case '<':
1555                   return;
1556               default:
1557                   ch = readCh();
1558 
1559             }
1560         }
1561     }
1562 
1563     /**
1564      * Parse a start or end tag.
1565      */
1566     @SuppressWarnings("fallthrough")
1567     void parseTag() throws IOException {
1568         Element elem;
1569         boolean net = false;
1570         boolean warned = false;
1571         boolean unknown = false;
1572 
1573         switch (ch = readCh()) {
1574           case '!':
1575             switch (ch = readCh()) {
1576               case '-':
1577                 // Parse comment. [92] 391:7
1578                 while (true) {
1579                     if (ch == '-') {
1580                         if (!strict || ((ch = readCh()) == '-')) {
1581                             ch = readCh();
1582                             if (!strict && ch == '-') {
1583                                 ch = readCh();
1584                             }
1585                             // send over any text you might see
1586                             // before parsing and sending the


1590                                 System.arraycopy(text, 0, newtext, 0, textpos);
1591                                 handleText(newtext);
1592                                 lastBlockStartPos = currentBlockStartPos;
1593                                 textpos = 0;
1594                             }
1595                             parseComment();
1596                             last = makeTag(dtd.getElement("comment"), true);
1597                             handleComment(getChars(0));
1598                             continue;
1599                         } else if (!warned) {
1600                             warned = true;
1601                             error("invalid.commentchar", "-");
1602                         }
1603                     }
1604                     skipSpace();
1605                     switch (ch) {
1606                       case '-':
1607                         continue;
1608                       case '>':
1609                         ch = readCh();
1610                         return;
1611                       case -1:
1612                         return;
1613                       default:
1614                         ch = readCh();
1615                         if (!warned) {
1616                             warned = true;
1617                             error("invalid.commentchar",
1618                                   String.valueOf((char)ch));
1619                         }
1620                         break;
1621                     }
1622                 }
1623 
1624               default:
1625                 // deal with marked sections
1626                 StringBuffer strBuff = new StringBuffer();
1627                 while (true) {
1628                     strBuff.append((char)ch);
1629                     if (parseMarkupDeclarations(strBuff)) {
1630                         return;
1631                     }
1632                     switch(ch) {
1633                       case '>':
1634                         ch = readCh();
1635                         // Fall through
1636                       case -1:
1637                         error("invalid.markup");
1638                         return;
1639                       case '\n':
1640                         ln++;
1641                         ch = readCh();
1642                         lfCount++;
1643                         break;
1644                       case '\r':
1645                         ln++;
1646                         if ((ch = readCh()) == '\n') {
1647                             ch = readCh();
1648                             crlfCount++;
1649                         }
1650                         else {
1651                             crCount++;
1652                         }
1653                         break;
1654 
1655                       default:
1656                         ch = readCh();
1657                         break;
1658                     }
1659                 }
1660             }
1661 
1662           case '/':
1663             // parse end tag [19] 317:4
1664             switch (ch = readCh()) {
1665               case '>':
1666                 ch = readCh();
1667                 // Fall through
1668               case '<':
1669                 // empty end tag. either </> or </<
1670                 if (recent == null) {
1671                     error("invalid.shortend");
1672                     return;
1673                 }
1674                 elem = recent;
1675                 break;
1676 
1677               default:
1678                 if (!parseIdentifier(true)) {
1679                     error("expected.endtagname");
1680                     return;
1681                 }
1682                 skipSpace();
1683                 switch (ch) {
1684                   case '>':
1685                     ch = readCh();
1686                     break;
1687                   case '<':
1688                     break;
1689 
1690                   default:
1691                     error("expected", "'>'");
1692                     while ((ch != -1) && (ch != '\n') && (ch != '>')) {
1693                         ch = readCh();
1694                     }
1695                     if (ch == '>') {
1696                         ch = readCh();
1697                     }
1698                     break;
1699                 }
1700                 String elemStr = getString(0);
1701                 if (!dtd.elementExists(elemStr)) {
1702                     error("end.unrecognized", elemStr);
1703                     // Ignore RE before end tag
1704                     if ((textpos > 0) && (text[textpos-1] == '\n')) {
1705                         textpos--;
1706                     }


1867 
1868             /* determine if this element is part of the dtd. */
1869 
1870             if (!dtd.elementExists(elemStr)) {
1871                 //              parseInvalidTag();
1872                 error("tag.unrecognized ", elemStr);
1873                 elem = dtd.getElement("unknown");
1874                 elem.name = elemStr;
1875                 unknown = true;
1876             } else {
1877                 elem = dtd.getElement(elemStr);
1878             }
1879         }
1880 
1881         // Parse attributes
1882         parseAttributeSpecificationList(elem);
1883 
1884         switch (ch) {
1885           case '/':
1886             net = true;
1887             // Fall through
1888           case '>':
1889             ch = readCh();
1890             if (ch == '>' && net) {
1891                 ch = readCh();
1892             }
1893           case '<':
1894             break;
1895 
1896           default:
1897             error("expected", "'>'");
1898             break;
1899         }
1900 
1901         if (!strict) {
1902           if (elem.getName().equals("script")) {
1903             error("javascript.unsupported");
1904           }
1905         }
1906 
1907         // ignore RE after start tag