835 return;
836 }
837 }
838 }
839
840 /**
841 * Parse identifier. Uppercase characters are folded
842 * to lowercase when lower is true. Returns falsed if
843 * no identifier is found. [55] 346:17
844 */
845 boolean parseIdentifier(boolean lower) throws IOException {
846 switch (ch) {
847 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
848 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
849 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
850 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
851 case 'Y': case 'Z':
852 if (lower) {
853 ch = 'a' + (ch - 'A');
854 }
855
856 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
857 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
858 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
859 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
860 case 'y': case 'z':
861 break;
862
863 default:
864 return false;
865 }
866
867 while (true) {
868 addString(ch);
869
870 switch (ch = readCh()) {
871 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
872 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
873 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
874 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
875 case 'Y': case 'Z':
876 if (lower) {
877 ch = 'a' + (ch - 'A');
878 }
879
880 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
881 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
882 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
883 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
884 case 'y': case 'z':
885
886 case '0': case '1': case '2': case '3': case '4':
887 case '5': case '6': case '7': case '8': case '9':
888
889 case '.': case '-':
890
891 case '_': // not officially allowed
892 break;
893
894 default:
895 return true;
896 }
897 }
898 }
1197 c = '\n';
1198 break;
1199 default:
1200 ch = readCh();
1201 break;
1202 }
1203
1204 // output character
1205 if (textpos == text.length) {
1206 char newtext[] = new char[text.length + 128];
1207 System.arraycopy(text, 0, newtext, 0, text.length);
1208 text = newtext;
1209 }
1210 text[textpos++] = (char)c;
1211 }
1212 }
1213
1214 /**
1215 * Parse attribute value. [33] 331:1
1216 */
1217 String parseAttributeValue(boolean lower) throws IOException {
1218 int delim = -1;
1219
1220 // Check for a delimiter
1221 switch(ch) {
1222 case '\'':
1223 case '"':
1224 delim = ch;
1225 ch = readCh();
1226 break;
1227 }
1228
1229 // Parse the rest of the value
1230 while (true) {
1231 int c = ch;
1232
1233 switch (c) {
1234 case '\n':
1235 ln++;
1236 ch = readCh();
1241 break;
1242
1243 case '\r':
1244 ln++;
1245
1246 if ((ch = readCh()) == '\n') {
1247 ch = readCh();
1248 crlfCount++;
1249 }
1250 else {
1251 crCount++;
1252 }
1253 if (delim < 0) {
1254 return getString(0);
1255 }
1256 break;
1257
1258 case '\t':
1259 if (delim < 0)
1260 c = ' ';
1261 case ' ':
1262 ch = readCh();
1263 if (delim < 0) {
1264 return getString(0);
1265 }
1266 break;
1267
1268 case '>':
1269 case '<':
1270 if (delim < 0) {
1271 return getString(0);
1272 }
1273 ch = readCh();
1274 break;
1275
1276 case '\'':
1277 case '"':
1278 ch = readCh();
1279 if (c == delim) {
1280 return getString(0);
1542 // ignore all data upto the close bracket '>'
1543 while (true) {
1544 skipSpace();
1545 switch (ch) {
1546 case '>':
1547 case -1:
1548 ch = readCh();
1549 return;
1550 case '<':
1551 return;
1552 default:
1553 ch = readCh();
1554
1555 }
1556 }
1557 }
1558
1559 /**
1560 * Parse a start or end tag.
1561 */
1562 void parseTag() throws IOException {
1563 Element elem;
1564 boolean net = false;
1565 boolean warned = false;
1566 boolean unknown = false;
1567
1568 switch (ch = readCh()) {
1569 case '!':
1570 switch (ch = readCh()) {
1571 case '-':
1572 // Parse comment. [92] 391:7
1573 while (true) {
1574 if (ch == '-') {
1575 if (!strict || ((ch = readCh()) == '-')) {
1576 ch = readCh();
1577 if (!strict && ch == '-') {
1578 ch = readCh();
1579 }
1580 // send over any text you might see
1581 // before parsing and sending the
1585 System.arraycopy(text, 0, newtext, 0, textpos);
1586 handleText(newtext);
1587 lastBlockStartPos = currentBlockStartPos;
1588 textpos = 0;
1589 }
1590 parseComment();
1591 last = makeTag(dtd.getElement("comment"), true);
1592 handleComment(getChars(0));
1593 continue;
1594 } else if (!warned) {
1595 warned = true;
1596 error("invalid.commentchar", "-");
1597 }
1598 }
1599 skipSpace();
1600 switch (ch) {
1601 case '-':
1602 continue;
1603 case '>':
1604 ch = readCh();
1605 case -1:
1606 return;
1607 default:
1608 ch = readCh();
1609 if (!warned) {
1610 warned = true;
1611 error("invalid.commentchar",
1612 String.valueOf((char)ch));
1613 }
1614 break;
1615 }
1616 }
1617
1618 default:
1619 // deal with marked sections
1620 StringBuffer strBuff = new StringBuffer();
1621 while (true) {
1622 strBuff.append((char)ch);
1623 if (parseMarkupDeclarations(strBuff)) {
1624 return;
1625 }
1626 switch(ch) {
1627 case '>':
1628 ch = readCh();
1629 case -1:
1630 error("invalid.markup");
1631 return;
1632 case '\n':
1633 ln++;
1634 ch = readCh();
1635 lfCount++;
1636 break;
1637 case '\r':
1638 ln++;
1639 if ((ch = readCh()) == '\n') {
1640 ch = readCh();
1641 crlfCount++;
1642 }
1643 else {
1644 crCount++;
1645 }
1646 break;
1647
1648 default:
1649 ch = readCh();
1650 break;
1651 }
1652 }
1653 }
1654
1655 case '/':
1656 // parse end tag [19] 317:4
1657 switch (ch = readCh()) {
1658 case '>':
1659 ch = readCh();
1660 case '<':
1661 // empty end tag. either </> or </<
1662 if (recent == null) {
1663 error("invalid.shortend");
1664 return;
1665 }
1666 elem = recent;
1667 break;
1668
1669 default:
1670 if (!parseIdentifier(true)) {
1671 error("expected.endtagname");
1672 return;
1673 }
1674 skipSpace();
1675 switch (ch) {
1676 case '>':
1677 ch = readCh();
1678 case '<':
1679 break;
1680
1681 default:
1682 error("expected", "'>'");
1683 while ((ch != -1) && (ch != '\n') && (ch != '>')) {
1684 ch = readCh();
1685 }
1686 if (ch == '>') {
1687 ch = readCh();
1688 }
1689 break;
1690 }
1691 String elemStr = getString(0);
1692 if (!dtd.elementExists(elemStr)) {
1693 error("end.unrecognized", elemStr);
1694 // Ignore RE before end tag
1695 if ((textpos > 0) && (text[textpos-1] == '\n')) {
1696 textpos--;
1697 }
1858
1859 /* determine if this element is part of the dtd. */
1860
1861 if (!dtd.elementExists(elemStr)) {
1862 // parseInvalidTag();
1863 error("tag.unrecognized ", elemStr);
1864 elem = dtd.getElement("unknown");
1865 elem.name = elemStr;
1866 unknown = true;
1867 } else {
1868 elem = dtd.getElement(elemStr);
1869 }
1870 }
1871
1872 // Parse attributes
1873 parseAttributeSpecificationList(elem);
1874
1875 switch (ch) {
1876 case '/':
1877 net = true;
1878 case '>':
1879 ch = readCh();
1880 if (ch == '>' && net) {
1881 ch = readCh();
1882 }
1883 case '<':
1884 break;
1885
1886 default:
1887 error("expected", "'>'");
1888 break;
1889 }
1890
1891 if (!strict) {
1892 if (elem.getName().equals("script")) {
1893 error("javascript.unsupported");
1894 }
1895 }
1896
1897 // ignore RE after start tag
|
835 return;
836 }
837 }
838 }
839
840 /**
841 * Parse identifier. Uppercase characters are folded
842 * to lowercase when lower is true. Returns falsed if
843 * no identifier is found. [55] 346:17
844 */
845 boolean parseIdentifier(boolean lower) throws IOException {
846 switch (ch) {
847 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
848 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
849 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
850 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
851 case 'Y': case 'Z':
852 if (lower) {
853 ch = 'a' + (ch - 'A');
854 }
855 break;
856
857 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
858 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
859 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
860 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
861 case 'y': case 'z':
862 break;
863
864 default:
865 return false;
866 }
867
868 while (true) {
869 addString(ch);
870
871 switch (ch = readCh()) {
872 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
873 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
874 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
875 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
876 case 'Y': case 'Z':
877 if (lower) {
878 ch = 'a' + (ch - 'A');
879 }
880 break;
881
882 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
883 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
884 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
885 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
886 case 'y': case 'z':
887
888 case '0': case '1': case '2': case '3': case '4':
889 case '5': case '6': case '7': case '8': case '9':
890
891 case '.': case '-':
892
893 case '_': // not officially allowed
894 break;
895
896 default:
897 return true;
898 }
899 }
900 }
1199 c = '\n';
1200 break;
1201 default:
1202 ch = readCh();
1203 break;
1204 }
1205
1206 // output character
1207 if (textpos == text.length) {
1208 char newtext[] = new char[text.length + 128];
1209 System.arraycopy(text, 0, newtext, 0, text.length);
1210 text = newtext;
1211 }
1212 text[textpos++] = (char)c;
1213 }
1214 }
1215
1216 /**
1217 * Parse attribute value. [33] 331:1
1218 */
1219 @SuppressWarnings("fallthrough")
1220 String parseAttributeValue(boolean lower) throws IOException {
1221 int delim = -1;
1222
1223 // Check for a delimiter
1224 switch(ch) {
1225 case '\'':
1226 case '"':
1227 delim = ch;
1228 ch = readCh();
1229 break;
1230 }
1231
1232 // Parse the rest of the value
1233 while (true) {
1234 int c = ch;
1235
1236 switch (c) {
1237 case '\n':
1238 ln++;
1239 ch = readCh();
1244 break;
1245
1246 case '\r':
1247 ln++;
1248
1249 if ((ch = readCh()) == '\n') {
1250 ch = readCh();
1251 crlfCount++;
1252 }
1253 else {
1254 crCount++;
1255 }
1256 if (delim < 0) {
1257 return getString(0);
1258 }
1259 break;
1260
1261 case '\t':
1262 if (delim < 0)
1263 c = ' ';
1264 // Fall through
1265 case ' ':
1266 ch = readCh();
1267 if (delim < 0) {
1268 return getString(0);
1269 }
1270 break;
1271
1272 case '>':
1273 case '<':
1274 if (delim < 0) {
1275 return getString(0);
1276 }
1277 ch = readCh();
1278 break;
1279
1280 case '\'':
1281 case '"':
1282 ch = readCh();
1283 if (c == delim) {
1284 return getString(0);
1546 // ignore all data upto the close bracket '>'
1547 while (true) {
1548 skipSpace();
1549 switch (ch) {
1550 case '>':
1551 case -1:
1552 ch = readCh();
1553 return;
1554 case '<':
1555 return;
1556 default:
1557 ch = readCh();
1558
1559 }
1560 }
1561 }
1562
1563 /**
1564 * Parse a start or end tag.
1565 */
1566 @SuppressWarnings("fallthrough")
1567 void parseTag() throws IOException {
1568 Element elem;
1569 boolean net = false;
1570 boolean warned = false;
1571 boolean unknown = false;
1572
1573 switch (ch = readCh()) {
1574 case '!':
1575 switch (ch = readCh()) {
1576 case '-':
1577 // Parse comment. [92] 391:7
1578 while (true) {
1579 if (ch == '-') {
1580 if (!strict || ((ch = readCh()) == '-')) {
1581 ch = readCh();
1582 if (!strict && ch == '-') {
1583 ch = readCh();
1584 }
1585 // send over any text you might see
1586 // before parsing and sending the
1590 System.arraycopy(text, 0, newtext, 0, textpos);
1591 handleText(newtext);
1592 lastBlockStartPos = currentBlockStartPos;
1593 textpos = 0;
1594 }
1595 parseComment();
1596 last = makeTag(dtd.getElement("comment"), true);
1597 handleComment(getChars(0));
1598 continue;
1599 } else if (!warned) {
1600 warned = true;
1601 error("invalid.commentchar", "-");
1602 }
1603 }
1604 skipSpace();
1605 switch (ch) {
1606 case '-':
1607 continue;
1608 case '>':
1609 ch = readCh();
1610 return;
1611 case -1:
1612 return;
1613 default:
1614 ch = readCh();
1615 if (!warned) {
1616 warned = true;
1617 error("invalid.commentchar",
1618 String.valueOf((char)ch));
1619 }
1620 break;
1621 }
1622 }
1623
1624 default:
1625 // deal with marked sections
1626 StringBuffer strBuff = new StringBuffer();
1627 while (true) {
1628 strBuff.append((char)ch);
1629 if (parseMarkupDeclarations(strBuff)) {
1630 return;
1631 }
1632 switch(ch) {
1633 case '>':
1634 ch = readCh();
1635 // Fall through
1636 case -1:
1637 error("invalid.markup");
1638 return;
1639 case '\n':
1640 ln++;
1641 ch = readCh();
1642 lfCount++;
1643 break;
1644 case '\r':
1645 ln++;
1646 if ((ch = readCh()) == '\n') {
1647 ch = readCh();
1648 crlfCount++;
1649 }
1650 else {
1651 crCount++;
1652 }
1653 break;
1654
1655 default:
1656 ch = readCh();
1657 break;
1658 }
1659 }
1660 }
1661
1662 case '/':
1663 // parse end tag [19] 317:4
1664 switch (ch = readCh()) {
1665 case '>':
1666 ch = readCh();
1667 // Fall through
1668 case '<':
1669 // empty end tag. either </> or </<
1670 if (recent == null) {
1671 error("invalid.shortend");
1672 return;
1673 }
1674 elem = recent;
1675 break;
1676
1677 default:
1678 if (!parseIdentifier(true)) {
1679 error("expected.endtagname");
1680 return;
1681 }
1682 skipSpace();
1683 switch (ch) {
1684 case '>':
1685 ch = readCh();
1686 break;
1687 case '<':
1688 break;
1689
1690 default:
1691 error("expected", "'>'");
1692 while ((ch != -1) && (ch != '\n') && (ch != '>')) {
1693 ch = readCh();
1694 }
1695 if (ch == '>') {
1696 ch = readCh();
1697 }
1698 break;
1699 }
1700 String elemStr = getString(0);
1701 if (!dtd.elementExists(elemStr)) {
1702 error("end.unrecognized", elemStr);
1703 // Ignore RE before end tag
1704 if ((textpos > 0) && (text[textpos-1] == '\n')) {
1705 textpos--;
1706 }
1867
1868 /* determine if this element is part of the dtd. */
1869
1870 if (!dtd.elementExists(elemStr)) {
1871 // parseInvalidTag();
1872 error("tag.unrecognized ", elemStr);
1873 elem = dtd.getElement("unknown");
1874 elem.name = elemStr;
1875 unknown = true;
1876 } else {
1877 elem = dtd.getElement(elemStr);
1878 }
1879 }
1880
1881 // Parse attributes
1882 parseAttributeSpecificationList(elem);
1883
1884 switch (ch) {
1885 case '/':
1886 net = true;
1887 // Fall through
1888 case '>':
1889 ch = readCh();
1890 if (ch == '>' && net) {
1891 ch = readCh();
1892 }
1893 case '<':
1894 break;
1895
1896 default:
1897 error("expected", "'>'");
1898 break;
1899 }
1900
1901 if (!strict) {
1902 if (elem.getName().equals("script")) {
1903 error("javascript.unsupported");
1904 }
1905 }
1906
1907 // ignore RE after start tag
|