1 /*
2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
63 * <p>as well as:
64 * '<p><a href="xx"> <em>Using</em></a></p>'
65 * which appears to be treated as:
66 * '<p><a href="xx"><em>Using</em></a></p>'
67 * <p>
68 * If <code>strict</code> is false, when a tag that breaks flow
69 * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
70 * encountered, all whitespace will be ignored until a non-whitespace
71 * character is encountered. This appears to give behavior closer to
72 * the popular browsers.
73 *
74 * @see DTD
75 * @see TagElement
76 * @see SimpleAttributeSet
77 * @author Arthur van Hoff
78 * @author Sunita Mani
79 */
80 public
81 class Parser implements DTDConstants {
82
83 private char text[] = new char[1024];
84 private int textpos = 0;
85 private TagElement last;
86 private boolean space;
87
88 private char str[] = new char[128];
89 private int strpos = 0;
90
91 protected DTD dtd = null;
92
93 private int ch;
94 private int ln;
95 private Reader in;
96
97 private Element recent;
98 private TagStack stack;
99 private boolean skipTag = false;
100 private TagElement lastFormSent = null;
101 private SimpleAttributeSet attributes = new SimpleAttributeSet();
102
935 ln++;
936 ch = readCh();
937 lfCount++;
938 break;
939
940 case '\r':
941 ln++;
942 if ((ch = readCh()) == '\n') {
943 ch = readCh();
944 crlfCount++;
945 }
946 else {
947 crCount++;
948 }
949 break;
950
951 case ';':
952 ch = readCh();
953 break;
954 }
955 char data[] = {mapNumericReference((char) n)};
956 return data;
957 }
958 addString('#');
959 if (!parseIdentifier(false)) {
960 error("ident.expected");
961 strpos = pos;
962 char data[] = {'&', '#'};
963 return data;
964 }
965 } else if (!parseIdentifier(false)) {
966 char data[] = {'&'};
967 return data;
968 }
969
970 boolean semicolon = false;
971
972 switch (ch) {
973 case '\n':
974 ln++;
975 ch = readCh();
976 lfCount++;
977 break;
|
1 /*
2 * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
63 * <p>as well as:
64 * '<p><a href="xx"> <em>Using</em></a></p>'
65 * which appears to be treated as:
66 * '<p><a href="xx"><em>Using</em></a></p>'
67 * <p>
68 * If <code>strict</code> is false, when a tag that breaks flow
69 * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
70 * encountered, all whitespace will be ignored until a non-whitespace
71 * character is encountered. This appears to give behavior closer to
72 * the popular browsers.
73 *
74 * @see DTD
75 * @see TagElement
76 * @see SimpleAttributeSet
77 * @author Arthur van Hoff
78 * @author Sunita Mani
79 */
80 public
81 class Parser implements DTDConstants {
82
83 // Maximum code point value within the BMP (Basic Multilingual Plane)
84 private static final int MAX_BMP_BOUND = 65535;
85
86 private char text[] = new char[1024];
87 private int textpos = 0;
88 private TagElement last;
89 private boolean space;
90
91 private char str[] = new char[128];
92 private int strpos = 0;
93
94 protected DTD dtd = null;
95
96 private int ch;
97 private int ln;
98 private Reader in;
99
100 private Element recent;
101 private TagStack stack;
102 private boolean skipTag = false;
103 private TagElement lastFormSent = null;
104 private SimpleAttributeSet attributes = new SimpleAttributeSet();
105
938 ln++;
939 ch = readCh();
940 lfCount++;
941 break;
942
943 case '\r':
944 ln++;
945 if ((ch = readCh()) == '\n') {
946 ch = readCh();
947 crlfCount++;
948 }
949 else {
950 crCount++;
951 }
952 break;
953
954 case ';':
955 ch = readCh();
956 break;
957 }
958 // If code point n lies within the BMP, map it as a single char;
959 // otherwise convert it into a surrogate pair
960 try {
961 char data[];
962 if (n <= MAX_BMP_BOUND) {
963 data = Character.toChars(mapNumericReference((char) n));
964 } else {
965 data = Character.toChars(n);
966 }
967
968 return data;
969 }
970 catch(IllegalArgumentException ex) {
971 error(ex.toString());
972 return new char[0];
973 }
974 }
975 addString('#');
976 if (!parseIdentifier(false)) {
977 error("ident.expected");
978 strpos = pos;
979 char data[] = {'&', '#'};
980 return data;
981 }
982 } else if (!parseIdentifier(false)) {
983 char data[] = {'&'};
984 return data;
985 }
986
987 boolean semicolon = false;
988
989 switch (ch) {
990 case '\n':
991 ln++;
992 ch = readCh();
993 lfCount++;
994 break;
|