1 /* 2 * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package javax.swing.text.html.parser; 27 28 import sun.awt.AppContext; 29 30 import java.io.PrintStream; 31 import java.io.File; 32 import java.io.FileInputStream; 33 import java.io.InputStream; 34 import java.io.IOException; 35 import java.io.FileNotFoundException; 36 import java.io.BufferedInputStream; 37 import java.io.DataInputStream; 38 import java.util.Hashtable; 39 import java.util.Vector; 40 import java.util.BitSet; 41 import java.util.StringTokenizer; 42 import java.util.Enumeration; 43 import java.util.Properties; 44 import java.net.URL; 45 46 /** 47 * The representation of an SGML DTD. DTD describes a document 48 * syntax and is used in parsing of HTML documents. It contains 49 * a list of elements and their attributes as well as a list of 50 * entities defined in the DTD. 51 * 52 * @see Element 53 * @see AttributeList 54 * @see ContentModel 55 * @see Parser 56 * @author Arthur van Hoff 57 */ 58 public 59 class DTD implements DTDConstants { 60 61 /** 62 * the name of the DTD 63 */ 64 public String name; 65 66 /** 67 * The vector of elements 68 */ 69 public Vector<Element> elements = new Vector<Element>(); 70 71 /** 72 * The hash table contains the name of element and 73 * the corresponding element. 74 */ 75 public Hashtable<String,Element> elementHash 76 = new Hashtable<String,Element>(); 77 78 /** 79 * The hash table contains an {@code Object} and the corresponding {@code Entity} 80 */ 81 public Hashtable<Object,Entity> entityHash 82 = new Hashtable<Object,Entity>(); 83 84 /** 85 * The element corresponding to pcdata. 86 */ 87 public final Element pcdata = getElement("#pcdata"); 88 89 /** 90 * The element corresponding to html. 91 */ 92 public final Element html = getElement("html"); 93 94 /** 95 * The element corresponding to meta. 96 */ 97 public final Element meta = getElement("meta"); 98 99 /** 100 * The element corresponding to base. 101 */ 102 public final Element base = getElement("base"); 103 104 /** 105 * The element corresponding to isindex. 106 */ 107 public final Element isindex = getElement("isindex"); 108 109 /** 110 * The element corresponding to head. 111 */ 112 public final Element head = getElement("head"); 113 114 /** 115 * The element corresponding to body. 116 */ 117 public final Element body = getElement("body"); 118 119 /** 120 * The element corresponding to applet. 121 */ 122 public final Element applet = getElement("applet"); 123 124 /** 125 * The element corresponding to param. 126 */ 127 public final Element param = getElement("param"); 128 129 /** 130 * The element corresponding to p. 131 */ 132 public final Element p = getElement("p"); 133 134 /** 135 * The element corresponding to title. 136 */ 137 public final Element title = getElement("title"); 138 final Element style = getElement("style"); 139 final Element link = getElement("link"); 140 final Element script = getElement("script"); 141 142 /** 143 * The version of a file 144 */ 145 public static final int FILE_VERSION = 1; 146 147 /** 148 * Creates a new DTD with the specified name. 149 * @param name the name, as a <code>String</code> of the new DTD 150 */ 151 protected DTD(String name) { 152 this.name = name; 153 defEntity("#RE", GENERAL, '\r'); 154 defEntity("#RS", GENERAL, '\n'); 155 defEntity("#SPACE", GENERAL, ' '); 156 defineElement("unknown", EMPTY, false, true, null, null, null, null); 157 } 158 159 /** 160 * Gets the name of the DTD. 161 * @return the name of the DTD 162 */ 163 public String getName() { 164 return name; 165 } 166 167 /** 168 * Gets an entity by name. 169 * @param name the entity name 170 * @return the <code>Entity</code> corresponding to the 171 * <code>name</code> <code>String</code> 172 */ 173 public Entity getEntity(String name) { 174 return entityHash.get(name); 175 } 176 177 /** 178 * Gets a character entity. 179 * @param ch the character 180 * @return the <code>Entity</code> corresponding to the 181 * <code>ch</code> character 182 */ 183 public Entity getEntity(int ch) { 184 return entityHash.get(Integer.valueOf(ch)); 185 } 186 187 /** 188 * Returns <code>true</code> if the element is part of the DTD, 189 * otherwise returns <code>false</code>. 190 * 191 * @param name the requested <code>String</code> 192 * @return <code>true</code> if <code>name</code> exists as 193 * part of the DTD, otherwise returns <code>false</code> 194 */ 195 boolean elementExists(String name) { 196 return !"unknown".equals(name) && (elementHash.get(name) != null); 197 } 198 199 /** 200 * Gets an element by name. A new element is 201 * created if the element doesn't exist. 202 * 203 * @param name the requested <code>String</code> 204 * @return the <code>Element</code> corresponding to 205 * <code>name</code>, which may be newly created 206 */ 207 public Element getElement(String name) { 208 Element e = elementHash.get(name); 209 if (e == null) { 210 e = new Element(name, elements.size()); 211 elements.addElement(e); 212 elementHash.put(name, e); 213 } 214 return e; 215 } 216 217 /** 218 * Gets an element by index. 219 * 220 * @param index the requested index 221 * @return the <code>Element</code> corresponding to 222 * <code>index</code> 223 */ 224 public Element getElement(int index) { 225 return elements.elementAt(index); 226 } 227 228 /** 229 * Defines an entity. If the <code>Entity</code> specified 230 * by <code>name</code>, <code>type</code>, and <code>data</code> 231 * exists, it is returned; otherwise a new <code>Entity</code> 232 * is created and is returned. 233 * 234 * @param name the name of the <code>Entity</code> as a <code>String</code> 235 * @param type the type of the <code>Entity</code> 236 * @param data the <code>Entity</code>'s data 237 * @return the <code>Entity</code> requested or a new <code>Entity</code> 238 * if not found 239 */ 240 public Entity defineEntity(String name, int type, char data[]) { 241 Entity ent = entityHash.get(name); 242 if (ent == null) { 243 ent = new Entity(name, type, data); 244 entityHash.put(name, ent); 245 if (((type & GENERAL) != 0) && (data.length == 1)) { 246 switch (type & ~GENERAL) { 247 case CDATA: 248 case SDATA: 249 entityHash.put(Integer.valueOf(data[0]), ent); 250 break; 251 } 252 } 253 } 254 return ent; 255 } 256 257 /** 258 * Returns the <code>Element</code> which matches the 259 * specified parameters. If one doesn't exist, a new 260 * one is created and returned. 261 * 262 * @param name the name of the <code>Element</code> 263 * @param type the type of the <code>Element</code> 264 * @param omitStart <code>true</code> if start should be omitted 265 * @param omitEnd <code>true</code> if end should be omitted 266 * @param content the <code>ContentModel</code> 267 * @param exclusions the set of elements that must not occur inside the element 268 * @param inclusions the set of elements that can occur inside the element 269 * @param atts the <code>AttributeList</code> specifying the 270 * <code>Element</code> 271 * @return the <code>Element</code> specified 272 */ 273 public Element defineElement(String name, int type, 274 boolean omitStart, boolean omitEnd, ContentModel content, 275 BitSet exclusions, BitSet inclusions, AttributeList atts) { 276 Element e = getElement(name); 277 e.type = type; 278 e.oStart = omitStart; 279 e.oEnd = omitEnd; 280 e.content = content; 281 e.exclusions = exclusions; 282 e.inclusions = inclusions; 283 e.atts = atts; 284 return e; 285 } 286 287 /** 288 * Defines attributes for an {@code Element}. 289 * 290 * @param name the name of the <code>Element</code> 291 * @param atts the <code>AttributeList</code> specifying the 292 * <code>Element</code> 293 */ 294 public void defineAttributes(String name, AttributeList atts) { 295 Element e = getElement(name); 296 e.atts = atts; 297 } 298 299 /** 300 * Creates and returns a character <code>Entity</code>. 301 * @param name the entity's name 302 * @param type the entity's type 303 * @param ch the entity's value (character) 304 * @return the new character <code>Entity</code> 305 */ 306 public Entity defEntity(String name, int type, int ch) { 307 char data[] = {(char)ch}; 308 return defineEntity(name, type, data); 309 } 310 311 /** 312 * Creates and returns an <code>Entity</code>. 313 * @param name the entity's name 314 * @param type the entity's type 315 * @param str the entity's data section 316 * @return the new <code>Entity</code> 317 */ 318 protected Entity defEntity(String name, int type, String str) { 319 int len = str.length(); 320 char data[] = new char[len]; 321 str.getChars(0, len, data, 0); 322 return defineEntity(name, type, data); 323 } 324 325 /** 326 * Creates and returns an <code>Element</code>. 327 * @param name the element's name 328 * @param type the element's type 329 * @param omitStart {@code true} if the element needs no starting tag 330 * @param omitEnd {@code true} if the element needs no closing tag 331 * @param content the element's content 332 * @param exclusions the elements that must be excluded from the content of the element 333 * @param inclusions the elements that can be included as the content of the element 334 * @param atts the attributes of the element 335 * @return the new <code>Element</code> 336 */ 337 protected Element defElement(String name, int type, 338 boolean omitStart, boolean omitEnd, ContentModel content, 339 String[] exclusions, String[] inclusions, AttributeList atts) { 340 BitSet excl = null; 341 if (exclusions != null && exclusions.length > 0) { 342 excl = new BitSet(); 343 for (String str : exclusions) { 344 if (str.length() > 0) { 345 excl.set(getElement(str).getIndex()); 346 } 347 } 348 } 349 BitSet incl = null; 350 if (inclusions != null && inclusions.length > 0) { 351 incl = new BitSet(); 352 for (String str : inclusions) { 353 if (str.length() > 0) { 354 incl.set(getElement(str).getIndex()); 355 } 356 } 357 } 358 return defineElement(name, type, omitStart, omitEnd, content, excl, incl, atts); 359 } 360 361 /** 362 * Creates and returns an <code>AttributeList</code> responding to a new attribute. 363 * @param name the attribute's name 364 * @param type the attribute's type 365 * @param modifier the attribute's modifier 366 * @param value the default value of the attribute 367 * @param values the allowed values for the attribute (multiple values could be separated by '|') 368 * @param atts the previous attribute of the element; to be placed to {@code AttributeList.next}, 369 * creating a linked list 370 * @return the new <code>AttributeList</code> 371 */ 372 protected AttributeList defAttributeList(String name, int type, int modifier, 373 String value, String values, AttributeList atts) { 374 Vector<String> vals = null; 375 if (values != null) { 376 vals = new Vector<String>(); 377 for (StringTokenizer s = new StringTokenizer(values, "|") ; s.hasMoreTokens() ;) { 378 String str = s.nextToken(); 379 if (str.length() > 0) { 380 vals.addElement(str); 381 } 382 } 383 } 384 return new AttributeList(name, type, modifier, value, vals, atts); 385 } 386 387 /** 388 * Creates and returns a new content model. 389 * @param type the type of the new content model 390 * @param obj the content of the content model 391 * @param next pointer to the next content model 392 * @return the new <code>ContentModel</code> 393 */ 394 protected ContentModel defContentModel(int type, Object obj, ContentModel next) { 395 return new ContentModel(type, obj, next); 396 } 397 398 /** 399 * Returns a string representation of this DTD. 400 * @return the string representation of this DTD 401 */ 402 public String toString() { 403 return name; 404 } 405 406 /** 407 * The hashtable key of DTDs in AppContext. 408 */ 409 private static final Object DTD_HASH_KEY = new Object(); 410 411 /** 412 * Put a name and appropriate DTD to hashtable. 413 * 414 * @param name the name of the DTD 415 * @param dtd the DTD 416 */ 417 public static void putDTDHash(String name, DTD dtd) { 418 getDtdHash().put(name, dtd); 419 } 420 421 /** 422 * Returns a DTD with the specified <code>name</code>. If 423 * a DTD with that name doesn't exist, one is created 424 * and returned. Any uppercase characters in the name 425 * are converted to lowercase. 426 * 427 * @param name the name of the DTD 428 * @return the DTD which corresponds to <code>name</code> 429 * @throws IOException if an I/O error occurs 430 */ 431 public static DTD getDTD(String name) throws IOException { 432 name = name.toLowerCase(); 433 DTD dtd = getDtdHash().get(name); 434 if (dtd == null) 435 dtd = new DTD(name); 436 437 return dtd; 438 } 439 440 private static Hashtable<String, DTD> getDtdHash() { 441 AppContext appContext = AppContext.getAppContext(); 442 443 @SuppressWarnings("unchecked") 444 Hashtable<String, DTD> result = (Hashtable<String, DTD>) appContext.get(DTD_HASH_KEY); 445 446 if (result == null) { 447 result = new Hashtable<String, DTD>(); 448 449 appContext.put(DTD_HASH_KEY, result); 450 } 451 452 return result; 453 } 454 455 /** 456 * Recreates a DTD from an archived format. 457 * @param in the <code>DataInputStream</code> to read from 458 * @throws IOException if an I/O error occurs 459 */ 460 public void read(DataInputStream in) throws IOException { 461 if (in.readInt() != FILE_VERSION) { 462 } 463 464 // 465 // Read the list of names 466 // 467 String[] names = new String[in.readShort()]; 468 for (int i = 0; i < names.length; i++) { 469 names[i] = in.readUTF(); 470 } 471 472 473 // 474 // Read the entities 475 // 476 int num = in.readShort(); 477 for (int i = 0; i < num; i++) { 478 short nameId = in.readShort(); 479 int type = in.readByte(); 480 String name = in.readUTF(); 481 defEntity(names[nameId], type | GENERAL, name); 482 } 483 484 // Read the elements 485 // 486 num = in.readShort(); 487 for (int i = 0; i < num; i++) { 488 short nameId = in.readShort(); 489 int type = in.readByte(); 490 byte flags = in.readByte(); 491 ContentModel m = readContentModel(in, names); 492 String[] exclusions = readNameArray(in, names); 493 String[] inclusions = readNameArray(in, names); 494 AttributeList atts = readAttributeList(in, names); 495 defElement(names[nameId], type, 496 ((flags & 0x01) != 0), ((flags & 0x02) != 0), 497 m, exclusions, inclusions, atts); 498 } 499 } 500 501 private ContentModel readContentModel(DataInputStream in, String[] names) 502 throws IOException { 503 byte flag = in.readByte(); 504 switch(flag) { 505 case 0: // null 506 return null; 507 case 1: { // content_c 508 int type = in.readByte(); 509 ContentModel m = readContentModel(in, names); 510 ContentModel next = readContentModel(in, names); 511 return defContentModel(type, m, next); 512 } 513 case 2: { // content_e 514 int type = in.readByte(); 515 Element el = getElement(names[in.readShort()]); 516 ContentModel next = readContentModel(in, names); 517 return defContentModel(type, el, next); 518 } 519 default: 520 throw new IOException("bad bdtd"); 521 } 522 } 523 524 private String[] readNameArray(DataInputStream in, String[] names) 525 throws IOException { 526 int num = in.readShort(); 527 if (num == 0) { 528 return null; 529 } 530 String[] result = new String[num]; 531 for (int i = 0; i < num; i++) { 532 result[i] = names[in.readShort()]; 533 } 534 return result; 535 } 536 537 538 private AttributeList readAttributeList(DataInputStream in, String[] names) 539 throws IOException { 540 AttributeList result = null; 541 for (int num = in.readByte(); num > 0; --num) { 542 short nameId = in.readShort(); 543 int type = in.readByte(); 544 int modifier = in.readByte(); 545 short valueId = in.readShort(); 546 String value = (valueId == -1) ? null : names[valueId]; 547 Vector<String> values = null; 548 short numValues = in.readShort(); 549 if (numValues > 0) { 550 values = new Vector<String>(numValues); 551 for (int i = 0; i < numValues; i++) { 552 values.addElement(names[in.readShort()]); 553 } 554 } 555 result = new AttributeList(names[nameId], type, modifier, value, 556 values, result); 557 // We reverse the order of the linked list by doing this, but 558 // that order isn't important. 559 } 560 return result; 561 } 562 563 }