1 /*
   2  * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javax.swing.text.html.parser;
  27 
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URL;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Vector;

import sun.awt.AppContext;
  45 
  46 /**
  47  * The representation of an SGML DTD.  DTD describes a document
  48  * syntax and is used in parsing of HTML documents.  It contains
  49  * a list of elements and their attributes as well as a list of
  50  * entities defined in the DTD.
  51  *
  52  * @see Element
  53  * @see AttributeList
  54  * @see ContentModel
  55  * @see Parser
  56  * @author Arthur van Hoff
  57  */
  58 public
  59 class DTD implements DTDConstants {
  60 
    /**
     * The name of the DTD.
     */
    public String name;

    /**
     * The vector of elements. An element created through
     * {@link #getElement(String)} is appended here and is given, as its
     * index, the size of this vector at the time it was created.
     */
    public Vector<Element> elements = new Vector<Element>();

    /**
     * The hash table mapping the name of an element to
     * the corresponding element.
     */
    public Hashtable<String,Element> elementHash
        = new Hashtable<String,Element>();

    /**
     * The hash table mapping an {@code Object} key to the corresponding
     * {@code Entity}. Entities are keyed by name ({@code String}); some
     * single-character entities are additionally keyed by their character
     * boxed as an {@code Integer} (see
     * {@link #defineEntity(String, int, char[])}).
     */
    public Hashtable<Object,Entity> entityHash
        = new Hashtable<Object,Entity>();

    // NOTE: every element field below is initialized through
    // getElement(String), which reads and updates 'elements' and
    // 'elementHash'. Those two fields must therefore stay declared (and so
    // initialized) above this point, and the declaration order below
    // determines the index each of these elements receives.

    /**
     * The element corresponding to pcdata.
     */
    public final Element pcdata = getElement("#pcdata");

    /**
     * The element corresponding to html.
     */
    public final Element html = getElement("html");

    /**
     * The element corresponding to meta.
     */
    public final Element meta = getElement("meta");

    /**
     * The element corresponding to base.
     */
    public final Element base = getElement("base");

    /**
     * The element corresponding to isindex.
     */
    public final Element isindex = getElement("isindex");

    /**
     * The element corresponding to head.
     */
    public final Element head = getElement("head");

    /**
     * The element corresponding to body.
     */
    public final Element body = getElement("body");

    /**
     * The element corresponding to applet.
     */
    public final Element applet = getElement("applet");

    /**
     * The element corresponding to param.
     */
    public final Element param = getElement("param");

    /**
     * The element corresponding to p.
     */
    public final Element p = getElement("p");

    /**
     * The element corresponding to title.
     */
    public final Element title = getElement("title");

    /**
     * The element corresponding to style (package-private).
     */
    final Element style = getElement("style");

    /**
     * The element corresponding to link (package-private).
     */
    final Element link = getElement("link");

    /**
     * The element corresponding to script (package-private).
     */
    final Element script = getElement("script");

    /**
     * The version of the archived DTD file format.
     * {@link #read(DataInputStream)} reads a version number from the start
     * of the stream and compares it with this value.
     */
    public static final int FILE_VERSION = 1;
 146 
 147     /**
 148      * Creates a new DTD with the specified name.
 149      * @param name the name, as a <code>String</code> of the new DTD
 150      */
 151     protected DTD(String name) {
 152         this.name = name;
 153         defEntity("#RE", GENERAL, '\r');
 154         defEntity("#RS", GENERAL, '\n');
 155         defEntity("#SPACE", GENERAL, ' ');
 156         defineElement("unknown", EMPTY, false, true, null, null, null, null);
 157     }
 158 
 159     /**
 160      * Gets the name of the DTD.
 161      * @return the name of the DTD
 162      */
 163     public String getName() {
 164         return name;
 165     }
 166 
 167     /**
 168      * Gets an entity by name.
 169      * @param name  the entity name
 170      * @return the <code>Entity</code> corresponding to the
 171      *   <code>name</code> <code>String</code>
 172      */
 173     public Entity getEntity(String name) {
 174         return entityHash.get(name);
 175     }
 176 
 177     /**
 178      * Gets a character entity.
 179      * @param ch  the character
 180      * @return the <code>Entity</code> corresponding to the
 181      *    <code>ch</code> character
 182      */
 183     public Entity getEntity(int ch) {
 184         return entityHash.get(Integer.valueOf(ch));
 185     }
 186 
 187     /**
 188      * Returns <code>true</code> if the element is part of the DTD,
 189      * otherwise returns <code>false</code>.
 190      *
 191      * @param  name the requested <code>String</code>
 192      * @return <code>true</code> if <code>name</code> exists as
 193      *   part of the DTD, otherwise returns <code>false</code>
 194      */
 195     boolean elementExists(String name) {
 196         return !"unknown".equals(name) && (elementHash.get(name) != null);
 197     }
 198 
 199     /**
 200      * Gets an element by name. A new element is
 201      * created if the element doesn't exist.
 202      *
 203      * @param name the requested <code>String</code>
 204      * @return the <code>Element</code> corresponding to
 205      *   <code>name</code>, which may be newly created
 206      */
 207     public Element getElement(String name) {
 208         Element e = elementHash.get(name);
 209         if (e == null) {
 210             e = new Element(name, elements.size());
 211             elements.addElement(e);
 212             elementHash.put(name, e);
 213         }
 214         return e;
 215     }
 216 
 217     /**
 218      * Gets an element by index.
 219      *
 220      * @param index the requested index
 221      * @return the <code>Element</code> corresponding to
 222      *   <code>index</code>
 223      */
 224     public Element getElement(int index) {
 225         return elements.elementAt(index);
 226     }
 227 
 228     /**
 229      * Defines an entity.  If the <code>Entity</code> specified
 230      * by <code>name</code>, <code>type</code>, and <code>data</code>
 231      * exists, it is returned; otherwise a new <code>Entity</code>
 232      * is created and is returned.
 233      *
 234      * @param name the name of the <code>Entity</code> as a <code>String</code>
 235      * @param type the type of the <code>Entity</code>
 236      * @param data the <code>Entity</code>'s data
 237      * @return the <code>Entity</code> requested or a new <code>Entity</code>
 238      *   if not found
 239      */
 240     public Entity defineEntity(String name, int type, char data[]) {
 241         Entity ent = entityHash.get(name);
 242         if (ent == null) {
 243             ent = new Entity(name, type, data);
 244             entityHash.put(name, ent);
 245             if (((type & GENERAL) != 0) && (data.length == 1)) {
 246                 switch (type & ~GENERAL) {
 247                   case CDATA:
 248                   case SDATA:
 249                       entityHash.put(Integer.valueOf(data[0]), ent);
 250                     break;
 251                 }
 252             }
 253         }
 254         return ent;
 255     }
 256 
 257     /**
 258      * Returns the <code>Element</code> which matches the
 259      * specified parameters.  If one doesn't exist, a new
 260      * one is created and returned.
 261      *
 262      * @param name        the name of the <code>Element</code>
 263      * @param type        the type of the <code>Element</code>
 264      * @param omitStart   <code>true</code> if start should be omitted
 265      * @param omitEnd     <code>true</code> if end should be omitted
 266      * @param content     the <code>ContentModel</code>
 267      * @param exclusions  the set of elements that must not occur inside the element
 268      * @param inclusions  the set of elements that can occur inside the element
 269      * @param atts        the <code>AttributeList</code> specifying the
 270      *                    <code>Element</code>
 271      * @return the <code>Element</code> specified
 272      */
 273     public Element defineElement(String name, int type,
 274                        boolean omitStart, boolean omitEnd, ContentModel content,
 275                        BitSet exclusions, BitSet inclusions, AttributeList atts) {
 276         Element e = getElement(name);
 277         e.type = type;
 278         e.oStart = omitStart;
 279         e.oEnd = omitEnd;
 280         e.content = content;
 281         e.exclusions = exclusions;
 282         e.inclusions = inclusions;
 283         e.atts = atts;
 284         return e;
 285     }
 286 
 287     /**
 288      * Defines attributes for an {@code Element}.
 289      *
 290      * @param name the name of the <code>Element</code>
 291      * @param atts the <code>AttributeList</code> specifying the
 292      *    <code>Element</code>
 293      */
 294     public void defineAttributes(String name, AttributeList atts) {
 295         Element e = getElement(name);
 296         e.atts = atts;
 297     }
 298 
 299     /**
 300      * Creates and returns a character <code>Entity</code>.
 301      * @param name the entity's name
 302      * @param type the entity's type
 303      * @param ch   the entity's value (character)
 304      * @return the new character <code>Entity</code>
 305      */
 306     public Entity defEntity(String name, int type, int ch) {
 307         char data[] = {(char)ch};
 308         return defineEntity(name, type, data);
 309     }
 310 
 311     /**
 312      * Creates and returns an <code>Entity</code>.
 313      * @param name the entity's name
 314      * @param type the entity's type
 315      * @param str  the entity's data section
 316      * @return the new <code>Entity</code>
 317      */
 318     protected Entity defEntity(String name, int type, String str) {
 319         int len = str.length();
 320         char data[] = new char[len];
 321         str.getChars(0, len, data, 0);
 322         return defineEntity(name, type, data);
 323     }
 324 
 325     /**
 326      * Creates and returns an <code>Element</code>.
 327      * @param name        the element's name
 328      * @param type        the element's type
 329      * @param omitStart   {@code true} if the element needs no starting tag
 330      * @param omitEnd     {@code true} if the element needs no closing tag
 331      * @param content     the element's content
 332      * @param exclusions  the elements that must be excluded from the content of the element
 333      * @param inclusions  the elements that can be included as the content of the element
 334      * @param atts        the attributes of the element
 335      * @return the new <code>Element</code>
 336      */
 337     protected Element defElement(String name, int type,
 338                        boolean omitStart, boolean omitEnd, ContentModel content,
 339                        String[] exclusions, String[] inclusions, AttributeList atts) {
 340         BitSet excl = null;
 341         if (exclusions != null && exclusions.length > 0) {
 342             excl = new BitSet();
 343             for (String str : exclusions) {
 344                 if (str.length() > 0) {
 345                     excl.set(getElement(str).getIndex());
 346                 }
 347             }
 348         }
 349         BitSet incl = null;
 350         if (inclusions != null && inclusions.length > 0) {
 351             incl = new BitSet();
 352             for (String str : inclusions) {
 353                 if (str.length() > 0) {
 354                     incl.set(getElement(str).getIndex());
 355                 }
 356             }
 357         }
 358         return defineElement(name, type, omitStart, omitEnd, content, excl, incl, atts);
 359     }
 360 
 361     /**
 362      * Creates and returns an <code>AttributeList</code> responding to a new attribute.
 363      * @param name      the attribute's name
 364      * @param type      the attribute's type
 365      * @param modifier  the attribute's modifier
 366      * @param value     the default value of the attribute
 367      * @param values    the allowed values for the attribute (multiple values could be separated by '|')
 368      * @param atts      the previous attribute of the element; to be placed to {@code AttributeList.next},
 369      *                  creating a linked list
 370      * @return the new <code>AttributeList</code>
 371      */
 372     protected AttributeList defAttributeList(String name, int type, int modifier,
 373                                              String value, String values, AttributeList atts) {
 374         Vector<String> vals = null;
 375         if (values != null) {
 376             vals = new Vector<String>();
 377             for (StringTokenizer s = new StringTokenizer(values, "|") ; s.hasMoreTokens() ;) {
 378                 String str = s.nextToken();
 379                 if (str.length() > 0) {
 380                     vals.addElement(str);
 381                 }
 382             }
 383         }
 384         return new AttributeList(name, type, modifier, value, vals, atts);
 385     }
 386 
 387     /**
 388      * Creates and returns a new content model.
 389      * @param type the type of the new content model
 390      * @param obj  the content of the content model
 391      * @param next pointer to the next content model
 392      * @return the new <code>ContentModel</code>
 393      */
 394     protected ContentModel defContentModel(int type, Object obj, ContentModel next) {
 395         return new ContentModel(type, obj, next);
 396     }
 397 
 398     /**
 399      * Returns a string representation of this DTD.
 400      * @return the string representation of this DTD
 401      */
 402     public String toString() {
 403         return name;
 404     }
 405 
    /**
     * The key under which the name-to-DTD hashtable is stored in the
     * {@code AppContext}.  A private {@code Object} instance is used, so
     * the key is only equal to itself.
     */
    private static final Object DTD_HASH_KEY = new Object();
 410 
 411     /**
 412      * Put a name and appropriate DTD to hashtable.
 413      *
 414      * @param name the name of the DTD
 415      * @param dtd the DTD
 416      */
 417     public static void putDTDHash(String name, DTD dtd) {
 418         getDtdHash().put(name, dtd);
 419     }
 420 
 421     /**
 422      * Returns a DTD with the specified <code>name</code>.  If
 423      * a DTD with that name doesn't exist, one is created
 424      * and returned.  Any uppercase characters in the name
 425      * are converted to lowercase.
 426      *
 427      * @param name the name of the DTD
 428      * @return the DTD which corresponds to <code>name</code>
 429      * @throws IOException if an I/O error occurs
 430      */
 431     public static DTD getDTD(String name) throws IOException {
 432         name = name.toLowerCase();
 433         DTD dtd = getDtdHash().get(name);
 434         if (dtd == null)
 435           dtd = new DTD(name);
 436 
 437         return dtd;
 438     }
 439 
 440     private static Hashtable<String, DTD> getDtdHash() {
 441         AppContext appContext = AppContext.getAppContext();
 442 
 443         @SuppressWarnings("unchecked")
 444         Hashtable<String, DTD> result = (Hashtable<String, DTD>) appContext.get(DTD_HASH_KEY);
 445 
 446         if (result == null) {
 447             result = new Hashtable<String, DTD>();
 448 
 449             appContext.put(DTD_HASH_KEY, result);
 450         }
 451 
 452         return result;
 453     }
 454 
 455     /**
 456      * Recreates a DTD from an archived format.
 457      * @param in  the <code>DataInputStream</code> to read from
 458      * @throws IOException if an I/O error occurs
 459      */
 460     public void read(DataInputStream in) throws IOException {
 461         if (in.readInt() != FILE_VERSION) {
 462         }
 463 
 464         //
 465         // Read the list of names
 466         //
 467         String[] names = new String[in.readShort()];
 468         for (int i = 0; i < names.length; i++) {
 469             names[i] = in.readUTF();
 470         }
 471 
 472 
 473         //
 474         // Read the entities
 475         //
 476         int num = in.readShort();
 477         for (int i = 0; i < num; i++) {
 478             short nameId = in.readShort();
 479             int type = in.readByte();
 480             String name = in.readUTF();
 481             defEntity(names[nameId], type | GENERAL, name);
 482         }
 483 
 484         // Read the elements
 485         //
 486         num = in.readShort();
 487         for (int i = 0; i < num; i++) {
 488             short nameId = in.readShort();
 489             int type = in.readByte();
 490             byte flags = in.readByte();
 491             ContentModel m = readContentModel(in, names);
 492             String[] exclusions = readNameArray(in, names);
 493             String[] inclusions = readNameArray(in, names);
 494             AttributeList atts = readAttributeList(in, names);
 495             defElement(names[nameId], type,
 496                        ((flags & 0x01) != 0), ((flags & 0x02) != 0),
 497                        m, exclusions, inclusions, atts);
 498         }
 499     }
 500 
 501     private ContentModel readContentModel(DataInputStream in, String[] names)
 502                 throws IOException {
 503         byte flag = in.readByte();
 504         switch(flag) {
 505             case 0:             // null
 506                 return null;
 507             case 1: {           // content_c
 508                 int type = in.readByte();
 509                 ContentModel m = readContentModel(in, names);
 510                 ContentModel next = readContentModel(in, names);
 511                 return defContentModel(type, m, next);
 512             }
 513             case 2: {           // content_e
 514                 int type = in.readByte();
 515                 Element el = getElement(names[in.readShort()]);
 516                 ContentModel next = readContentModel(in, names);
 517                 return defContentModel(type, el, next);
 518             }
 519         default:
 520                 throw new IOException("bad bdtd");
 521         }
 522     }
 523 
 524     private String[] readNameArray(DataInputStream in, String[] names)
 525                 throws IOException {
 526         int num = in.readShort();
 527         if (num == 0) {
 528             return null;
 529         }
 530         String[] result = new String[num];
 531         for (int i = 0; i < num; i++) {
 532             result[i] = names[in.readShort()];
 533         }
 534         return result;
 535     }
 536 
 537 
 538     private AttributeList readAttributeList(DataInputStream in, String[] names)
 539                 throws IOException  {
 540         AttributeList result = null;
 541         for (int num = in.readByte(); num > 0; --num) {
 542             short nameId = in.readShort();
 543             int type = in.readByte();
 544             int modifier = in.readByte();
 545             short valueId = in.readShort();
 546             String value = (valueId == -1) ? null : names[valueId];
 547             Vector<String> values = null;
 548             short numValues = in.readShort();
 549             if (numValues > 0) {
 550                 values = new Vector<String>(numValues);
 551                 for (int i = 0; i < numValues; i++) {
 552                     values.addElement(names[in.readShort()]);
 553                 }
 554             }
 555 result = new AttributeList(names[nameId], type, modifier, value,
 556                                        values, result);
 557             // We reverse the order of the linked list by doing this, but
 558             // that order isn't important.
 559         }
 560         return result;
 561     }
 562 
 563 }