1 /*
   2  * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javax.swing.text.html.parser;
  27 
  28 import sun.awt.AppContext;
  29 
  30 import java.io.PrintStream;
  31 import java.io.File;
  32 import java.io.FileInputStream;
  33 import java.io.InputStream;
  34 import java.io.IOException;
  35 import java.io.FileNotFoundException;
  36 import java.io.BufferedInputStream;
  37 import java.io.DataInputStream;
  38 import java.util.Hashtable;
  39 import java.util.Vector;
  40 import java.util.BitSet;
  41 import java.util.StringTokenizer;
  42 import java.util.Enumeration;
  43 import java.util.Properties;
  44 import java.net.URL;
  45 
  46 /**
  47  * The representation of an SGML DTD.  DTD describes a document
  48  * syntax and is used in parsing of HTML documents.  It contains
  49  * a list of elements and their attributes as well as a list of
  50  * entities defined in the DTD.
  51  *
  52  * @see Element
  53  * @see AttributeList
  54  * @see ContentModel
  55  * @see Parser
  56  * @author Arthur van Hoff
  57  */
  58 public
  59 class DTD implements DTDConstants {
  60     public String name;
  61     public Vector<Element> elements = new Vector<Element>();
  62     public Hashtable<String,Element> elementHash
  63         = new Hashtable<String,Element>();
  64     public Hashtable<Object,Entity> entityHash
  65         = new Hashtable<Object,Entity>();
  66     public final Element pcdata = getElement("#pcdata");
  67     public final Element html = getElement("html");
  68     public final Element meta = getElement("meta");
  69     public final Element base = getElement("base");
  70     public final Element isindex = getElement("isindex");
  71     public final Element head = getElement("head");
  72     public final Element body = getElement("body");
  73     public final Element applet = getElement("applet");
  74     public final Element param = getElement("param");
  75     public final Element p = getElement("p");
  76     public final Element title = getElement("title");
  77     final Element style = getElement("style");
  78     final Element link = getElement("link");
  79     final Element script = getElement("script");
  80 
  81     public static final int FILE_VERSION = 1;
  82 
  83     /**
  84      * Creates a new DTD with the specified name.
  85      * @param name the name, as a <code>String</code> of the new DTD
  86      */
  87     protected DTD(String name) {
  88         this.name = name;
  89         defEntity("#RE", GENERAL, '\r');
  90         defEntity("#RS", GENERAL, '\n');
  91         defEntity("#SPACE", GENERAL, ' ');
  92         defineElement("unknown", EMPTY, false, true, null, null, null, null);
  93     }
  94 
  95     /**
  96      * Gets the name of the DTD.
  97      * @return the name of the DTD
  98      */
  99     public String getName() {
 100         return name;
 101     }
 102 
 103     /**
 104      * Gets an entity by name.
 105      * @param name  the entity name
 106      * @return the <code>Entity</code> corresponding to the
 107      *   <code>name</code> <code>String</code>
 108      */
 109     public Entity getEntity(String name) {
 110         return entityHash.get(name);
 111     }
 112 
 113     /**
 114      * Gets a character entity.
 115      * @param ch  the character
 116      * @return the <code>Entity</code> corresponding to the
 117      *    <code>ch</code> character
 118      */
 119     public Entity getEntity(int ch) {
 120         return entityHash.get(Integer.valueOf(ch));
 121     }
 122 
 123     /**
 124      * Returns <code>true</code> if the element is part of the DTD,
 125      * otherwise returns <code>false</code>.
 126      *
 127      * @param  name the requested <code>String</code>
 128      * @return <code>true</code> if <code>name</code> exists as
 129      *   part of the DTD, otherwise returns <code>false</code>
 130      */
 131     boolean elementExists(String name) {
 132         return !"unknown".equals(name) && (elementHash.get(name) != null);
 133     }
 134 
 135     /**
 136      * Gets an element by name. A new element is
 137      * created if the element doesn't exist.
 138      *
 139      * @param name the requested <code>String</code>
 140      * @return the <code>Element</code> corresponding to
 141      *   <code>name</code>, which may be newly created
 142      */
 143     public Element getElement(String name) {
 144         Element e = elementHash.get(name);
 145         if (e == null) {
 146             e = new Element(name, elements.size());
 147             elements.addElement(e);
 148             elementHash.put(name, e);
 149         }
 150         return e;
 151     }
 152 
 153     /**
 154      * Gets an element by index.
 155      *
 156      * @param index the requested index
 157      * @return the <code>Element</code> corresponding to
 158      *   <code>index</code>
 159      */
 160     public Element getElement(int index) {
 161         return elements.elementAt(index);
 162     }
 163 
 164     /**
 165      * Defines an entity.  If the <code>Entity</code> specified
 166      * by <code>name</code>, <code>type</code>, and <code>data</code>
 167      * exists, it is returned; otherwise a new <code>Entity</code>
 168      * is created and is returned.
 169      *
 170      * @param name the name of the <code>Entity</code> as a <code>String</code>
 171      * @param type the type of the <code>Entity</code>
 172      * @param data the <code>Entity</code>'s data
 173      * @return the <code>Entity</code> requested or a new <code>Entity</code>
 174      *   if not found
 175      */
 176     public Entity defineEntity(String name, int type, char data[]) {
 177         Entity ent = entityHash.get(name);
 178         if (ent == null) {
 179             ent = new Entity(name, type, data);
 180             entityHash.put(name, ent);
 181             if (((type & GENERAL) != 0) && (data.length == 1)) {
 182                 switch (type & ~GENERAL) {
 183                   case CDATA:
 184                   case SDATA:
 185                       entityHash.put(Integer.valueOf(data[0]), ent);
 186                     break;
 187                 }
 188             }
 189         }
 190         return ent;
 191     }
 192 
 193     /**
 194      * Returns the <code>Element</code> which matches the
 195      * specified parameters.  If one doesn't exist, a new
 196      * one is created and returned.
 197      *
 198      * @param name        the name of the <code>Element</code>
 199      * @param type        the type of the <code>Element</code>
 200      * @param omitStart   <code>true</code> if start should be omitted
 201      * @param omitEnd     <code>true</code> if end should be omitted
 202      * @param content     the <code>ContentModel</code>
 203      * @param exclusions  the set of elements that must not occur inside the element
 204      * @param inclusions  the set of elements that can occur inside the element
 205      * @param atts        the <code>AttributeList</code> specifying the
 206      *                    <code>Element</code>
 207      * @return the <code>Element</code> specified
 208      */
 209     public Element defineElement(String name, int type,
 210                        boolean omitStart, boolean omitEnd, ContentModel content,
 211                        BitSet exclusions, BitSet inclusions, AttributeList atts) {
 212         Element e = getElement(name);
 213         e.type = type;
 214         e.oStart = omitStart;
 215         e.oEnd = omitEnd;
 216         e.content = content;
 217         e.exclusions = exclusions;
 218         e.inclusions = inclusions;
 219         e.atts = atts;
 220         return e;
 221     }
 222 
 223     /**
 224      * Defines attributes for an {@code Element}.
 225      *
 226      * @param name the name of the <code>Element</code>
 227      * @param atts the <code>AttributeList</code> specifying the
 228      *    <code>Element</code>
 229      */
 230     public void defineAttributes(String name, AttributeList atts) {
 231         Element e = getElement(name);
 232         e.atts = atts;
 233     }
 234 
 235     /**
 236      * Creates and returns a character <code>Entity</code>.
 237      * @param name the entity's name
 238      * @param type the entity's type
 239      * @param ch   the entity's value (character)
 240      * @return the new character <code>Entity</code>
 241      */
 242     public Entity defEntity(String name, int type, int ch) {
 243         char data[] = {(char)ch};
 244         return defineEntity(name, type, data);
 245     }
 246 
 247     /**
 248      * Creates and returns an <code>Entity</code>.
 249      * @param name the entity's name
 250      * @param type the entity's type
 251      * @param str  the entity's data section
 252      * @return the new <code>Entity</code>
 253      */
 254     protected Entity defEntity(String name, int type, String str) {
 255         int len = str.length();
 256         char data[] = new char[len];
 257         str.getChars(0, len, data, 0);
 258         return defineEntity(name, type, data);
 259     }
 260 
 261     /**
 262      * Creates and returns an <code>Element</code>.
 263      * @param name        the element's name
 264      * @param type        the element's type
 265      * @param omitStart   {@code true} if the element needs no starting tag
 266      * @param omitEnd     {@code true} if the element needs no closing tag
 267      * @param content     the element's content
 268      * @param exclusions  the elements that must be excluded from the content of the element
 269      * @param inclusions  the elements that can be included as the content of the element
 270      * @param atts        the attributes of the element
 271      * @return the new <code>Element</code>
 272      */
 273     protected Element defElement(String name, int type,
 274                        boolean omitStart, boolean omitEnd, ContentModel content,
 275                        String[] exclusions, String[] inclusions, AttributeList atts) {
 276         BitSet excl = null;
 277         if (exclusions != null && exclusions.length > 0) {
 278             excl = new BitSet();
 279             for (String str : exclusions) {
 280                 if (str.length() > 0) {
 281                     excl.set(getElement(str).getIndex());
 282                 }
 283             }
 284         }
 285         BitSet incl = null;
 286         if (inclusions != null && inclusions.length > 0) {
 287             incl = new BitSet();
 288             for (String str : inclusions) {
 289                 if (str.length() > 0) {
 290                     incl.set(getElement(str).getIndex());
 291                 }
 292             }
 293         }
 294         return defineElement(name, type, omitStart, omitEnd, content, excl, incl, atts);
 295     }
 296 
 297     /**
 298      * Creates and returns an <code>AttributeList</code> responding to a new attribute.
 299      * @param name      the attribute's name
 300      * @param type      the attribute's type
 301      * @param modifier  the attribute's modifier
 302      * @param value     the default value of the attribute
 303      * @param values    the allowed values for the attribute (multiple values could be separated by '|')
 304      * @param atts      the previous attribute of the element; to be placed to {@code AttributeList.next},
 305      *                  creating a linked list
 306      * @return the new <code>AttributeList</code>
 307      */
 308     protected AttributeList defAttributeList(String name, int type, int modifier,
 309                                              String value, String values, AttributeList atts) {
 310         Vector<String> vals = null;
 311         if (values != null) {
 312             vals = new Vector<String>();
 313             for (StringTokenizer s = new StringTokenizer(values, "|") ; s.hasMoreTokens() ;) {
 314                 String str = s.nextToken();
 315                 if (str.length() > 0) {
 316                     vals.addElement(str);
 317                 }
 318             }
 319         }
 320         return new AttributeList(name, type, modifier, value, vals, atts);
 321     }
 322 
 323     /**
 324      * Creates and returns a new content model.
 325      * @param type the type of the new content model
 326      * @param obj  the content of the content model
 327      * @param next pointer to the next content model
 328      * @return the new <code>ContentModel</code>
 329      */
 330     protected ContentModel defContentModel(int type, Object obj, ContentModel next) {
 331         return new ContentModel(type, obj, next);
 332     }
 333 
 334     /**
 335      * Returns a string representation of this DTD.
 336      * @return the string representation of this DTD
 337      */
 338     public String toString() {
 339         return name;
 340     }
 341 
 342     /**
 343      * The hashtable key of DTDs in AppContext.
 344      */
 345     private static final Object DTD_HASH_KEY = new Object();
 346 
 347     public static void putDTDHash(String name, DTD dtd) {
 348         getDtdHash().put(name, dtd);
 349     }
 350 
 351     /**
 352      * Returns a DTD with the specified <code>name</code>.  If
 353      * a DTD with that name doesn't exist, one is created
 354      * and returned.  Any uppercase characters in the name
 355      * are converted to lowercase.
 356      *
 357      * @param name the name of the DTD
 358      * @return the DTD which corresponds to <code>name</code>
 359      * @throws IOException if an I/O error occurs
 360      */
 361     public static DTD getDTD(String name) throws IOException {
 362         name = name.toLowerCase();
 363         DTD dtd = getDtdHash().get(name);
 364         if (dtd == null)
 365           dtd = new DTD(name);
 366 
 367         return dtd;
 368     }
 369 
 370     private static Hashtable<String, DTD> getDtdHash() {
 371         AppContext appContext = AppContext.getAppContext();
 372 
 373         Hashtable<String, DTD> result = (Hashtable<String, DTD>) appContext.get(DTD_HASH_KEY);
 374 
 375         if (result == null) {
 376             result = new Hashtable<String, DTD>();
 377 
 378             appContext.put(DTD_HASH_KEY, result);
 379         }
 380 
 381         return result;
 382     }
 383 
 384     /**
 385      * Recreates a DTD from an archived format.
 386      * @param in  the <code>DataInputStream</code> to read from
 387      * @throws IOException if an I/O error occurs
 388      */
 389     public void read(DataInputStream in) throws IOException {
 390         if (in.readInt() != FILE_VERSION) {
 391         }
 392 
 393         //
 394         // Read the list of names
 395         //
 396         String[] names = new String[in.readShort()];
 397         for (int i = 0; i < names.length; i++) {
 398             names[i] = in.readUTF();
 399         }
 400 
 401 
 402         //
 403         // Read the entities
 404         //
 405         int num = in.readShort();
 406         for (int i = 0; i < num; i++) {
 407             short nameId = in.readShort();
 408             int type = in.readByte();
 409             String name = in.readUTF();
 410             defEntity(names[nameId], type | GENERAL, name);
 411         }
 412 
 413         // Read the elements
 414         //
 415         num = in.readShort();
 416         for (int i = 0; i < num; i++) {
 417             short nameId = in.readShort();
 418             int type = in.readByte();
 419             byte flags = in.readByte();
 420             ContentModel m = readContentModel(in, names);
 421             String[] exclusions = readNameArray(in, names);
 422             String[] inclusions = readNameArray(in, names);
 423             AttributeList atts = readAttributeList(in, names);
 424             defElement(names[nameId], type,
 425                        ((flags & 0x01) != 0), ((flags & 0x02) != 0),
 426                        m, exclusions, inclusions, atts);
 427         }
 428     }
 429 
 430     private ContentModel readContentModel(DataInputStream in, String[] names)
 431                 throws IOException {
 432         byte flag = in.readByte();
 433         switch(flag) {
 434             case 0:             // null
 435                 return null;
 436             case 1: {           // content_c
 437                 int type = in.readByte();
 438                 ContentModel m = readContentModel(in, names);
 439                 ContentModel next = readContentModel(in, names);
 440                 return defContentModel(type, m, next);
 441             }
 442             case 2: {           // content_e
 443                 int type = in.readByte();
 444                 Element el = getElement(names[in.readShort()]);
 445                 ContentModel next = readContentModel(in, names);
 446                 return defContentModel(type, el, next);
 447             }
 448         default:
 449                 throw new IOException("bad bdtd");
 450         }
 451     }
 452 
 453     private String[] readNameArray(DataInputStream in, String[] names)
 454                 throws IOException {
 455         int num = in.readShort();
 456         if (num == 0) {
 457             return null;
 458         }
 459         String[] result = new String[num];
 460         for (int i = 0; i < num; i++) {
 461             result[i] = names[in.readShort()];
 462         }
 463         return result;
 464     }
 465 
 466 
 467     private AttributeList readAttributeList(DataInputStream in, String[] names)
 468                 throws IOException  {
 469         AttributeList result = null;
 470         for (int num = in.readByte(); num > 0; --num) {
 471             short nameId = in.readShort();
 472             int type = in.readByte();
 473             int modifier = in.readByte();
 474             short valueId = in.readShort();
 475             String value = (valueId == -1) ? null : names[valueId];
 476             Vector<String> values = null;
 477             short numValues = in.readShort();
 478             if (numValues > 0) {
 479                 values = new Vector<String>(numValues);
 480                 for (int i = 0; i < numValues; i++) {
 481                     values.addElement(names[in.readShort()]);
 482                 }
 483             }
 484 result = new AttributeList(names[nameId], type, modifier, value,
 485                                        values, result);
 486             // We reverse the order of the linked list by doing this, but
 487             // that order isn't important.
 488         }
 489         return result;
 490     }
 491 
 492 }