1 /*
   2  * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.serializer;
  22 
  23 import java.io.IOException;
  24 import java.util.Properties;
  25 
  26 import javax.xml.transform.Result;
  27 
  28 import org.xml.sax.Attributes;
  29 import org.xml.sax.SAXException;
  30 
  31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  32 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  33 
  34 /**
  35  * This serializer takes a series of SAX or
  36  * SAX-like events and writes its output
  37  * to the given stream.
  38  *
  39  * This class is not a public API, it is public
  40  * because it is used from another package.
  41  *
  42  * @xsl.usage internal
  43  * @LastModified: Sept 2018
  44  */
  45 public final class ToHTMLStream extends ToStream
  46 {
  47 
    /** This flag is set while receiving events from the DTD. */
    protected boolean m_inDTD = false;

    /**
     * True if the previous element is a block element.
     * Read by {@code shouldIndentForText()} to decide whether text may be
     * indented.
     */
    private boolean m_isprevblock = false;

    /**
     * Map that tells which XML characters should have special treatment, and it
     * provides character to entity name lookup.
     * It is obtained once from
     * {@code CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML)}
     * and installed as {@code m_charInfo} by the constructor.
     */
    private static final CharInfo m_htmlcharInfo =
//        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * A digital search trie for fast, case insensitive lookup of ElemDesc objects.
     * Shared by all instances; filled exactly once by the static initializer
     * below.
     */
    static final Trie m_elementFlags = new Trie();

    // Populate the shared element table when the class is initialized.
    static {
        initTagReference(m_elementFlags);
    }
  68     static void initTagReference(Trie m_elementFlags) {
  69 
  70         // HTML 4.0 loose DTD
  71         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  72         m_elementFlags.put(
  73             "FRAME",
  74             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  75         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  76         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  77         m_elementFlags.put(
  78             "ISINDEX",
  79             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  80         m_elementFlags.put(
  81             "APPLET",
  82             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  83         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  84         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  85         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  86 
  87         // HTML 4.0 strict DTD
  88         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  89         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  90         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  91         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  92         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  93         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  94         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
  95         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
  96         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
  97         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
  98         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
  99         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
 100         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
 101         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
 102         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
 103         m_elementFlags.put(
 104             "SUP",
 105             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 106         m_elementFlags.put(
 107             "SUB",
 108             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 109         m_elementFlags.put(
 110             "SPAN",
 111             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 112         m_elementFlags.put(
 113             "BDO",
 114             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 115         m_elementFlags.put(
 116             "BR",
 117             new ElemDesc(
 118                 0
 119                     | ElemDesc.SPECIAL
 120                     | ElemDesc.ASPECIAL
 121                     | ElemDesc.EMPTY
 122                     | ElemDesc.BLOCK));
 123         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
 124         m_elementFlags.put(
 125             "ADDRESS",
 126             new ElemDesc(
 127                 0
 128                     | ElemDesc.BLOCK
 129                     | ElemDesc.BLOCKFORM
 130                     | ElemDesc.BLOCKFORMFIELDSET));
 131         m_elementFlags.put(
 132             "DIV",
 133             new ElemDesc(
 134                 0
 135                     | ElemDesc.BLOCK
 136                     | ElemDesc.BLOCKFORM
 137                     | ElemDesc.BLOCKFORMFIELDSET));
 138         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
 139         m_elementFlags.put(
 140             "MAP",
 141             new ElemDesc(
 142                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
 143         m_elementFlags.put(
 144             "AREA",
 145             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 146         m_elementFlags.put(
 147             "LINK",
 148             new ElemDesc(
 149                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 150         m_elementFlags.put(
 151             "IMG",
 152             new ElemDesc(
 153                 0
 154                     | ElemDesc.SPECIAL
 155                     | ElemDesc.ASPECIAL
 156                     | ElemDesc.EMPTY
 157                     | ElemDesc.WHITESPACESENSITIVE));
 158         m_elementFlags.put(
 159             "OBJECT",
 160             new ElemDesc(
 161                 0
 162                     | ElemDesc.SPECIAL
 163                     | ElemDesc.ASPECIAL
 164                     | ElemDesc.HEADMISC
 165                     | ElemDesc.WHITESPACESENSITIVE));
 166         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
 167         m_elementFlags.put(
 168             "HR",
 169             new ElemDesc(
 170                 0
 171                     | ElemDesc.BLOCK
 172                     | ElemDesc.BLOCKFORM
 173                     | ElemDesc.BLOCKFORMFIELDSET
 174                     | ElemDesc.EMPTY));
 175         m_elementFlags.put(
 176             "P",
 177             new ElemDesc(
 178                 0
 179                     | ElemDesc.BLOCK
 180                     | ElemDesc.BLOCKFORM
 181                     | ElemDesc.BLOCKFORMFIELDSET));
 182         m_elementFlags.put(
 183             "H1",
 184             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 185         m_elementFlags.put(
 186             "H2",
 187             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 188         m_elementFlags.put(
 189             "H3",
 190             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 191         m_elementFlags.put(
 192             "H4",
 193             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 194         m_elementFlags.put(
 195             "H5",
 196             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 197         m_elementFlags.put(
 198             "H6",
 199             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 200         m_elementFlags.put(
 201             "PRE",
 202             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
 203         m_elementFlags.put(
 204             "Q",
 205             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 206         m_elementFlags.put(
 207             "BLOCKQUOTE",
 208             new ElemDesc(
 209                 0
 210                     | ElemDesc.BLOCK
 211                     | ElemDesc.BLOCKFORM
 212                     | ElemDesc.BLOCKFORMFIELDSET));
 213         m_elementFlags.put("INS", new ElemDesc(0));
 214         m_elementFlags.put("DEL", new ElemDesc(0));
 215         m_elementFlags.put(
 216             "DL",
 217             new ElemDesc(
 218                 0
 219                     | ElemDesc.BLOCK
 220                     | ElemDesc.BLOCKFORM
 221                     | ElemDesc.BLOCKFORMFIELDSET));
 222         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
 223         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
 224         m_elementFlags.put(
 225             "OL",
 226             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 227         m_elementFlags.put(
 228             "UL",
 229             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 230         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
 231         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
 232         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
 233         m_elementFlags.put(
 234             "INPUT",
 235             new ElemDesc(
 236                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
 237         m_elementFlags.put(
 238             "SELECT",
 239             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 240         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
 241         m_elementFlags.put("OPTION", new ElemDesc(0));
 242         m_elementFlags.put(
 243             "TEXTAREA",
 244             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 245         m_elementFlags.put(
 246             "FIELDSET",
 247             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
 248         m_elementFlags.put("LEGEND", new ElemDesc(0));
 249         m_elementFlags.put(
 250             "BUTTON",
 251             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 252         m_elementFlags.put(
 253             "TABLE",
 254             new ElemDesc(
 255                 0
 256                     | ElemDesc.BLOCK
 257                     | ElemDesc.BLOCKFORM
 258                     | ElemDesc.BLOCKFORMFIELDSET));
 259         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
 260         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
 261         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
 262         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
 263         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
 264         m_elementFlags.put(
 265             "COL",
 266             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 267         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
 268         m_elementFlags.put("TH", new ElemDesc(0));
 269         m_elementFlags.put("TD", new ElemDesc(0));
 270         m_elementFlags.put(
 271             "HEAD",
 272             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
 273         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
 274         m_elementFlags.put(
 275             "BASE",
 276             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 277         m_elementFlags.put(
 278             "META",
 279             new ElemDesc(
 280                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 281         m_elementFlags.put(
 282             "STYLE",
 283             new ElemDesc(
 284                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
 285         m_elementFlags.put(
 286             "SCRIPT",
 287             new ElemDesc(
 288                 0
 289                     | ElemDesc.SPECIAL
 290                     | ElemDesc.ASPECIAL
 291                     | ElemDesc.HEADMISC
 292                     | ElemDesc.RAW));
 293         m_elementFlags.put(
 294             "NOSCRIPT",
 295             new ElemDesc(
 296                 0
 297                     | ElemDesc.BLOCK
 298                     | ElemDesc.BLOCKFORM
 299                     | ElemDesc.BLOCKFORMFIELDSET));
 300         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
 301 
 302         // From "John Ky" <hand@syd.speednet.com.au
 303         // Transitional Document Type Definition ()
 304         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
 305         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 306 
 307         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
 308         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 309         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 310 
 311         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
 312         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 313 
 314         // From "John Ky" <hand@syd.speednet.com.au
 315         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 316 
 317         // HTML 4.0, section 16.5
 318         m_elementFlags.put(
 319             "IFRAME",
 320             new ElemDesc(
 321                 0
 322                     | ElemDesc.BLOCK
 323                     | ElemDesc.BLOCKFORM
 324                     | ElemDesc.BLOCKFORMFIELDSET));
 325 
 326         // Netscape 4 extension
 327         m_elementFlags.put(
 328             "LAYER",
 329             new ElemDesc(
 330                 0
 331                     | ElemDesc.BLOCK
 332                     | ElemDesc.BLOCKFORM
 333                     | ElemDesc.BLOCKFORMFIELDSET));
 334         // Netscape 4 extension
 335         m_elementFlags.put(
 336             "ILAYER",
 337             new ElemDesc(
 338                 0
 339                     | ElemDesc.BLOCK
 340                     | ElemDesc.BLOCKFORM
 341                     | ElemDesc.BLOCKFORMFIELDSET));
 342 
 343 
 344         // NOW FOR ATTRIBUTE INFORMATION . . .
 345         ElemDesc elemDesc;
 346 
 347 
 348         // ----------------------------------------------
 349         elemDesc = (ElemDesc) m_elementFlags.get("a");
 350         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 351         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
 352 
 353         // ----------------------------------------------
 354         elemDesc = (ElemDesc) m_elementFlags.get("area");
 355         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 356         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
 357 
 358         // ----------------------------------------------
 359         elemDesc = (ElemDesc) m_elementFlags.get("base");
 360         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 361 
 362         // ----------------------------------------------
 363         elemDesc = (ElemDesc) m_elementFlags.get("button");
 364         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 365 
 366         // ----------------------------------------------
 367         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
 368         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 369 
 370         // ----------------------------------------------
 371         elemDesc = (ElemDesc) m_elementFlags.get("del");
 372         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 373 
 374         // ----------------------------------------------
 375         elemDesc = (ElemDesc) m_elementFlags.get("dir");
 376         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 377 
 378         // ----------------------------------------------
 379 
 380         elemDesc = (ElemDesc) m_elementFlags.get("div");
 381         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
 382         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 383 
 384         // ----------------------------------------------
 385         elemDesc = (ElemDesc) m_elementFlags.get("dl");
 386         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 387 
 388         // ----------------------------------------------
 389         elemDesc = (ElemDesc) m_elementFlags.get("form");
 390         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
 391 
 392         // ----------------------------------------------
 393         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
 394         elemDesc = (ElemDesc) m_elementFlags.get("frame");
 395         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 396         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 397         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
 398 
 399         // ----------------------------------------------
 400         elemDesc = (ElemDesc) m_elementFlags.get("head");
 401         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
 402 
 403         // ----------------------------------------------
 404         elemDesc = (ElemDesc) m_elementFlags.get("hr");
 405         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
 406 
 407         // ----------------------------------------------
 408         // HTML 4.0, section 16.5
 409         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
 410         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 411         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 412 
 413         // ----------------------------------------------
 414         // Netscape 4 extension
 415         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
 416         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 417 
 418         // ----------------------------------------------
 419         elemDesc = (ElemDesc) m_elementFlags.get("img");
 420         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 421         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 422         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 423         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 424 
 425         // ----------------------------------------------
 426         elemDesc = (ElemDesc) m_elementFlags.get("input");
 427         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 428         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 429         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
 430         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 431         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 432         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 433 
 434         // ----------------------------------------------
 435         elemDesc = (ElemDesc) m_elementFlags.get("ins");
 436         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 437 
 438         // ----------------------------------------------
 439         // Netscape 4 extension
 440         elemDesc = (ElemDesc) m_elementFlags.get("layer");
 441         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 442 
 443         // ----------------------------------------------
 444         elemDesc = (ElemDesc) m_elementFlags.get("link");
 445         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 446 
 447         // ----------------------------------------------
 448         elemDesc = (ElemDesc) m_elementFlags.get("menu");
 449         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 450 
 451         // ----------------------------------------------
 452         elemDesc = (ElemDesc) m_elementFlags.get("object");
 453         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
 454         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
 455         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
 456         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
 457         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 458         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
 459 
 460         // ----------------------------------------------
 461         elemDesc = (ElemDesc) m_elementFlags.get("ol");
 462         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 463 
 464         // ----------------------------------------------
 465         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
 466         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 467 
 468         // ----------------------------------------------
 469         elemDesc = (ElemDesc) m_elementFlags.get("option");
 470         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
 471         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 472 
 473         // ----------------------------------------------
 474         elemDesc = (ElemDesc) m_elementFlags.get("q");
 475         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 476 
 477         // ----------------------------------------------
 478         elemDesc = (ElemDesc) m_elementFlags.get("script");
 479         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 480         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
 481         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
 482 
 483         // ----------------------------------------------
 484         elemDesc = (ElemDesc) m_elementFlags.get("select");
 485         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 486         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
 487 
 488         // ----------------------------------------------
 489         elemDesc = (ElemDesc) m_elementFlags.get("table");
 490         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 491 
 492         // ----------------------------------------------
 493         elemDesc = (ElemDesc) m_elementFlags.get("td");
 494         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 495 
 496         // ----------------------------------------------
 497         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
 498         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 499         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 500 
 501         // ----------------------------------------------
 502         elemDesc = (ElemDesc) m_elementFlags.get("th");
 503         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 504 
 505         // ----------------------------------------------
 506         // The nowrap attribute of a tr element is both
 507         // a Netscape and Internet-Explorer extension
 508         elemDesc = (ElemDesc) m_elementFlags.get("tr");
 509         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 510 
 511         // ----------------------------------------------
 512         elemDesc = (ElemDesc) m_elementFlags.get("ul");
 513         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 514     }
 515 
 516     /**
 517      * Dummy element for elements not found.
 518      */
 519     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
 520 
 521     /** True if URLs should be specially escaped with the %xx form. */
 522     private boolean m_specialEscapeURLs = true;
 523 
 524     /** True if the META tag should be omitted. */
 525     private boolean m_omitMetaTag = false;
 526 
 527     /**
 528      * Tells if the formatter should use special URL escaping.
 529      *
 530      * @param bool True if URLs should be specially escaped with the %xx form.
 531      */
 532     public void setSpecialEscapeURLs(boolean bool)
 533     {
 534         m_specialEscapeURLs = bool;
 535     }
 536 
 537     /**
 538      * Tells if the formatter should omit the META tag.
 539      *
 540      * @param bool True if the META tag should be omitted.
 541      */
 542     public void setOmitMetaTag(boolean bool)
 543     {
 544         m_omitMetaTag = bool;
 545     }
 546 
 547     /**
 548      * Specifies an output format for this serializer. It the
 549      * serializer has already been associated with an output format,
 550      * it will switch to the new format. This method should not be
 551      * called while the serializer is in the process of serializing
 552      * a document.
 553      *
 554      * This method can be called multiple times before starting
 555      * the serialization of a particular result-tree. In principle
 556      * all serialization parameters can be changed, with the exception
 557      * of method="html" (it must be method="html" otherwise we
 558      * shouldn't even have a ToHTMLStream object here!)
 559      *
 560      * @param format The output format or serialzation parameters
 561      * to use.
 562      */
 563     public void setOutputFormat(Properties format)
 564     {
 565 
 566         m_specialEscapeURLs =
 567             OutputPropertyUtils.getBooleanProperty(
 568                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
 569                 format);
 570 
 571         m_omitMetaTag =
 572             OutputPropertyUtils.getBooleanProperty(
 573                 OutputPropertiesFactory.S_OMIT_META_TAG,
 574                 format);
 575 
 576         super.setOutputFormat(format);
 577     }
 578 
 579     /**
 580      * Tells if the formatter should use special URL escaping.
 581      *
 582      * @return True if URLs should be specially escaped with the %xx form.
 583      */
 584     private final boolean getSpecialEscapeURLs()
 585     {
 586         return m_specialEscapeURLs;
 587     }
 588 
 589     /**
 590      * Tells if the formatter should omit the META tag.
 591      *
 592      * @return True if the META tag should be omitted.
 593      */
 594     private final boolean getOmitMetaTag()
 595     {
 596         return m_omitMetaTag;
 597     }
 598 
 599     /**
 600      * Get a description of the given element.
 601      *
 602      * @param name non-null name of element, case insensitive.
 603      *
 604      * @return non-null reference to ElemDesc, which may be m_dummy if no
 605      *         element description matches the given name.
 606      */
 607     public static final ElemDesc getElemDesc(String name)
 608     {
 609         /* this method used to return m_dummy  when name was null
 610          * but now it doesn't check and and requires non-null name.
 611          */
 612         Object obj = m_elementFlags.get(name);
 613         if (null != obj)
 614             return (ElemDesc)obj;
 615         return m_dummy;
 616     }
 617 
 618     /**
 619      * A Trie that is just a copy of the "static" one.
 620      * We need this one to be able to use the faster, but not thread-safe
 621      * method Trie.get2(name)
 622      */
 623     private Trie m_htmlInfo = new Trie(m_elementFlags);
 624     /**
 625      * Calls to this method could be replaced with calls to
 626      * getElemDesc(name), but this one should be faster.
 627      */
 628     private ElemDesc getElemDesc2(String name)
 629     {
 630         Object obj = m_htmlInfo.get2(name);
 631         if (null != obj)
 632             return (ElemDesc)obj;
 633         return m_dummy;
 634     }
 635 
 636     /**
 637      * Default constructor.
 638      */
 639     public ToHTMLStream()
 640     {
 641 
 642         super();
 643         m_charInfo = m_htmlcharInfo;
 644         // initialize namespaces
 645         m_prefixMap = new NamespaceMappings();
 646 
 647     }
 648 
 649     /** The name of the current element. */
 650 //    private String m_currentElementName = null;
 651 
 652     /**
 653      * Receive notification of the beginning of a document.
 654      *
 655      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 656      *            wrapping another exception.
 657      *
 658      * @throws org.xml.sax.SAXException
 659      */
 660     protected void startDocumentInternal() throws org.xml.sax.SAXException
 661     {
 662         super.startDocumentInternal();
 663 
 664         m_needToCallStartDocument = false;
 665         m_needToOutputDocTypeDecl = true;
 666         m_startNewLine = false;
 667         setOmitXMLDeclaration(true);
 668 
 669         if (true == m_needToOutputDocTypeDecl)
 670         {
 671             String doctypeSystem = getDoctypeSystem();
 672             String doctypePublic = getDoctypePublic();
 673             if ((null != doctypeSystem) || (null != doctypePublic))
 674             {
 675                 final java.io.Writer writer = m_writer;
 676                 try
 677                 {
 678                 writer.write("<!DOCTYPE html");
 679 
 680                 if (null != doctypePublic)
 681                 {
 682                     writer.write(" PUBLIC \"");
 683                     writer.write(doctypePublic);
 684                     writer.write('"');
 685                 }
 686 
 687                 if (null != doctypeSystem)
 688                 {
 689                     if (null == doctypePublic)
 690                         writer.write(" SYSTEM \"");
 691                     else
 692                         writer.write(" \"");
 693 
 694                     writer.write(doctypeSystem);
 695                     writer.write('"');
 696                 }
 697 
 698                 writer.write('>');
 699                 outputLineSep();
 700                 }
 701                 catch(IOException e)
 702                 {
 703                     throw new SAXException(e);
 704                 }
 705             }
 706         }
 707 
 708         m_needToOutputDocTypeDecl = false;
 709     }
 710 
 711     /**
 712      * Receive notification of the end of a document.
 713      *
 714      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 715      *            wrapping another exception.
 716      *
 717      * @throws org.xml.sax.SAXException
 718      */
 719     public final void endDocument() throws org.xml.sax.SAXException
 720     {
 721         if (m_doIndent) {
 722             flushCharactersBuffer();
 723         }
 724         flushPending();
 725         if (m_doIndent && !m_isprevtext)
 726         {
 727             try
 728             {
 729             outputLineSep();
 730             }
 731             catch(IOException e)
 732             {
 733                 throw new SAXException(e);
 734             }
 735         }
 736 
 737         flushWriter();
 738         if (m_tracer != null)
 739             super.fireEndDoc();
 740     }
 741 
 742     /**
 743      * If the previous is an inline element, won't insert a new line before the
 744      * text.
 745      *
 746      */
 747     protected boolean shouldIndentForText() {
 748         return super.shouldIndentForText() && m_isprevblock;
 749     }
 750 
 751     /**
 752      * Only check m_doIndent, disregard m_ispreserveSpace.
 753      *
 754      * @return True if the content should be formatted.
 755      */
 756     protected boolean shouldFormatOutput() {
 757         return m_doIndent;
 758     }
 759 
 760     /**
 761      * Receive notification of the beginning of an element.
 762      *
 763      *
 764      * @param namespaceURI
 765      * @param localName
 766      * @param name
 767      *            The element type name.
 768      * @param atts
 769      *            The attributes attached to the element, if any.
 770      * @throws org.xml.sax.SAXException
 771      *             Any SAX exception, possibly wrapping another exception.
 772      * @see #endElement
 773      * @see org.xml.sax.AttributeList
 774      */
 775     public void startElement(
 776         String namespaceURI,
 777         String localName,
 778         String name,
 779         Attributes atts)
 780         throws SAXException
 781     {
 782         if (m_doIndent) {
 783             // will add extra one if having namespace but no matter
 784             m_childNodeNum++;
 785             flushCharactersBuffer();
 786         }
 787         ElemContext elemContext = m_elemContext;
 788 
 789         // clean up any pending things first
 790         if (elemContext.m_startTagOpen)
 791         {
 792             closeStartTag();
 793             elemContext.m_startTagOpen = false;
 794         }
 795         else if (m_cdataTagOpen)
 796         {
 797             closeCDATA();
 798             m_cdataTagOpen = false;
 799         }
 800         else if (m_needToCallStartDocument)
 801         {
 802             startDocumentInternal();
 803             m_needToCallStartDocument = false;
 804         }
 805 
 806 
 807         // if this element has a namespace then treat it like XML
 808         if (null != namespaceURI && namespaceURI.length() > 0)
 809         {
 810             super.startElement(namespaceURI, localName, name, atts);
 811 
 812             return;
 813         }
 814 
 815         try
 816         {
 817             // getElemDesc2(name) is faster than getElemDesc(name)
 818             ElemDesc elemDesc = getElemDesc2(name);
 819             int elemFlags = elemDesc.getFlags();
 820 
 821             // deal with indentation issues first
 822             if (m_doIndent)
 823             {
 824                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
 825                 if ((elemContext.m_elementName != null)
 826                         // If this element is a block element,
 827                         // or if this is not a block element, then if the
 828                         // previous is neither a text nor an inline
 829                         && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
 830                 {
 831                     m_startNewLine = true;
 832 
 833                     indent();
 834                 }
 835                 m_isprevblock = isBlockElement;
 836             }
 837 
 838             // save any attributes for later processing
 839             if (atts != null)
 840                 addAttributes(atts);
 841 
 842             m_isprevtext = false;
 843             final java.io.Writer writer = m_writer;
 844             writer.write('<');
 845             writer.write(name);
 846 
 847             if (m_doIndent) {
 848                 m_childNodeNumStack.add(m_childNodeNum);
 849                 m_childNodeNum = 0;
 850             }
 851 
 852             if (m_tracer != null)
 853                 firePseudoAttributes();
 854 
 855             if ((elemFlags & ElemDesc.EMPTY) != 0)
 856             {
 857                 // an optimization for elements which are expected
 858                 // to be empty.
 859                 m_elemContext = elemContext.push();
 860                 /* XSLTC sometimes calls namespaceAfterStartElement()
 861                  * so we need to remember the name
 862                  */
 863                 m_elemContext.m_elementName = name;
 864                 m_elemContext.m_elementDesc = elemDesc;
 865                 return;
 866             }
 867             else
 868             {
 869                 elemContext = elemContext.push(namespaceURI,localName,name);
 870                 m_elemContext = elemContext;
 871                 elemContext.m_elementDesc = elemDesc;
 872                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
 873 
 874                 // set m_startNewLine for the next element
 875                 if (m_doIndent) {
 876                     // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
 877                     // in this branch m_elemContext.m_elementName is not null
 878                     boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
 879                     if (isBlockElement)
 880                         m_startNewLine = true;
 881                 }
 882             }
 883 
 884 
 885             if ((elemFlags & ElemDesc.HEADELEM) != 0)
 886             {
 887                 // This is the <HEAD> element, do some special processing
 888                 closeStartTag();
 889                 elemContext.m_startTagOpen = false;
 890                 if (!m_omitMetaTag)
 891                 {
 892                     if (m_doIndent)
 893                         indent();
 894                     writer.write(
 895                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
 896                     String encoding = getEncoding();
 897                     String encode = Encodings.getMimeEncoding(encoding);
 898                     writer.write(encode);
 899                     writer.write("\">");
 900                 }
 901             }
 902         }
 903         catch (IOException e)
 904         {
 905             throw new SAXException(e);
 906         }
 907     }
 908 
 909     /**
 910      *  Receive notification of the end of an element.
 911      *
 912      *
 913      *  @param namespaceURI
 914      *  @param localName
 915      *  @param name The element type name
 916      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 917      *             wrapping another exception.
 918      */
 919     public final void endElement(
 920         final String namespaceURI,
 921         final String localName,
 922         final String name)
 923         throws org.xml.sax.SAXException
 924     {
 925         if (m_doIndent) {
 926             flushCharactersBuffer();
 927         }
 928         // deal with any pending issues
 929         if (m_cdataTagOpen)
 930             closeCDATA();
 931 
 932         // if the element has a namespace, treat it like XML, not HTML
 933         if (null != namespaceURI && namespaceURI.length() > 0)
 934         {
 935             super.endElement(namespaceURI, localName, name);
 936 
 937             return;
 938         }
 939 
 940         try
 941         {
 942 
 943             ElemContext elemContext = m_elemContext;
 944             final ElemDesc elemDesc = elemContext.m_elementDesc;
 945             final int elemFlags = elemDesc.getFlags();
 946             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
 947 
 948             // deal with any indentation issues
 949             if (m_doIndent)
 950             {
 951                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
 952                 boolean shouldIndent = false;
 953 
 954                 // If this element is a block element,
 955                 // or if this is not a block element, then if the previous is
 956                 // neither a text nor an inline
 957                 if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
 958                 {
 959                     m_startNewLine = true;
 960                     shouldIndent = true;
 961                 }
 962                 if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
 963                     indent(elemContext.m_currentElemDepth - 1);
 964 
 965                 m_isprevblock = isBlockElement;
 966             }
 967 
 968             final java.io.Writer writer = m_writer;
 969             if (!elemContext.m_startTagOpen)
 970             {
 971                 writer.write("</");
 972                 writer.write(name);
 973                 writer.write('>');
 974             }
 975             else
 976             {
 977                 // the start-tag open when this method was called,
 978                 // so we need to process it now.
 979 
 980                 if (m_tracer != null)
 981                     super.fireStartElem(name);
 982 
 983                 // the starting tag was still open when we received this endElement() call
 984                 // so we need to process any gathered attributes NOW, before they go away.
 985                 int nAttrs = m_attributes.getLength();
 986                 if (nAttrs > 0)
 987                 {
 988                     processAttributes(m_writer, nAttrs);
 989                     // clear attributes object for re-use with next element
 990                     m_attributes.clear();
 991                 }
 992                 if (!elemEmpty)
 993                 {
 994                     // As per Dave/Paul recommendation 12/06/2000
 995                     // if (shouldIndent)
 996                     // writer.write('>');
 997                     //  indent(m_currentIndent);
 998 
 999                     writer.write("></");
1000                     writer.write(name);
1001                     writer.write('>');
1002                 }
1003                 else
1004                 {
1005                     writer.write('>');
1006                 }
1007             }
1008 
1009             if (m_doIndent) {
1010                 m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1);
1011                 // clean up because the element has ended
1012                 m_isprevtext = false;
1013             }
1014             // fire off the end element event
1015             if (m_tracer != null)
1016                 super.fireEndElem(name);
1017 
1018             // OPTIMIZE-EMPTY
1019             if (elemEmpty)
1020             {
1021                 // a quick exit if the HTML element had no children.
1022                 // This block of code can be removed if the corresponding block of code
1023                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1024                 m_elemContext = elemContext.m_prev;
1025                 return;
1026             }
1027 
1028             // some more clean because the element has ended.
1029             m_elemContext = elemContext.m_prev;
1030 //            m_isRawStack.pop();
1031         }
1032         catch (IOException e)
1033         {
1034             throw new SAXException(e);
1035         }
1036     }
1037 
1038     /**
1039      * Process an attribute.
1040      * @param   writer The writer to write the processed output to.
1041      * @param   name   The name of the attribute.
1042      * @param   value   The value of the attribute.
1043      * @param   elemDesc The description of the HTML element
1044      *           that has this attribute.
1045      *
1046      * @throws org.xml.sax.SAXException
1047      */
1048     protected void processAttribute(
1049         java.io.Writer writer,
1050         String name,
1051         String value,
1052         ElemDesc elemDesc)
1053         throws IOException, SAXException
1054     {
1055         writer.write(' ');
1056 
1057         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1058             && elemDesc != null
1059             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1060         {
1061             writer.write(name);
1062         }
1063         else
1064         {
1065             // %REVIEW% %OPT%
1066             // Two calls to single-char write may NOT
1067             // be more efficient than one to string-write...
1068             writer.write(name);
1069             writer.write("=\"");
1070             if (   elemDesc != null
1071                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1072                 writeAttrURI(writer, value, m_specialEscapeURLs);
1073             else
1074                 writeAttrString(writer, value, this.getEncoding());
1075             writer.write('"');
1076 
1077         }
1078     }
1079 
1080     /**
1081      * Tell if a character is an ASCII digit.
1082      */
1083     private boolean isASCIIDigit(char c)
1084     {
1085         return (c >= '0' && c <= '9');
1086     }
1087 
1088     /**
1089      * Make an integer into an HH hex value.
1090      * Does no checking on the size of the input, since this
1091      * is only meant to be used locally by writeAttrURI.
1092      *
1093      * @param i must be a value less than 255.
1094      *
1095      * @return should be a two character string.
1096      */
1097     private static String makeHHString(int i)
1098     {
1099         String s = Integer.toHexString(i).toUpperCase();
1100         if (s.length() == 1)
1101         {
1102             s = "0" + s;
1103         }
1104         return s;
1105     }
1106 
1107     /**
1108     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1109     * @param str must be 2 characters long
1110     *
1111     * @return true or false
1112     */
1113     private boolean isHHSign(String str)
1114     {
1115         boolean sign = true;
1116         try
1117         {
1118             char r = (char) Integer.parseInt(str, 16);
1119         }
1120         catch (NumberFormatException e)
1121         {
1122             sign = false;
1123         }
1124         return sign;
1125     }
1126 
1127     /**
1128      * Write the specified <var>string</var> after substituting non ASCII characters,
1129      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1130      *
1131      * @param   string      String to convert to XML format.
1132      * @param doURLEscaping True if we should try to encode as
1133      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1134      *
1135      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1136      */
1137     public void writeAttrURI(
1138         final java.io.Writer writer, String string, boolean doURLEscaping)
1139         throws IOException
1140     {
1141         // http://www.ietf.org/rfc/rfc2396.txt says:
1142         // A URI is always in an "escaped" form, since escaping or unescaping a
1143         // completed URI might change its semantics.  Normally, the only time
1144         // escape encodings can safely be made is when the URI is being created
1145         // from its component parts; each component may have its own set of
1146         // characters that are reserved, so only the mechanism responsible for
1147         // generating or interpreting that component can determine whether or
1148         // not escaping a character will change its semantics. Likewise, a URI
1149         // must be separated into its components before the escaped characters
1150         // within those components can be safely decoded.
1151         //
1152         // ...So we do our best to do limited escaping of the URL, without
1153         // causing damage.  If the URL is already properly escaped, in theory, this
1154         // function should not change the string value.
1155 
1156         final int end = string.length();
1157         if (end > m_attrBuff.length)
1158         {
1159            m_attrBuff = new char[end*2 + 1];
1160         }
1161         string.getChars(0,end, m_attrBuff, 0);
1162         final char[] chars = m_attrBuff;
1163 
1164         int cleanStart = 0;
1165         int cleanLength = 0;
1166 
1167 
1168         char ch = 0;
1169         for (int i = 0; i < end; i++)
1170         {
1171             ch = chars[i];
1172 
1173             if ((ch < 32) || (ch > 126))
1174             {
1175                 if (cleanLength > 0)
1176                 {
1177                     writer.write(chars, cleanStart, cleanLength);
1178                     cleanLength = 0;
1179                 }
1180                 if (doURLEscaping)
1181                 {
1182                     // Encode UTF16 to UTF8.
1183                     // Reference is Unicode, A Primer, by Tony Graham.
1184                     // Page 92.
1185 
1186                     // Note that Kay doesn't escape 0x20...
1187                     //  if(ch == 0x20) // Not sure about this... -sb
1188                     //  {
1189                     //    writer.write(ch);
1190                     //  }
1191                     //  else
1192                     if (ch <= 0x7F)
1193                     {
1194                         writer.write('%');
1195                         writer.write(makeHHString(ch));
1196                     }
1197                     else if (ch <= 0x7FF)
1198                     {
1199                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1200                         // and set two high bits.
1201                         int high = (ch >> 6) | 0xC0;
1202                         int low = (ch & 0x3F) | 0x80;
1203                         // First 6 bits, + high bit
1204                         writer.write('%');
1205                         writer.write(makeHHString(high));
1206                         writer.write('%');
1207                         writer.write(makeHHString(low));
1208                     }
1209                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1210                     {
1211                         // I'm sure this can be done in 3 instructions, but I choose
1212                         // to try and do it exactly like it is done in the book, at least
1213                         // until we are sure this is totally clean.  I don't think performance
1214                         // is a big issue with this particular function, though I could be
1215                         // wrong.  Also, the stuff below clearly does more masking than
1216                         // it needs to do.
1217 
1218                         // Clear high 6 bits.
1219                         int highSurrogate = ((int) ch) & 0x03FF;
1220 
1221                         // Middle 4 bits (wwww) + 1
1222                         // "Note that the value of wwww from the high surrogate bit pattern
1223                         // is incremented to make the uuuuu bit pattern in the scalar value
1224                         // so the surrogate pair don't address the BMP."
1225                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1226                         int uuuuu = wwww + 1;
1227 
1228                         // next 4 bits
1229                         int zzzz = (highSurrogate & 0x003C) >> 2;
1230 
1231                         // low 2 bits
1232                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1233 
1234                         // Get low surrogate character.
1235                         ch = chars[++i];
1236 
1237                         // Clear high 6 bits.
1238                         int lowSurrogate = ((int) ch) & 0x03FF;
1239 
1240                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1241                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1242 
1243                         // bottom 6 bits.
1244                         int xxxxxx = (lowSurrogate & 0x003F);
1245 
1246                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1247                         int byte2 =
1248                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1249                         int byte3 = 0x80 | yyyyyy;
1250                         int byte4 = 0x80 | xxxxxx;
1251 
1252                         writer.write('%');
1253                         writer.write(makeHHString(byte1));
1254                         writer.write('%');
1255                         writer.write(makeHHString(byte2));
1256                         writer.write('%');
1257                         writer.write(makeHHString(byte3));
1258                         writer.write('%');
1259                         writer.write(makeHHString(byte4));
1260                     }
1261                     else
1262                     {
1263                         int high = (ch >> 12) | 0xE0; // top 4 bits
1264                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1265                         // middle 6 bits
1266                         int low = (ch & 0x3F) | 0x80;
1267                         // First 6 bits, + high bit
1268                         writer.write('%');
1269                         writer.write(makeHHString(high));
1270                         writer.write('%');
1271                         writer.write(makeHHString(middle));
1272                         writer.write('%');
1273                         writer.write(makeHHString(low));
1274                     }
1275 
1276                 }
1277                 else if (escapingNotNeeded(ch))
1278                 {
1279                     writer.write(ch);
1280                 }
1281                 else
1282                 {
1283                     writer.write("&#");
1284                     writer.write(Integer.toString(ch));
1285                     writer.write(';');
1286                 }
1287                 // In this character range we have first written out any previously accumulated
1288                 // "clean" characters, then processed the current more complicated character,
1289                 // which may have incremented "i".
1290                 // We now we reset the next possible clean character.
1291                 cleanStart = i + 1;
1292             }
1293             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1294             // not allowing quotes in the URI proper syntax, nor in the fragment
1295             // identifier, we believe that it's OK to double escape quotes.
1296             else if (ch == '"')
1297             {
1298                 // If the character is a '%' number number, try to avoid double-escaping.
1299                 // There is a question if this is legal behavior.
1300 
1301                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1302                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1303 
1304                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1305 
1306                 // We are no longer escaping '%'
1307 
1308                 if (cleanLength > 0)
1309                 {
1310                     writer.write(chars, cleanStart, cleanLength);
1311                     cleanLength = 0;
1312                 }
1313 
1314 
1315                 // Mike Kay encodes this as ", so he may know something I don't?
1316                 if (doURLEscaping)
1317                     writer.write("%22");
1318                 else
1319                     writer.write("&quot;"); // we have to escape this, I guess.
1320 
1321                 // We have written out any clean characters, then the escaped '%' and now we
1322                 // We now we reset the next possible clean character.
1323                 cleanStart = i + 1;
1324             }
1325             else if (ch == '&')
1326             {
1327                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1328                 // instead of "&" to avoid confusion with the beginning of a character
1329                 // reference (entity reference open delimiter).
1330                 if (cleanLength > 0)
1331                 {
1332                     writer.write(chars, cleanStart, cleanLength);
1333                     cleanLength = 0;
1334                 }
1335                 writer.write("&amp;");
1336                 cleanStart = i + 1;
1337             }
1338             else
1339             {
1340                 // no processing for this character, just count how
1341                 // many characters in a row that we have that need no processing
1342                 cleanLength++;
1343             }
1344         }
1345 
1346         // are there any clean characters at the end of the array
1347         // that we haven't processed yet?
1348         if (cleanLength > 1)
1349         {
1350             // if the whole string can be written out as-is do so
1351             // otherwise write out the clean chars at the end of the
1352             // array
1353             if (cleanStart == 0)
1354                 writer.write(string);
1355             else
1356                 writer.write(chars, cleanStart, cleanLength);
1357         }
1358         else if (cleanLength == 1)
1359         {
1360             // a little optimization for 1 clean character
1361             // (we could have let the previous if(...) handle them all)
1362             writer.write(ch);
1363         }
1364     }
1365 
1366     /**
1367      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1368      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1369      *
1370      * @param   string      String to convert to XML format.
1371      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1372      *
1373      * @throws org.xml.sax.SAXException
1374      */
1375     public void writeAttrString(
1376         final java.io.Writer writer, String string, String encoding)
1377         throws IOException, SAXException
1378     {
1379         final int end = string.length();
1380         if (end > m_attrBuff.length)
1381         {
1382             m_attrBuff = new char[end * 2 + 1];
1383         }
1384         string.getChars(0, end, m_attrBuff, 0);
1385         final char[] chars = m_attrBuff;
1386 
1387 
1388 
1389         int cleanStart = 0;
1390         int cleanLength = 0;
1391 
1392         char ch = 0;
1393         for (int i = 0; i < end; i++)
1394         {
1395             ch = chars[i];
1396 
1397             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1398             // System.out.println("ch: "+(int)ch);
1399             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1400             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1401             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1402             {
1403                 cleanLength++;
1404             }
1405             else if ('<' == ch || '>' == ch)
1406             {
1407                 cleanLength++; // no escaping in this case, as specified in 15.2
1408             }
1409             else if (
1410                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1411             {
1412                 cleanLength++; // no escaping in this case, as specified in 15.2
1413             }
1414             else
1415             {
1416                 if (cleanLength > 0)
1417                 {
1418                     writer.write(chars,cleanStart,cleanLength);
1419                     cleanLength = 0;
1420                 }
1421                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1422 
1423                 if (i != pos)
1424                 {
1425                     i = pos - 1;
1426                 }
1427                 else
1428                 {
1429                     if (Encodings.isHighUTF16Surrogate(ch) ||
1430                             Encodings.isLowUTF16Surrogate(ch))
1431                     {
1432                         if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
1433                             // move the index if the low surrogate is consumed
1434                             // as writeUTF16Surrogate has written the pair
1435                             if (Encodings.isHighUTF16Surrogate(ch)) {
1436                                 i++;
1437                             }
1438                         }
1439                     }
1440 
1441                     // The next is kind of a hack to keep from escaping in the case
1442                     // of Shift_JIS and the like.
1443 
1444                     /*
1445                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1446                     && (ch != 160))
1447                     {
1448                     writer.write(ch);  // no escaping in this case
1449                     }
1450                     else
1451                     */
1452                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1453                     if (null != outputStringForChar)
1454                     {
1455                         writer.write(outputStringForChar);
1456                     }
1457                     else if (escapingNotNeeded(ch))
1458                     {
1459                         writer.write(ch); // no escaping in this case
1460                     }
1461                     else
1462                     {
1463                         writer.write("&#");
1464                         writer.write(Integer.toString(ch));
1465                         writer.write(';');
1466                     }
1467                 }
1468                 cleanStart = i + 1;
1469             }
1470         } // end of for()
1471 
1472         // are there any clean characters at the end of the array
1473         // that we haven't processed yet?
1474         if (cleanLength > 1)
1475         {
1476             // if the whole string can be written out as-is do so
1477             // otherwise write out the clean chars at the end of the
1478             // array
1479             if (cleanStart == 0)
1480                 writer.write(string);
1481             else
1482                 writer.write(chars, cleanStart, cleanLength);
1483         }
1484         else if (cleanLength == 1)
1485         {
1486             // a little optimization for 1 clean character
1487             // (we could have let the previous if(...) handle them all)
1488             writer.write(ch);
1489         }
1490     }
1491 
1492 
1493 
1494     /**
1495      * Receive notification of character data.
1496      *
1497      * <p>The Parser will call this method to report each chunk of
1498      * character data.  SAX parsers may return all contiguous character
1499      * data in a single chunk, or they may split it into several
1500      * chunks; however, all of the characters in any single event
1501      * must come from the same external entity, so that the Locator
1502      * provides useful information.</p>
1503      *
1504      * <p>The application must not attempt to read from the array
1505      * outside of the specified range.</p>
1506      *
1507      * <p>Note that some parsers will report whitespace using the
1508      * ignorableWhitespace() method rather than this one (validating
1509      * parsers must do so).</p>
1510      *
1511      * @param chars The characters from the XML document.
1512      * @param start The start position in the array.
1513      * @param length The number of characters to read from the array.
1514      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1515      *            wrapping another exception.
1516      * @see #ignorableWhitespace
1517      * @see org.xml.sax.Locator
1518      *
1519      * @throws org.xml.sax.SAXException
1520      */
1521     public final void characters(char chars[], int start, int length)
1522         throws org.xml.sax.SAXException
1523     {
1524 
1525         if (m_elemContext.m_isRaw)
1526         {
1527             try
1528             {
1529                 if (m_elemContext.m_startTagOpen)
1530                 {
1531                     closeStartTag();
1532                     m_elemContext.m_startTagOpen = false;
1533                 }
1534 
1535 //              With m_ispreserve just set true it looks like shouldIndent()
1536 //              will always return false, so drop any possible indentation.
1537 //              if (shouldIndent())
1538 //                  indent();
1539 
1540                 // writer.write("<![CDATA[");
1541                 // writer.write(chars, start, length);
1542                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1543                 m_isprevtext = true;
1544                 // writer.write("]]>");
1545 
1546                 // time to generate characters event
1547                 if (m_tracer != null)
1548                     super.fireCharEvent(chars, start, length);
1549 
1550                 return;
1551             }
1552             catch (IOException ioe)
1553             {
1554                 throw new org.xml.sax.SAXException(
1555                     Utils.messages.createMessage(
1556                         MsgKey.ER_OIERROR,
1557                         null),
1558                     ioe);
1559                 //"IO error", ioe);
1560             }
1561         }
1562         else
1563         {
1564             super.characters(chars, start, length);
1565         }
1566     }
1567 
1568     /**
1569      *  Receive notification of cdata.
1570      *
1571      *  <p>The Parser will call this method to report each chunk of
1572      *  character data.  SAX parsers may return all contiguous character
1573      *  data in a single chunk, or they may split it into several
1574      *  chunks; however, all of the characters in any single event
1575      *  must come from the same external entity, so that the Locator
1576      *  provides useful information.</p>
1577      *
1578      *  <p>The application must not attempt to read from the array
1579      *  outside of the specified range.</p>
1580      *
1581      *  <p>Note that some parsers will report whitespace using the
1582      *  ignorableWhitespace() method rather than this one (validating
1583      *  parsers must do so).</p>
1584      *
1585      *  @param ch The characters from the XML document.
1586      *  @param start The start position in the array.
1587      *  @param length The number of characters to read from the array.
1588      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1589      *             wrapping another exception.
1590      *  @see #ignorableWhitespace
1591      *  @see org.xml.sax.Locator
1592      *
1593      * @throws org.xml.sax.SAXException
1594      */
1595     public final void cdata(char ch[], int start, int length)
1596         throws org.xml.sax.SAXException
1597     {
1598         if ((null != m_elemContext.m_elementName)
1599             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1600                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1601         {
1602             try
1603             {
1604                 if (m_elemContext.m_startTagOpen)
1605                 {
1606                     closeStartTag();
1607                     m_elemContext.m_startTagOpen = false;
1608                 }
1609 
1610                 if (shouldIndent())
1611                     indent();
1612 
1613                 // writer.write(ch, start, length);
1614                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1615             }
1616             catch (IOException ioe)
1617             {
1618                 throw new org.xml.sax.SAXException(
1619                     Utils.messages.createMessage(
1620                         MsgKey.ER_OIERROR,
1621                         null),
1622                     ioe);
1623                 //"IO error", ioe);
1624             }
1625         }
1626         else
1627         {
1628             super.cdata(ch, start, length);
1629         }
1630     }
1631 
1632     /**
1633      *  Receive notification of a processing instruction.
1634      *
1635      *  @param target The processing instruction target.
1636      *  @param data The processing instruction data, or null if
1637      *         none was supplied.
1638      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1639      *             wrapping another exception.
1640      *
1641      * @throws org.xml.sax.SAXException
1642      */
1643     public void processingInstruction(String target, String data)
1644         throws org.xml.sax.SAXException
1645     {
1646         if (m_doIndent) {
1647             m_childNodeNum++;
1648             flushCharactersBuffer();
1649         }
1650         // Process any pending starDocument and startElement first.
1651         flushPending();
1652 
1653         // Use a fairly nasty hack to tell if the next node is supposed to be
1654         // unescaped text.
1655         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1656         {
1657             startNonEscaping();
1658         }
1659         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1660         {
1661             endNonEscaping();
1662         }
1663         else
1664         {
1665             try
1666             {
1667             if (m_elemContext.m_startTagOpen)
1668             {
1669                 closeStartTag();
1670                 m_elemContext.m_startTagOpen = false;
1671             }
1672             else if (m_needToCallStartDocument)
1673                 startDocumentInternal();
1674 
1675             if (shouldIndent())
1676                 indent();
1677 
1678             final java.io.Writer writer = m_writer;
1679             //writer.write("<?" + target);
1680             writer.write("<?");
1681             writer.write(target);
1682 
1683             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1684                 writer.write(' ');
1685 
1686             //writer.write(data + ">"); // different from XML
1687             writer.write(data); // different from XML
1688             writer.write('>'); // different from XML
1689 
1690             // Always output a newline char if not inside of an
1691             // element. The whitespace is not significant in that
1692             // case.
1693             if (m_elemContext.m_currentElemDepth <= 0)
1694                 outputLineSep();
1695 
1696             m_startNewLine = true;
1697             }
1698             catch(IOException e)
1699             {
1700                 throw new SAXException(e);
1701             }
1702         }
1703 
1704         // now generate the PI event
1705         if (m_tracer != null)
1706             super.fireEscapingEvent(target, data);
1707      }
1708 
1709     /**
1710      * Receive notivication of a entityReference.
1711      *
1712      * @param name non-null reference to entity name string.
1713      *
1714      * @throws org.xml.sax.SAXException
1715      */
1716     public final void entityReference(String name)
1717         throws org.xml.sax.SAXException
1718     {
1719         try
1720         {
1721 
1722         final java.io.Writer writer = m_writer;
1723         writer.write('&');
1724         writer.write(name);
1725         writer.write(';');
1726 
1727         } catch(IOException e)
1728         {
1729             throw new SAXException(e);
1730         }
1731     }
1732     /**
1733      * @see ExtendedContentHandler#endElement(String)
1734      */
1735     public final void endElement(String elemName) throws SAXException
1736     {
1737         endElement(null, null, elemName);
1738     }
1739 
1740     /**
1741      * Process the attributes, which means to write out the currently
1742      * collected attributes to the writer. The attributes are not
1743      * cleared by this method
1744      *
1745      * @param writer the writer to write processed attributes to.
1746      * @param nAttrs the number of attributes in m_attributes
1747      * to be processed
1748      *
1749      * @throws org.xml.sax.SAXException
1750      */
1751     public void processAttributes(java.io.Writer writer, int nAttrs)
1752         throws IOException,SAXException
1753     {
1754             /*
1755              * process the collected attributes
1756              */
1757             for (int i = 0; i < nAttrs; i++)
1758             {
1759                 processAttribute(
1760                     writer,
1761                     m_attributes.getQName(i),
1762                     m_attributes.getValue(i),
1763                     m_elemContext.m_elementDesc);
1764             }
1765     }
1766 
1767     /**
1768      * For the enclosing elements starting tag write out out any attributes
1769      * followed by ">"
1770      *
1771      *@throws org.xml.sax.SAXException
1772      */
1773     protected void closeStartTag() throws SAXException
1774     {
1775             try
1776             {
1777 
1778             // finish processing attributes, time to fire off the start element event
1779             if (m_tracer != null)
1780                 super.fireStartElem(m_elemContext.m_elementName);
1781 
1782             int nAttrs = m_attributes.getLength();
1783             if (nAttrs>0)
1784             {
1785                 processAttributes(m_writer, nAttrs);
1786                 // clear attributes object for re-use with next element
1787                 m_attributes.clear();
1788             }
1789 
1790             m_writer.write('>');
1791 
1792             /* whether Xalan or XSLTC, we have the prefix mappings now, so
1793              * lets determine if the current element is specified in the cdata-
1794              * section-elements list.
1795              */
1796             if (m_StringOfCDATASections != null)
1797                 m_elemContext.m_isCdataSection = isCdataSection();
1798 
1799             }
1800             catch(IOException e)
1801             {
1802                 throw new SAXException(e);
1803             }
1804     }
1805 
1806         /**
1807          * This method is used when a prefix/uri namespace mapping
1808          * is indicated after the element was started with a
1809          * startElement() and before and endElement().
1810          * startPrefixMapping(prefix,uri) would be used before the
1811          * startElement() call.
1812          * @param uri the URI of the namespace
1813          * @param prefix the prefix associated with the given URI.
1814          *
1815          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1816          */
1817         public void namespaceAfterStartElement(String prefix, String uri)
1818             throws SAXException
1819         {
1820             // hack for XSLTC with finding URI for default namespace
1821             if (m_elemContext.m_elementURI == null)
1822             {
1823                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1824                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1825                 {
1826                     // the elements URI is not known yet, and it
1827                     // doesn't have a prefix, and we are currently
1828                     // setting the uri for prefix "", so we have
1829                     // the uri for the element... lets remember it
1830                     m_elemContext.m_elementURI = uri;
1831                 }
1832             }
1833             startPrefixMapping(prefix,uri,false);
1834         }
1835 
1836     public void startDTD(String name, String publicId, String systemId)
1837         throws SAXException
1838     {
1839         m_inDTD = true;
1840         super.startDTD(name, publicId, systemId);
1841     }
1842 
1843     /**
1844      * Report the end of DTD declarations.
1845      * @throws org.xml.sax.SAXException The application may raise an exception.
1846      * @see #startDTD
1847      */
1848     public void endDTD() throws org.xml.sax.SAXException
1849     {
1850         m_inDTD = false;
1851         /* for ToHTMLStream the DOCTYPE is entirely output in the
1852          * startDocumentInternal() method, so don't do anything here
1853          */
1854     }
1855     /**
1856      * This method does nothing.
1857      */
1858     public void attributeDecl(
1859         String eName,
1860         String aName,
1861         String type,
1862         String valueDefault,
1863         String value)
1864         throws SAXException
1865     {
1866         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1867     }
1868 
1869     /**
1870      * This method does nothing.
1871      */
1872     public void elementDecl(String name, String model) throws SAXException
1873     {
1874         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1875     }
1876     /**
1877      * This method does nothing.
1878      */
1879     public void internalEntityDecl(String name, String value)
1880         throws SAXException
1881     {
1882         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1883     }
1884     /**
1885      * This method does nothing.
1886      */
1887     public void externalEntityDecl(
1888         String name,
1889         String publicId,
1890         String systemId)
1891         throws SAXException
1892     {
1893         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1894     }
1895 
1896     /**
1897      * This method is used to add an attribute to the currently open element.
1898      * The caller has guaranted that this attribute is unique, which means that it
1899      * not been seen before and will not be seen again.
1900      *
1901      * @param name the qualified name of the attribute
1902      * @param value the value of the attribute which can contain only
1903      * ASCII printable characters characters in the range 32 to 127 inclusive.
1904      * @param flags the bit values of this integer give optimization information.
1905      */
1906     public void addUniqueAttribute(String name, String value, int flags)
1907         throws SAXException
1908     {
1909         try
1910         {
1911             final java.io.Writer writer = m_writer;
1912             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1913             {
1914                 // "flags" has indicated that the characters
1915                 // '>'  '<'   '&'  and '"' are not in the value and
1916                 // m_htmlcharInfo has recorded that there are no other
1917                 // entities in the range 0 to 127 so we write out the
1918                 // value directly
1919                 writer.write(' ');
1920                 writer.write(name);
1921                 writer.write("=\"");
1922                 writer.write(value);
1923                 writer.write('"');
1924             }
1925             else if (
1926                 (flags & HTML_ATTREMPTY) > 0
1927                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1928             {
1929                 writer.write(' ');
1930                 writer.write(name);
1931             }
1932             else
1933             {
1934                 writer.write(' ');
1935                 writer.write(name);
1936                 writer.write("=\"");
1937                 if ((flags & HTML_ATTRURL) > 0)
1938                 {
1939                     writeAttrURI(writer, value, m_specialEscapeURLs);
1940                 }
1941                 else
1942                 {
1943                     writeAttrString(writer, value, this.getEncoding());
1944                 }
1945                 writer.write('"');
1946             }
1947         } catch (IOException e) {
1948             throw new SAXException(e);
1949         }
1950     }
1951 
1952     public void comment(char ch[], int start, int length)
1953             throws SAXException
1954     {
1955         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1956         if (m_inDTD)
1957             return;
1958         super.comment(ch, start, length);
1959     }
1960 
1961     public boolean reset()
1962     {
1963         boolean ret = super.reset();
1964         if (!ret)
1965             return false;
1966         initToHTMLStream();
1967         return true;
1968     }
1969 
1970     private void initToHTMLStream()
1971     {
1972         m_isprevblock = false;
1973         m_inDTD = false;
1974         m_omitMetaTag = false;
1975         m_specialEscapeURLs = true;
1976     }
1977 
1978     static class Trie
1979     {
1980         /**
1981          * A digital search trie for 7-bit ASCII text
1982          * The API is a subset of java.util.Hashtable
1983          * The key must be a 7-bit ASCII string
1984          * The value may be any Java Object
1985          * One can get an object stored in a trie from its key,
1986          * but the search is either case sensitive or case
1987          * insensitive to the characters in the key, and this
1988          * choice of sensitivity or insensitivity is made when
1989          * the Trie is created, before any objects are put in it.
1990          *
1991          * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
1992          * It exists to cut the serializers dependancy on that package.
1993          *
1994          * @xsl.usage internal
1995          */
1996 
1997         /** Size of the m_nextChar array.  */
1998         public static final int ALPHA_SIZE = 128;
1999 
2000         /** The root node of the tree.    */
2001         final Node m_Root;
2002 
2003         /** helper buffer to convert Strings to char arrays */
2004         private char[] m_charBuffer = new char[0];
2005 
2006         /** true if the search for an object is lower case only with the key */
2007         private final boolean m_lowerCaseOnly;
2008 
2009         /**
2010          * Construct the trie that has a case insensitive search.
2011          */
2012         public Trie()
2013         {
2014             m_Root = new Node();
2015             m_lowerCaseOnly = false;
2016         }
2017 
2018         /**
2019          * Construct the trie given the desired case sensitivity with the key.
2020          * @param lowerCaseOnly true if the search keys are to be loser case only,
2021          * not case insensitive.
2022          */
2023         public Trie(boolean lowerCaseOnly)
2024         {
2025             m_Root = new Node();
2026             m_lowerCaseOnly = lowerCaseOnly;
2027         }
2028 
2029         /**
2030          * Put an object into the trie for lookup.
2031          *
2032          * @param key must be a 7-bit ASCII string
2033          * @param value any java object.
2034          *
2035          * @return The old object that matched key, or null.
2036          */
2037         public Object put(String key, Object value)
2038         {
2039 
2040             final int len = key.length();
2041             if (len > m_charBuffer.length)
2042             {
2043                 // make the biggest buffer ever needed in get(String)
2044                 m_charBuffer = new char[len];
2045             }
2046 
2047             Node node = m_Root;
2048 
2049             for (int i = 0; i < len; i++)
2050             {
2051                 Node nextNode =
2052                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2053 
2054                 if (nextNode != null)
2055                 {
2056                     node = nextNode;
2057                 }
2058                 else
2059                 {
2060                     for (; i < len; i++)
2061                     {
2062                         Node newNode = new Node();
2063                         if (m_lowerCaseOnly)
2064                         {
2065                             // put this value into the tree only with a lower case key
2066                             node.m_nextChar[Character.toLowerCase(
2067                                 key.charAt(i))] =
2068                                 newNode;
2069                         }
2070                         else
2071                         {
2072                             // put this value into the tree with a case insensitive key
2073                             node.m_nextChar[Character.toUpperCase(
2074                                 key.charAt(i))] =
2075                                 newNode;
2076                             node.m_nextChar[Character.toLowerCase(
2077                                 key.charAt(i))] =
2078                                 newNode;
2079                         }
2080                         node = newNode;
2081                     }
2082                     break;
2083                 }
2084             }
2085 
2086             Object ret = node.m_Value;
2087 
2088             node.m_Value = value;
2089 
2090             return ret;
2091         }
2092 
2093         /**
2094          * Get an object that matches the key.
2095          *
2096          * @param key must be a 7-bit ASCII string
2097          *
2098          * @return The object that matches the key, or null.
2099          */
2100         public Object get(final String key)
2101         {
2102 
2103             final int len = key.length();
2104 
2105             /* If the name is too long, we won't find it, this also keeps us
2106              * from overflowing m_charBuffer
2107              */
2108             if (m_charBuffer.length < len)
2109                 return null;
2110 
2111             Node node = m_Root;
2112             switch (len) // optimize the look up based on the number of chars
2113             {
2114                 // case 0 looks silly, but the generated bytecode runs
2115                 // faster for lookup of elements of length 2 with this in
2116                 // and a fair bit faster.  Don't know why.
2117                 case 0 :
2118                     {
2119                         return null;
2120                     }
2121 
2122                 case 1 :
2123                     {
2124                         final char ch = key.charAt(0);
2125                         if (ch < ALPHA_SIZE)
2126                         {
2127                             node = node.m_nextChar[ch];
2128                             if (node != null)
2129                                 return node.m_Value;
2130                         }
2131                         return null;
2132                     }
2133                     //                comment out case 2 because the default is faster
2134                     //                case 2 :
2135                     //                    {
2136                     //                        final char ch0 = key.charAt(0);
2137                     //                        final char ch1 = key.charAt(1);
2138                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2139                     //                        {
2140                     //                            node = node.m_nextChar[ch0];
2141                     //                            if (node != null)
2142                     //                            {
2143                     //
2144                     //                                if (ch1 < ALPHA_SIZE)
2145                     //                                {
2146                     //                                    node = node.m_nextChar[ch1];
2147                     //                                    if (node != null)
2148                     //                                        return node.m_Value;
2149                     //                                }
2150                     //                            }
2151                     //                        }
2152                     //                        return null;
2153                     //                   }
2154                 default :
2155                     {
2156                         for (int i = 0; i < len; i++)
2157                         {
2158                             // A thread-safe way to loop over the characters
2159                             final char ch = key.charAt(i);
2160                             if (ALPHA_SIZE <= ch)
2161                             {
2162                                 // the key is not 7-bit ASCII so we won't find it here
2163                                 return null;
2164                             }
2165 
2166                             node = node.m_nextChar[ch];
2167                             if (node == null)
2168                                 return null;
2169                         }
2170 
2171                         return node.m_Value;
2172                     }
2173             }
2174         }
2175 
2176         /**
2177          * The node representation for the trie.
2178          * @xsl.usage internal
2179          */
2180         private class Node
2181         {
2182 
2183             /**
2184              * Constructor, creates a Node[ALPHA_SIZE].
2185              */
2186             Node()
2187             {
2188                 m_nextChar = new Node[ALPHA_SIZE];
2189                 m_Value = null;
2190             }
2191 
2192             /** The next nodes.   */
2193             final Node m_nextChar[];
2194 
2195             /** The value.   */
2196             Object m_Value;
2197         }
2198         /**
2199          * Construct the trie from another Trie.
2200          * Both the existing Trie and this new one share the same table for
2201          * lookup, and it is assumed that the table is fully populated and
2202          * not changing anymore.
2203          *
2204          * @param existingTrie the Trie that this one is a copy of.
2205          */
2206         public Trie(Trie existingTrie)
2207         {
2208             // copy some fields from the existing Trie into this one.
2209             m_Root = existingTrie.m_Root;
2210             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2211 
2212             // get a buffer just big enough to hold the longest key in the table.
2213             int max = existingTrie.getLongestKeyLength();
2214             m_charBuffer = new char[max];
2215         }
2216 
2217         /**
2218          * Get an object that matches the key.
2219          * This method is faster than get(), but is not thread-safe.
2220          *
2221          * @param key must be a 7-bit ASCII string
2222          *
2223          * @return The object that matches the key, or null.
2224          */
2225         public Object get2(final String key)
2226         {
2227 
2228             final int len = key.length();
2229 
2230             /* If the name is too long, we won't find it, this also keeps us
2231              * from overflowing m_charBuffer
2232              */
2233             if (m_charBuffer.length < len)
2234                 return null;
2235 
2236             Node node = m_Root;
2237             switch (len) // optimize the look up based on the number of chars
2238             {
2239                 // case 0 looks silly, but the generated bytecode runs
2240                 // faster for lookup of elements of length 2 with this in
2241                 // and a fair bit faster.  Don't know why.
2242                 case 0 :
2243                     {
2244                         return null;
2245                     }
2246 
2247                 case 1 :
2248                     {
2249                         final char ch = key.charAt(0);
2250                         if (ch < ALPHA_SIZE)
2251                         {
2252                             node = node.m_nextChar[ch];
2253                             if (node != null)
2254                                 return node.m_Value;
2255                         }
2256                         return null;
2257                     }
2258                 default :
2259                     {
2260                         /* Copy string into array. This is not thread-safe because
2261                          * it modifies the contents of m_charBuffer. If multiple
2262                          * threads were to use this Trie they all would be
2263                          * using this same array (not good). So this
2264                          * method is not thread-safe, but it is faster because
2265                          * converting to a char[] and looping over elements of
2266                          * the array is faster than a String's charAt(i).
2267                          */
2268                         key.getChars(0, len, m_charBuffer, 0);
2269 
2270                         for (int i = 0; i < len; i++)
2271                         {
2272                             final char ch = m_charBuffer[i];
2273                             if (ALPHA_SIZE <= ch)
2274                             {
2275                                 // the key is not 7-bit ASCII so we won't find it here
2276                                 return null;
2277                             }
2278 
2279                             node = node.m_nextChar[ch];
2280                             if (node == null)
2281                                 return null;
2282                         }
2283 
2284                         return node.m_Value;
2285                     }
2286             }
2287         }
2288 
2289         /**
2290          * Get the length of the longest key used in the table.
2291          */
2292         public int getLongestKeyLength()
2293         {
2294             return m_charBuffer.length;
2295         }
2296     }
2297 }