1 /*
   2  * Copyright (c) 2014, 2016 Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.serializer;
  22 
  23 import java.io.IOException;
  24 import java.io.OutputStream;
  25 import java.io.UnsupportedEncodingException;
  26 import java.util.Properties;
  27 
  28 import javax.xml.transform.Result;
  29 
  30 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  31 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  32 import org.xml.sax.Attributes;
  33 import org.xml.sax.SAXException;
  34 
  35 /**
  36  * This serializer takes a series of SAX or
  37  * SAX-like events and writes its output
  38  * to the given stream.
  39  *
  40  * This class is not a public API, it is public
  41  * because it is used from another package.
  42  *
  43  * @xsl.usage internal
  44  */
  45 public final class ToHTMLStream extends ToStream
  46 {
  47 
  48     /** This flag is set while receiving events from the DTD */
  49     protected boolean m_inDTD = false;
  50 
  51     /** True if the previous element is a block element. */
  52     private boolean m_isprevblock = false;
  53 
  54     /**
  55      * Map that tells which XML characters should have special treatment, and it
  56      *  provides character to entity name lookup.
  57      */
  58     private static final CharInfo m_htmlcharInfo =
  59 //        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
  60         CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
  61 
  62     /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
  63     static final Trie m_elementFlags = new Trie();
  64 
  65     static {
  66         initTagReference(m_elementFlags);
  67     }
  68     static void initTagReference(Trie m_elementFlags) {
  69 
  70         // HTML 4.0 loose DTD
  71         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  72         m_elementFlags.put(
  73             "FRAME",
  74             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  75         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  76         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  77         m_elementFlags.put(
  78             "ISINDEX",
  79             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  80         m_elementFlags.put(
  81             "APPLET",
  82             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  83         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  84         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  85         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  86 
  87         // HTML 4.0 strict DTD
  88         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  89         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  90         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  91         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  92         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  93         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  94         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
  95         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
  96         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
  97         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
  98         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
  99         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
 100         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
 101         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
 102         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
 103         m_elementFlags.put(
 104             "SUP",
 105             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 106         m_elementFlags.put(
 107             "SUB",
 108             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 109         m_elementFlags.put(
 110             "SPAN",
 111             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 112         m_elementFlags.put(
 113             "BDO",
 114             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 115         m_elementFlags.put(
 116             "BR",
 117             new ElemDesc(
 118                 0
 119                     | ElemDesc.SPECIAL
 120                     | ElemDesc.ASPECIAL
 121                     | ElemDesc.EMPTY
 122                     | ElemDesc.BLOCK));
 123         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
 124         m_elementFlags.put(
 125             "ADDRESS",
 126             new ElemDesc(
 127                 0
 128                     | ElemDesc.BLOCK
 129                     | ElemDesc.BLOCKFORM
 130                     | ElemDesc.BLOCKFORMFIELDSET));
 131         m_elementFlags.put(
 132             "DIV",
 133             new ElemDesc(
 134                 0
 135                     | ElemDesc.BLOCK
 136                     | ElemDesc.BLOCKFORM
 137                     | ElemDesc.BLOCKFORMFIELDSET));
 138         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
 139         m_elementFlags.put(
 140             "MAP",
 141             new ElemDesc(
 142                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
 143         m_elementFlags.put(
 144             "AREA",
 145             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 146         m_elementFlags.put(
 147             "LINK",
 148             new ElemDesc(
 149                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 150         m_elementFlags.put(
 151             "IMG",
 152             new ElemDesc(
 153                 0
 154                     | ElemDesc.SPECIAL
 155                     | ElemDesc.ASPECIAL
 156                     | ElemDesc.EMPTY
 157                     | ElemDesc.WHITESPACESENSITIVE));
 158         m_elementFlags.put(
 159             "OBJECT",
 160             new ElemDesc(
 161                 0
 162                     | ElemDesc.SPECIAL
 163                     | ElemDesc.ASPECIAL
 164                     | ElemDesc.HEADMISC
 165                     | ElemDesc.WHITESPACESENSITIVE));
 166         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
 167         m_elementFlags.put(
 168             "HR",
 169             new ElemDesc(
 170                 0
 171                     | ElemDesc.BLOCK
 172                     | ElemDesc.BLOCKFORM
 173                     | ElemDesc.BLOCKFORMFIELDSET
 174                     | ElemDesc.EMPTY));
 175         m_elementFlags.put(
 176             "P",
 177             new ElemDesc(
 178                 0
 179                     | ElemDesc.BLOCK
 180                     | ElemDesc.BLOCKFORM
 181                     | ElemDesc.BLOCKFORMFIELDSET));
 182         m_elementFlags.put(
 183             "H1",
 184             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 185         m_elementFlags.put(
 186             "H2",
 187             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 188         m_elementFlags.put(
 189             "H3",
 190             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 191         m_elementFlags.put(
 192             "H4",
 193             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 194         m_elementFlags.put(
 195             "H5",
 196             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 197         m_elementFlags.put(
 198             "H6",
 199             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 200         m_elementFlags.put(
 201             "PRE",
 202             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
 203         m_elementFlags.put(
 204             "Q",
 205             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 206         m_elementFlags.put(
 207             "BLOCKQUOTE",
 208             new ElemDesc(
 209                 0
 210                     | ElemDesc.BLOCK
 211                     | ElemDesc.BLOCKFORM
 212                     | ElemDesc.BLOCKFORMFIELDSET));
 213         m_elementFlags.put("INS", new ElemDesc(0));
 214         m_elementFlags.put("DEL", new ElemDesc(0));
 215         m_elementFlags.put(
 216             "DL",
 217             new ElemDesc(
 218                 0
 219                     | ElemDesc.BLOCK
 220                     | ElemDesc.BLOCKFORM
 221                     | ElemDesc.BLOCKFORMFIELDSET));
 222         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
 223         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
 224         m_elementFlags.put(
 225             "OL",
 226             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 227         m_elementFlags.put(
 228             "UL",
 229             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 230         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
 231         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
 232         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
 233         m_elementFlags.put(
 234             "INPUT",
 235             new ElemDesc(
 236                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
 237         m_elementFlags.put(
 238             "SELECT",
 239             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 240         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
 241         m_elementFlags.put("OPTION", new ElemDesc(0));
 242         m_elementFlags.put(
 243             "TEXTAREA",
 244             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 245         m_elementFlags.put(
 246             "FIELDSET",
 247             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
 248         m_elementFlags.put("LEGEND", new ElemDesc(0));
 249         m_elementFlags.put(
 250             "BUTTON",
 251             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 252         m_elementFlags.put(
 253             "TABLE",
 254             new ElemDesc(
 255                 0
 256                     | ElemDesc.BLOCK
 257                     | ElemDesc.BLOCKFORM
 258                     | ElemDesc.BLOCKFORMFIELDSET));
 259         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
 260         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
 261         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
 262         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
 263         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
 264         m_elementFlags.put(
 265             "COL",
 266             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 267         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
 268         m_elementFlags.put("TH", new ElemDesc(0));
 269         m_elementFlags.put("TD", new ElemDesc(0));
 270         m_elementFlags.put(
 271             "HEAD",
 272             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
 273         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
 274         m_elementFlags.put(
 275             "BASE",
 276             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 277         m_elementFlags.put(
 278             "META",
 279             new ElemDesc(
 280                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 281         m_elementFlags.put(
 282             "STYLE",
 283             new ElemDesc(
 284                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
 285         m_elementFlags.put(
 286             "SCRIPT",
 287             new ElemDesc(
 288                 0
 289                     | ElemDesc.SPECIAL
 290                     | ElemDesc.ASPECIAL
 291                     | ElemDesc.HEADMISC
 292                     | ElemDesc.RAW));
 293         m_elementFlags.put(
 294             "NOSCRIPT",
 295             new ElemDesc(
 296                 0
 297                     | ElemDesc.BLOCK
 298                     | ElemDesc.BLOCKFORM
 299                     | ElemDesc.BLOCKFORMFIELDSET));
 300         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
 301 
 302         // From "John Ky" <hand@syd.speednet.com.au
 303         // Transitional Document Type Definition ()
 304         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
 305         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 306 
 307         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
 308         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 309         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 310 
 311         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
 312         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 313 
 314         // From "John Ky" <hand@syd.speednet.com.au
 315         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 316 
 317         // HTML 4.0, section 16.5
 318         m_elementFlags.put(
 319             "IFRAME",
 320             new ElemDesc(
 321                 0
 322                     | ElemDesc.BLOCK
 323                     | ElemDesc.BLOCKFORM
 324                     | ElemDesc.BLOCKFORMFIELDSET));
 325 
 326         // Netscape 4 extension
 327         m_elementFlags.put(
 328             "LAYER",
 329             new ElemDesc(
 330                 0
 331                     | ElemDesc.BLOCK
 332                     | ElemDesc.BLOCKFORM
 333                     | ElemDesc.BLOCKFORMFIELDSET));
 334         // Netscape 4 extension
 335         m_elementFlags.put(
 336             "ILAYER",
 337             new ElemDesc(
 338                 0
 339                     | ElemDesc.BLOCK
 340                     | ElemDesc.BLOCKFORM
 341                     | ElemDesc.BLOCKFORMFIELDSET));
 342 
 343 
 344         // NOW FOR ATTRIBUTE INFORMATION . . .
 345         ElemDesc elemDesc;
 346 
 347 
 348         // ----------------------------------------------
 349         elemDesc = (ElemDesc) m_elementFlags.get("a");
 350         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 351         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
 352 
 353         // ----------------------------------------------
 354         elemDesc = (ElemDesc) m_elementFlags.get("area");
 355         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 356         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
 357 
 358         // ----------------------------------------------
 359         elemDesc = (ElemDesc) m_elementFlags.get("base");
 360         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 361 
 362         // ----------------------------------------------
 363         elemDesc = (ElemDesc) m_elementFlags.get("button");
 364         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 365 
 366         // ----------------------------------------------
 367         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
 368         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 369 
 370         // ----------------------------------------------
 371         elemDesc = (ElemDesc) m_elementFlags.get("del");
 372         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 373 
 374         // ----------------------------------------------
 375         elemDesc = (ElemDesc) m_elementFlags.get("dir");
 376         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 377 
 378         // ----------------------------------------------
 379 
 380         elemDesc = (ElemDesc) m_elementFlags.get("div");
 381         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
 382         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 383 
 384         // ----------------------------------------------
 385         elemDesc = (ElemDesc) m_elementFlags.get("dl");
 386         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 387 
 388         // ----------------------------------------------
 389         elemDesc = (ElemDesc) m_elementFlags.get("form");
 390         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
 391 
 392         // ----------------------------------------------
 393         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
 394         elemDesc = (ElemDesc) m_elementFlags.get("frame");
 395         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 396         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 397         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
 398 
 399         // ----------------------------------------------
 400         elemDesc = (ElemDesc) m_elementFlags.get("head");
 401         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
 402 
 403         // ----------------------------------------------
 404         elemDesc = (ElemDesc) m_elementFlags.get("hr");
 405         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
 406 
 407         // ----------------------------------------------
 408         // HTML 4.0, section 16.5
 409         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
 410         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 411         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 412 
 413         // ----------------------------------------------
 414         // Netscape 4 extension
 415         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
 416         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 417 
 418         // ----------------------------------------------
 419         elemDesc = (ElemDesc) m_elementFlags.get("img");
 420         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 421         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 422         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 423         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 424 
 425         // ----------------------------------------------
 426         elemDesc = (ElemDesc) m_elementFlags.get("input");
 427         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 428         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 429         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
 430         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 431         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 432         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 433 
 434         // ----------------------------------------------
 435         elemDesc = (ElemDesc) m_elementFlags.get("ins");
 436         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 437 
 438         // ----------------------------------------------
 439         // Netscape 4 extension
 440         elemDesc = (ElemDesc) m_elementFlags.get("layer");
 441         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 442 
 443         // ----------------------------------------------
 444         elemDesc = (ElemDesc) m_elementFlags.get("link");
 445         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 446 
 447         // ----------------------------------------------
 448         elemDesc = (ElemDesc) m_elementFlags.get("menu");
 449         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 450 
 451         // ----------------------------------------------
 452         elemDesc = (ElemDesc) m_elementFlags.get("object");
 453         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
 454         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
 455         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
 456         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
 457         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 458         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
 459 
 460         // ----------------------------------------------
 461         elemDesc = (ElemDesc) m_elementFlags.get("ol");
 462         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 463 
 464         // ----------------------------------------------
 465         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
 466         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 467 
 468         // ----------------------------------------------
 469         elemDesc = (ElemDesc) m_elementFlags.get("option");
 470         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
 471         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 472 
 473         // ----------------------------------------------
 474         elemDesc = (ElemDesc) m_elementFlags.get("q");
 475         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 476 
 477         // ----------------------------------------------
 478         elemDesc = (ElemDesc) m_elementFlags.get("script");
 479         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 480         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
 481         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
 482 
 483         // ----------------------------------------------
 484         elemDesc = (ElemDesc) m_elementFlags.get("select");
 485         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 486         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
 487 
 488         // ----------------------------------------------
 489         elemDesc = (ElemDesc) m_elementFlags.get("table");
 490         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 491 
 492         // ----------------------------------------------
 493         elemDesc = (ElemDesc) m_elementFlags.get("td");
 494         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 495 
 496         // ----------------------------------------------
 497         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
 498         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 499         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 500 
 501         // ----------------------------------------------
 502         elemDesc = (ElemDesc) m_elementFlags.get("th");
 503         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 504 
 505         // ----------------------------------------------
 506         // The nowrap attribute of a tr element is both
 507         // a Netscape and Internet-Explorer extension
 508         elemDesc = (ElemDesc) m_elementFlags.get("tr");
 509         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 510 
 511         // ----------------------------------------------
 512         elemDesc = (ElemDesc) m_elementFlags.get("ul");
 513         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 514     }
 515 
 516     /**
 517      * Dummy element for elements not found.
 518      */
 519     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
 520 
 521     /** True if URLs should be specially escaped with the %xx form. */
 522     private boolean m_specialEscapeURLs = true;
 523 
 524     /** True if the META tag should be omitted. */
 525     private boolean m_omitMetaTag = false;
 526 
 527     /**
 528      * Tells if the formatter should use special URL escaping.
 529      *
 530      * @param bool True if URLs should be specially escaped with the %xx form.
 531      */
 532     public void setSpecialEscapeURLs(boolean bool)
 533     {
 534         m_specialEscapeURLs = bool;
 535     }
 536 
 537     /**
 538      * Tells if the formatter should omit the META tag.
 539      *
 540      * @param bool True if the META tag should be omitted.
 541      */
 542     public void setOmitMetaTag(boolean bool)
 543     {
 544         m_omitMetaTag = bool;
 545     }
 546 
 547     /**
 548      * Specifies an output format for this serializer. It the
 549      * serializer has already been associated with an output format,
 550      * it will switch to the new format. This method should not be
 551      * called while the serializer is in the process of serializing
 552      * a document.
 553      *
 554      * This method can be called multiple times before starting
 555      * the serialization of a particular result-tree. In principle
 556      * all serialization parameters can be changed, with the exception
 557      * of method="html" (it must be method="html" otherwise we
 558      * shouldn't even have a ToHTMLStream object here!)
 559      *
 560      * @param format The output format or serialzation parameters
 561      * to use.
 562      */
 563     public void setOutputFormat(Properties format)
 564     {
 565 
 566         m_specialEscapeURLs =
 567             OutputPropertyUtils.getBooleanProperty(
 568                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
 569                 format);
 570 
 571         m_omitMetaTag =
 572             OutputPropertyUtils.getBooleanProperty(
 573                 OutputPropertiesFactory.S_OMIT_META_TAG,
 574                 format);
 575 
 576         super.setOutputFormat(format);
 577     }
 578 
 579     /**
 580      * Tells if the formatter should use special URL escaping.
 581      *
 582      * @return True if URLs should be specially escaped with the %xx form.
 583      */
 584     private final boolean getSpecialEscapeURLs()
 585     {
 586         return m_specialEscapeURLs;
 587     }
 588 
 589     /**
 590      * Tells if the formatter should omit the META tag.
 591      *
 592      * @return True if the META tag should be omitted.
 593      */
 594     private final boolean getOmitMetaTag()
 595     {
 596         return m_omitMetaTag;
 597     }
 598 
 599     /**
 600      * Get a description of the given element.
 601      *
 602      * @param name non-null name of element, case insensitive.
 603      *
 604      * @return non-null reference to ElemDesc, which may be m_dummy if no
 605      *         element description matches the given name.
 606      */
 607     public static final ElemDesc getElemDesc(String name)
 608     {
 609         /* this method used to return m_dummy  when name was null
 610          * but now it doesn't check and and requires non-null name.
 611          */
 612         Object obj = m_elementFlags.get(name);
 613         if (null != obj)
 614             return (ElemDesc)obj;
 615         return m_dummy;
 616     }
 617 
 618     /**
 619      * A Trie that is just a copy of the "static" one.
 620      * We need this one to be able to use the faster, but not thread-safe
 621      * method Trie.get2(name)
 622      */
 623     private Trie m_htmlInfo = new Trie(m_elementFlags);
 624     /**
 625      * Calls to this method could be replaced with calls to
 626      * getElemDesc(name), but this one should be faster.
 627      */
 628     private ElemDesc getElemDesc2(String name)
 629     {
 630         Object obj = m_htmlInfo.get2(name);
 631         if (null != obj)
 632             return (ElemDesc)obj;
 633         return m_dummy;
 634     }
 635 
 636     /**
 637      * Default constructor.
 638      */
 639     public ToHTMLStream()
 640     {
 641 
 642         super();
 643         m_charInfo = m_htmlcharInfo;
 644         // initialize namespaces
 645         m_prefixMap = new NamespaceMappings();
 646 
 647     }
 648 
 649     /** The name of the current element. */
 650 //    private String m_currentElementName = null;
 651 
 652     /**
 653      * Receive notification of the beginning of a document.
 654      *
 655      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 656      *            wrapping another exception.
 657      *
 658      * @throws org.xml.sax.SAXException
 659      */
 660     protected void startDocumentInternal() throws org.xml.sax.SAXException
 661     {
 662         super.startDocumentInternal();
 663 
 664         m_needToCallStartDocument = false;
 665         m_needToOutputDocTypeDecl = true;
 666         m_startNewLine = false;
 667         setOmitXMLDeclaration(true);
 668 
 669         if (true == m_needToOutputDocTypeDecl)
 670         {
 671             String doctypeSystem = getDoctypeSystem();
 672             String doctypePublic = getDoctypePublic();
 673             if ((null != doctypeSystem) || (null != doctypePublic))
 674             {
 675                 final java.io.Writer writer = m_writer;
 676                 try
 677                 {
 678                 writer.write("<!DOCTYPE html");
 679 
 680                 if (null != doctypePublic)
 681                 {
 682                     writer.write(" PUBLIC \"");
 683                     writer.write(doctypePublic);
 684                     writer.write('"');
 685                 }
 686 
 687                 if (null != doctypeSystem)
 688                 {
 689                     if (null == doctypePublic)
 690                         writer.write(" SYSTEM \"");
 691                     else
 692                         writer.write(" \"");
 693 
 694                     writer.write(doctypeSystem);
 695                     writer.write('"');
 696                 }
 697 
 698                 writer.write('>');
 699                 outputLineSep();
 700                 }
 701                 catch(IOException e)
 702                 {
 703                     throw new SAXException(e);
 704                 }
 705             }
 706         }
 707 
 708         m_needToOutputDocTypeDecl = false;
 709     }
 710 
 711     /**
 712      * Receive notification of the end of a document.
 713      *
 714      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 715      *            wrapping another exception.
 716      *
 717      * @throws org.xml.sax.SAXException
 718      */
 719     public final void endDocument() throws org.xml.sax.SAXException
 720     {
 721         flushCharactersBuffer();
 722         flushPending();
 723         if (m_doIndent && !m_isprevtext)
 724         {
 725             try
 726             {
 727             outputLineSep();
 728             }
 729             catch(IOException e)
 730             {
 731                 throw new SAXException(e);
 732             }
 733         }
 734 
 735         flushWriter();
 736         if (m_tracer != null)
 737             super.fireEndDoc();
 738     }
 739 
 740     /**
 741      * If the previous is an inline element, won't insert a new line before the
 742      * text.
 743      *
 744      */
 745     protected boolean shouldIndentForText() {
 746         return super.shouldIndentForText() && m_isprevblock;
 747     }
 748 
 749     /**
 750      * Only check m_doIndent, disregard m_ispreserveSpace.
 751      *
 752      * @return True if the content should be formatted.
 753      */
 754     protected boolean shouldFormatOutput() {
 755         return m_doIndent;
 756     }
 757 
 758     /**
 759      * Receive notification of the beginning of an element.
 760      *
 761      *
 762      * @param namespaceURI
 763      * @param localName
 764      * @param name
 765      *            The element type name.
 766      * @param atts
 767      *            The attributes attached to the element, if any.
 768      * @throws org.xml.sax.SAXException
 769      *             Any SAX exception, possibly wrapping another exception.
 770      * @see #endElement
 771      * @see org.xml.sax.AttributeList
 772      */
    public void startElement(
        String namespaceURI,
        String localName,
        String name,
        Attributes atts)
        throws org.xml.sax.SAXException
    {
        // Overview: finishes whatever output is still pending, delegates
        // namespaced elements to the XML serializer in the superclass, and
        // otherwise writes "<name" for an HTML element, handling indentation,
        // the element-context stack, and the special cases for EMPTY and HEAD
        // elements. The start tag itself is left open (no '>' is written here
        // except for the HEAD element).

        // Count this element as a child of the current parent.
        // (This will add an extra one if the element has a namespace and is
        // handed to super.startElement() below, but that does no harm.)
        m_childNodeNum++;
        flushCharactersBuffer();
        ElemContext elemContext = m_elemContext;

        // clean up any pending things first
        // NOTE: this is an else-if chain, so only the first pending condition
        // that applies is handled on this call.
        if (elemContext.m_startTagOpen)
        {
            closeStartTag();
            elemContext.m_startTagOpen = false;
        }
        else if (m_cdataTagOpen)
        {
            closeCDATA();
            m_cdataTagOpen = false;
        }
        else if (m_needToCallStartDocument)
        {
            startDocumentInternal();
            m_needToCallStartDocument = false;
        }


        // if this element has a namespace then treat it like XML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.startElement(namespaceURI, localName, name, atts);

            return;
        }

        try
        {
            // getElemDesc2(name) is faster than getElemDesc(name)
            ElemDesc elemDesc = getElemDesc2(name);
            int elemFlags = elemDesc.getFlags();

            // deal with indentation issues first
            if (m_doIndent)
            {
                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                // Indent only when there is an enclosing element (the current
                // context has a name) and either
                //   - this element is a block element, or
                //   - the previous item was a block element and not text,
                //     i.e. !m_isprevtext && m_isprevblock.
                if ((null != elemContext.m_elementName)
                        // If this element is a block element,
                        // or if this is not a block element, then if the
                        // previous is neither a text nor an inline
                        && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
                {
                    m_startNewLine = true;

                    indent();
                }
                // Remember this element's block/inline status for the next item.
                m_isprevblock = isBlockElement;
            }

            // save any attributes for later processing
            if (atts != null)
                addAttributes(atts);

            m_isprevtext = false;
            final java.io.Writer writer = m_writer;
            // Write the opening of the start tag; the closing '>' comes later.
            writer.write('<');
            writer.write(name);

            // Save the parent's child count and start counting this element's
            // own children from zero; endElement() pops the saved value.
            m_childNodeNumStack.push(m_childNodeNum);
            m_childNodeNum = 0;

            if (m_tracer != null)
                firePseudoAttributes();

            if ((elemFlags & ElemDesc.EMPTY) != 0)
            {
                // an optimization for elements which are expected
                // to be empty.
                // Uses the no-argument push(), then records only the name and
                // descriptor. Returns before the m_isRaw / HEAD handling below.
                m_elemContext = elemContext.push();
                /* XSLTC sometimes calls namespaceAfterStartElement()
                 * so we need to remember the name
                 */
                m_elemContext.m_elementName = name;
                m_elemContext.m_elementDesc = elemDesc;
                return;
            }
            else
            {
                // Regular element: push a full context and record its
                // descriptor and whether its content is RAW.
                elemContext = elemContext.push(namespaceURI,localName,name);
                m_elemContext = elemContext;
                elemContext.m_elementDesc = elemDesc;
                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;

                // set m_startNewLine for the next element
                if (m_doIndent) {
                    // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
                    // in this branch m_elemContext.m_elementName is not null
                    boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                    if (isBlockElement)
                        m_startNewLine = true;
                }
            }


            if ((elemFlags & ElemDesc.HEADELEM) != 0)
            {
                // This is the <HEAD> element, do some special processing
                // The start tag is closed immediately so that the META tag
                // (unless suppressed by m_omitMetaTag) can be written as the
                // first content of HEAD.
                closeStartTag();
                elemContext.m_startTagOpen = false;
                if (!m_omitMetaTag)
                {
                    if (m_doIndent)
                        indent();
                    writer.write(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
                    // The charset value is the MIME name for the output encoding.
                    String encoding = getEncoding();
                    String encode = Encodings.getMimeEncoding(encoding);
                    writer.write(encode);
                    writer.write("\">");
                }
            }
        }
        catch (IOException e)
        {
            // I/O failures are reported to SAX callers as SAXException.
            throw new SAXException(e);
        }
    }
 902 
 903     /**
 904      *  Receive notification of the end of an element.
 905      *
 906      *
 907      *  @param namespaceURI
 908      *  @param localName
 909      *  @param name The element type name
 910      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 911      *             wrapping another exception.
 912      */
    public final void endElement(
        final String namespaceURI,
        final String localName,
        final String name)
        throws org.xml.sax.SAXException
    {
        // Overview: writes the end of an HTML element. If the start tag is
        // still open, the gathered attributes are written and the tag is
        // closed here; EMPTY elements in that state get no end tag at all.
        // Namespaced elements are delegated to the superclass.

        flushCharactersBuffer();
        // deal with any pending issues
        if (m_cdataTagOpen)
            closeCDATA();

        // if the element has a namespace, treat it like XML, not HTML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.endElement(namespaceURI, localName, name);

            return;
        }

        try
        {

            // The descriptor was stored on the context by startElement().
            ElemContext elemContext = m_elemContext;
            final ElemDesc elemDesc = elemContext.m_elementDesc;
            final int elemFlags = elemDesc.getFlags();
            final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;

            // deal with any indentation issues
            if (m_doIndent)
            {
                final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
                boolean shouldIndent = false;

                // If this element is a block element,
                // or if this is not a block element, then if the previous is
                // neither a text nor an inline
                // (i.e. !m_isprevtext && m_isprevblock)
                if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
                {
                    m_startNewLine = true;
                    shouldIndent = true;
                }
                // Only indent the end tag when the start tag has already been
                // closed, and either more than one child node was counted or
                // the last thing written was not text. The end tag is indented
                // one level shallower than the element's own depth.
                if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
                    indent(elemContext.m_currentElemDepth - 1);

                m_isprevblock = isBlockElement;
            }

            final java.io.Writer writer = m_writer;
            if (!elemContext.m_startTagOpen)
            {
                // Start tag already closed: just write "</name>".
                writer.write("</");
                writer.write(name);
                writer.write('>');
            }
            else
            {
                // the start-tag open when this method was called,
                // so we need to process it now.

                if (m_tracer != null)
                    super.fireStartElem(name);

                // the starting tag was still open when we received this endElement() call
                // so we need to process any gathered attributes NOW, before they go away.
                int nAttrs = m_attributes.getLength();
                if (nAttrs > 0)
                {
                    processAttributes(m_writer, nAttrs);
                    // clear attributes object for re-use with next element
                    m_attributes.clear();
                }
                if (!elemEmpty)
                {
                    // As per Dave/Paul recommendation 12/06/2000
                    // if (shouldIndent)
                    // writer.write('>');
                    //  indent(m_currentIndent);

                    // Close the start tag and write the end tag in one go.
                    writer.write("></");
                    writer.write(name);
                    writer.write('>');
                }
                else
                {
                    // EMPTY element: close the start tag, no end tag is written.
                    writer.write('>');
                }
            }

            // Restore the parent's child count that startElement() pushed.
            m_childNodeNum = m_childNodeNumStack.pop();
            // clean up because the element has ended
            if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
                m_ispreserve = true;
            m_isprevtext = false;

            // fire off the end element event
            if (m_tracer != null)
                super.fireEndElem(name);

            // OPTIMIZE-EMPTY
            if (elemEmpty)
            {
                // a quick exit if the HTML element had no children.
                // This block of code can be removed if the corresponding
                // early-return branch for EMPTY elements in startElement()
                // is also removed
                m_elemContext = elemContext.m_prev;
                return;
            }

            // some more clean because the element has ended.
            if (!elemContext.m_startTagOpen)
            {
                if (m_doIndent && !m_preserves.isEmpty())
                    m_preserves.pop();
            }
            // Pop this element's context.
            m_elemContext = elemContext.m_prev;
//            m_isRawStack.pop();
        }
        catch (IOException e)
        {
            // I/O failures are reported to SAX callers as SAXException.
            throw new SAXException(e);
        }
    }
1035 
1036     /**
1037      * Process an attribute.
1038      * @param   writer The writer to write the processed output to.
1039      * @param   name   The name of the attribute.
1040      * @param   value   The value of the attribute.
1041      * @param   elemDesc The description of the HTML element
1042      *           that has this attribute.
1043      *
1044      * @throws org.xml.sax.SAXException
1045      */
1046     protected void processAttribute(
1047         java.io.Writer writer,
1048         String name,
1049         String value,
1050         ElemDesc elemDesc)
1051         throws IOException
1052     {
1053         writer.write(' ');
1054 
1055         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1056             && elemDesc != null
1057             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1058         {
1059             writer.write(name);
1060         }
1061         else
1062         {
1063             // %REVIEW% %OPT%
1064             // Two calls to single-char write may NOT
1065             // be more efficient than one to string-write...
1066             writer.write(name);
1067             writer.write("=\"");
1068             if (   elemDesc != null
1069                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1070                 writeAttrURI(writer, value, m_specialEscapeURLs);
1071             else
1072                 writeAttrString(writer, value, this.getEncoding());
1073             writer.write('"');
1074 
1075         }
1076     }
1077 
1078     /**
1079      * Tell if a character is an ASCII digit.
1080      */
1081     private boolean isASCIIDigit(char c)
1082     {
1083         return (c >= '0' && c <= '9');
1084     }
1085 
1086     /**
1087      * Make an integer into an HH hex value.
1088      * Does no checking on the size of the input, since this
1089      * is only meant to be used locally by writeAttrURI.
1090      *
1091      * @param i must be a value less than 255.
1092      *
1093      * @return should be a two character string.
1094      */
1095     private static String makeHHString(int i)
1096     {
1097         String s = Integer.toHexString(i).toUpperCase();
1098         if (s.length() == 1)
1099         {
1100             s = "0" + s;
1101         }
1102         return s;
1103     }
1104 
1105     /**
1106     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1107     * @param str must be 2 characters long
1108     *
1109     * @return true or false
1110     */
1111     private boolean isHHSign(String str)
1112     {
1113         boolean sign = true;
1114         try
1115         {
1116             char r = (char) Integer.parseInt(str, 16);
1117         }
1118         catch (NumberFormatException e)
1119         {
1120             sign = false;
1121         }
1122         return sign;
1123     }
1124 
1125     /**
1126      * Write the specified <var>string</var> after substituting non ASCII characters,
1127      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1128      *
1129      * @param   string      String to convert to XML format.
1130      * @param doURLEscaping True if we should try to encode as
1131      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1132      *
1133      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1134      */
1135     public void writeAttrURI(
1136         final java.io.Writer writer, String string, boolean doURLEscaping)
1137         throws IOException
1138     {
1139         // http://www.ietf.org/rfc/rfc2396.txt says:
1140         // A URI is always in an "escaped" form, since escaping or unescaping a
1141         // completed URI might change its semantics.  Normally, the only time
1142         // escape encodings can safely be made is when the URI is being created
1143         // from its component parts; each component may have its own set of
1144         // characters that are reserved, so only the mechanism responsible for
1145         // generating or interpreting that component can determine whether or
1146         // not escaping a character will change its semantics. Likewise, a URI
1147         // must be separated into its components before the escaped characters
1148         // within those components can be safely decoded.
1149         //
1150         // ...So we do our best to do limited escaping of the URL, without
1151         // causing damage.  If the URL is already properly escaped, in theory, this
1152         // function should not change the string value.
1153 
1154         final int end = string.length();
1155         if (end > m_attrBuff.length)
1156         {
1157            m_attrBuff = new char[end*2 + 1];
1158         }
1159         string.getChars(0,end, m_attrBuff, 0);
1160         final char[] chars = m_attrBuff;
1161 
1162         int cleanStart = 0;
1163         int cleanLength = 0;
1164 
1165 
1166         char ch = 0;
1167         for (int i = 0; i < end; i++)
1168         {
1169             ch = chars[i];
1170 
1171             if ((ch < 32) || (ch > 126))
1172             {
1173                 if (cleanLength > 0)
1174                 {
1175                     writer.write(chars, cleanStart, cleanLength);
1176                     cleanLength = 0;
1177                 }
1178                 if (doURLEscaping)
1179                 {
1180                     // Encode UTF16 to UTF8.
1181                     // Reference is Unicode, A Primer, by Tony Graham.
1182                     // Page 92.
1183 
1184                     // Note that Kay doesn't escape 0x20...
1185                     //  if(ch == 0x20) // Not sure about this... -sb
1186                     //  {
1187                     //    writer.write(ch);
1188                     //  }
1189                     //  else
1190                     if (ch <= 0x7F)
1191                     {
1192                         writer.write('%');
1193                         writer.write(makeHHString(ch));
1194                     }
1195                     else if (ch <= 0x7FF)
1196                     {
1197                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1198                         // and set two high bits.
1199                         int high = (ch >> 6) | 0xC0;
1200                         int low = (ch & 0x3F) | 0x80;
1201                         // First 6 bits, + high bit
1202                         writer.write('%');
1203                         writer.write(makeHHString(high));
1204                         writer.write('%');
1205                         writer.write(makeHHString(low));
1206                     }
1207                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1208                     {
1209                         // I'm sure this can be done in 3 instructions, but I choose
1210                         // to try and do it exactly like it is done in the book, at least
1211                         // until we are sure this is totally clean.  I don't think performance
1212                         // is a big issue with this particular function, though I could be
1213                         // wrong.  Also, the stuff below clearly does more masking than
1214                         // it needs to do.
1215 
1216                         // Clear high 6 bits.
1217                         int highSurrogate = ((int) ch) & 0x03FF;
1218 
1219                         // Middle 4 bits (wwww) + 1
1220                         // "Note that the value of wwww from the high surrogate bit pattern
1221                         // is incremented to make the uuuuu bit pattern in the scalar value
1222                         // so the surrogate pair don't address the BMP."
1223                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1224                         int uuuuu = wwww + 1;
1225 
1226                         // next 4 bits
1227                         int zzzz = (highSurrogate & 0x003C) >> 2;
1228 
1229                         // low 2 bits
1230                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1231 
1232                         // Get low surrogate character.
1233                         ch = chars[++i];
1234 
1235                         // Clear high 6 bits.
1236                         int lowSurrogate = ((int) ch) & 0x03FF;
1237 
1238                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1239                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1240 
1241                         // bottom 6 bits.
1242                         int xxxxxx = (lowSurrogate & 0x003F);
1243 
1244                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1245                         int byte2 =
1246                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1247                         int byte3 = 0x80 | yyyyyy;
1248                         int byte4 = 0x80 | xxxxxx;
1249 
1250                         writer.write('%');
1251                         writer.write(makeHHString(byte1));
1252                         writer.write('%');
1253                         writer.write(makeHHString(byte2));
1254                         writer.write('%');
1255                         writer.write(makeHHString(byte3));
1256                         writer.write('%');
1257                         writer.write(makeHHString(byte4));
1258                     }
1259                     else
1260                     {
1261                         int high = (ch >> 12) | 0xE0; // top 4 bits
1262                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1263                         // middle 6 bits
1264                         int low = (ch & 0x3F) | 0x80;
1265                         // First 6 bits, + high bit
1266                         writer.write('%');
1267                         writer.write(makeHHString(high));
1268                         writer.write('%');
1269                         writer.write(makeHHString(middle));
1270                         writer.write('%');
1271                         writer.write(makeHHString(low));
1272                     }
1273 
1274                 }
1275                 else if (escapingNotNeeded(ch))
1276                 {
1277                     writer.write(ch);
1278                 }
1279                 else
1280                 {
1281                     writer.write("&#");
1282                     writer.write(Integer.toString(ch));
1283                     writer.write(';');
1284                 }
1285                 // In this character range we have first written out any previously accumulated
1286                 // "clean" characters, then processed the current more complicated character,
1287                 // which may have incremented "i".
1288                 // We now we reset the next possible clean character.
1289                 cleanStart = i + 1;
1290             }
1291             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1292             // not allowing quotes in the URI proper syntax, nor in the fragment
1293             // identifier, we believe that it's OK to double escape quotes.
1294             else if (ch == '"')
1295             {
1296                 // If the character is a '%' number number, try to avoid double-escaping.
1297                 // There is a question if this is legal behavior.
1298 
1299                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1300                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1301 
1302                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1303 
1304                 // We are no longer escaping '%'
1305 
1306                 if (cleanLength > 0)
1307                 {
1308                     writer.write(chars, cleanStart, cleanLength);
1309                     cleanLength = 0;
1310                 }
1311 
1312 
1313                 // Mike Kay encodes this as ", so he may know something I don't?
1314                 if (doURLEscaping)
1315                     writer.write("%22");
1316                 else
1317                     writer.write("&quot;"); // we have to escape this, I guess.
1318 
1319                 // We have written out any clean characters, then the escaped '%' and now we
1320                 // We now we reset the next possible clean character.
1321                 cleanStart = i + 1;
1322             }
1323             else if (ch == '&')
1324             {
1325                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1326                 // instead of "&" to avoid confusion with the beginning of a character
1327                 // reference (entity reference open delimiter).
1328                 if (cleanLength > 0)
1329                 {
1330                     writer.write(chars, cleanStart, cleanLength);
1331                     cleanLength = 0;
1332                 }
1333                 writer.write("&amp;");
1334                 cleanStart = i + 1;
1335             }
1336             else
1337             {
1338                 // no processing for this character, just count how
1339                 // many characters in a row that we have that need no processing
1340                 cleanLength++;
1341             }
1342         }
1343 
1344         // are there any clean characters at the end of the array
1345         // that we haven't processed yet?
1346         if (cleanLength > 1)
1347         {
1348             // if the whole string can be written out as-is do so
1349             // otherwise write out the clean chars at the end of the
1350             // array
1351             if (cleanStart == 0)
1352                 writer.write(string);
1353             else
1354                 writer.write(chars, cleanStart, cleanLength);
1355         }
1356         else if (cleanLength == 1)
1357         {
1358             // a little optimization for 1 clean character
1359             // (we could have let the previous if(...) handle them all)
1360             writer.write(ch);
1361         }
1362     }
1363 
1364     /**
1365      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1366      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1367      *
1368      * @param   string      String to convert to XML format.
1369      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1370      *
1371      * @throws org.xml.sax.SAXException
1372      */
1373     public void writeAttrString(
1374         final java.io.Writer writer, String string, String encoding)
1375         throws IOException
1376     {
1377         final int end = string.length();
1378         if (end > m_attrBuff.length)
1379         {
1380             m_attrBuff = new char[end * 2 + 1];
1381         }
1382         string.getChars(0, end, m_attrBuff, 0);
1383         final char[] chars = m_attrBuff;
1384 
1385 
1386 
1387         int cleanStart = 0;
1388         int cleanLength = 0;
1389 
1390         char ch = 0;
1391         for (int i = 0; i < end; i++)
1392         {
1393             ch = chars[i];
1394 
1395             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1396             // System.out.println("ch: "+(int)ch);
1397             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1398             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1399             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1400             {
1401                 cleanLength++;
1402             }
1403             else if ('<' == ch || '>' == ch)
1404             {
1405                 cleanLength++; // no escaping in this case, as specified in 15.2
1406             }
1407             else if (
1408                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1409             {
1410                 cleanLength++; // no escaping in this case, as specified in 15.2
1411             }
1412             else
1413             {
1414                 if (cleanLength > 0)
1415                 {
1416                     writer.write(chars,cleanStart,cleanLength);
1417                     cleanLength = 0;
1418                 }
1419                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1420 
1421                 if (i != pos)
1422                 {
1423                     i = pos - 1;
1424                 }
1425                 else
1426                 {
1427                     if (Encodings.isHighUTF16Surrogate(ch))
1428                     {
1429 
1430                             writeUTF16Surrogate(ch, chars, i, end);
1431                             i++; // two input characters processed
1432                                  // this increments by one and the for()
1433                                  // loop itself increments by another one.
1434                     }
1435 
1436                     // The next is kind of a hack to keep from escaping in the case
1437                     // of Shift_JIS and the like.
1438 
1439                     /*
1440                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1441                     && (ch != 160))
1442                     {
1443                     writer.write(ch);  // no escaping in this case
1444                     }
1445                     else
1446                     */
1447                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1448                     if (null != outputStringForChar)
1449                     {
1450                         writer.write(outputStringForChar);
1451                     }
1452                     else if (escapingNotNeeded(ch))
1453                     {
1454                         writer.write(ch); // no escaping in this case
1455                     }
1456                     else
1457                     {
1458                         writer.write("&#");
1459                         writer.write(Integer.toString(ch));
1460                         writer.write(';');
1461                     }
1462                 }
1463                 cleanStart = i + 1;
1464             }
1465         } // end of for()
1466 
1467         // are there any clean characters at the end of the array
1468         // that we haven't processed yet?
1469         if (cleanLength > 1)
1470         {
1471             // if the whole string can be written out as-is do so
1472             // otherwise write out the clean chars at the end of the
1473             // array
1474             if (cleanStart == 0)
1475                 writer.write(string);
1476             else
1477                 writer.write(chars, cleanStart, cleanLength);
1478         }
1479         else if (cleanLength == 1)
1480         {
1481             // a little optimization for 1 clean character
1482             // (we could have let the previous if(...) handle them all)
1483             writer.write(ch);
1484         }
1485     }
1486 
1487 
1488 
1489     /**
1490      * Receive notification of character data.
1491      *
1492      * <p>The Parser will call this method to report each chunk of
1493      * character data.  SAX parsers may return all contiguous character
1494      * data in a single chunk, or they may split it into several
1495      * chunks; however, all of the characters in any single event
1496      * must come from the same external entity, so that the Locator
1497      * provides useful information.</p>
1498      *
1499      * <p>The application must not attempt to read from the array
1500      * outside of the specified range.</p>
1501      *
1502      * <p>Note that some parsers will report whitespace using the
1503      * ignorableWhitespace() method rather than this one (validating
1504      * parsers must do so).</p>
1505      *
1506      * @param chars The characters from the XML document.
1507      * @param start The start position in the array.
1508      * @param length The number of characters to read from the array.
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *            wrapping another exception.
     * @see #ignorableWhitespace
     * @see org.xml.sax.Locator
1515      */
1516     public final void characters(char chars[], int start, int length)
1517         throws org.xml.sax.SAXException
1518     {
1519 
1520         if (m_elemContext.m_isRaw)
1521         {
1522             try
1523             {
1524                 if (m_elemContext.m_startTagOpen)
1525                 {
1526                     closeStartTag();
1527                     m_elemContext.m_startTagOpen = false;
1528                 }
1529                 m_ispreserve = true;
1530 
1531 //              With m_ispreserve just set true it looks like shouldIndent()
1532 //              will always return false, so drop any possible indentation.
1533 //              if (shouldIndent())
1534 //                  indent();
1535 
1536                 // writer.write("<![CDATA[");
1537                 // writer.write(chars, start, length);
1538                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1539                 m_isprevtext = true;
1540                 // writer.write("]]>");
1541 
1542                 // time to generate characters event
1543                 if (m_tracer != null)
1544                     super.fireCharEvent(chars, start, length);
1545 
1546                 return;
1547             }
1548             catch (IOException ioe)
1549             {
1550                 throw new org.xml.sax.SAXException(
1551                     Utils.messages.createMessage(
1552                         MsgKey.ER_OIERROR,
1553                         null),
1554                     ioe);
1555                 //"IO error", ioe);
1556             }
1557         }
1558         else
1559         {
1560             super.characters(chars, start, length);
1561         }
1562     }
1563 
1564     /**
1565      *  Receive notification of cdata.
1566      *
1567      *  <p>The Parser will call this method to report each chunk of
1568      *  character data.  SAX parsers may return all contiguous character
1569      *  data in a single chunk, or they may split it into several
1570      *  chunks; however, all of the characters in any single event
1571      *  must come from the same external entity, so that the Locator
1572      *  provides useful information.</p>
1573      *
1574      *  <p>The application must not attempt to read from the array
1575      *  outside of the specified range.</p>
1576      *
1577      *  <p>Note that some parsers will report whitespace using the
1578      *  ignorableWhitespace() method rather than this one (validating
1579      *  parsers must do so).</p>
1580      *
1581      *  @param ch The characters from the XML document.
1582      *  @param start The start position in the array.
1583      *  @param length The number of characters to read from the array.
1584      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1585      *             wrapping another exception.
1586      *  @see #ignorableWhitespace
1587      *  @see org.xml.sax.Locator
1588      *
1589      * @throws org.xml.sax.SAXException
1590      */
1591     public final void cdata(char ch[], int start, int length)
1592         throws org.xml.sax.SAXException
1593     {
1594         if ((null != m_elemContext.m_elementName)
1595             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1596                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1597         {
1598             try
1599             {
1600                 if (m_elemContext.m_startTagOpen)
1601                 {
1602                     closeStartTag();
1603                     m_elemContext.m_startTagOpen = false;
1604                 }
1605 
1606                 m_ispreserve = true;
1607 
1608                 if (shouldIndent())
1609                     indent();
1610 
1611                 // writer.write(ch, start, length);
1612                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1613             }
1614             catch (IOException ioe)
1615             {
1616                 throw new org.xml.sax.SAXException(
1617                     Utils.messages.createMessage(
1618                         MsgKey.ER_OIERROR,
1619                         null),
1620                     ioe);
1621                 //"IO error", ioe);
1622             }
1623         }
1624         else
1625         {
1626             super.cdata(ch, start, length);
1627         }
1628     }
1629 
1630     /**
1631      *  Receive notification of a processing instruction.
1632      *
1633      *  @param target The processing instruction target.
1634      *  @param data The processing instruction data, or null if
1635      *         none was supplied.
1636      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1637      *             wrapping another exception.
1638      *
1639      * @throws org.xml.sax.SAXException
1640      */
1641     public void processingInstruction(String target, String data)
1642         throws org.xml.sax.SAXException
1643     {
1644         m_childNodeNum++;
1645         flushCharactersBuffer();
1646         // Process any pending starDocument and startElement first.
1647         flushPending();
1648 
1649         // Use a fairly nasty hack to tell if the next node is supposed to be
1650         // unescaped text.
1651         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1652         {
1653             startNonEscaping();
1654         }
1655         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1656         {
1657             endNonEscaping();
1658         }
1659         else
1660         {
1661             try
1662             {
1663             if (m_elemContext.m_startTagOpen)
1664             {
1665                 closeStartTag();
1666                 m_elemContext.m_startTagOpen = false;
1667             }
1668             else if (m_needToCallStartDocument)
1669                 startDocumentInternal();
1670 
1671             if (shouldIndent())
1672                 indent();
1673 
1674             final java.io.Writer writer = m_writer;
1675             //writer.write("<?" + target);
1676             writer.write("<?");
1677             writer.write(target);
1678 
1679             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1680                 writer.write(' ');
1681 
1682             //writer.write(data + ">"); // different from XML
1683             writer.write(data); // different from XML
1684             writer.write('>'); // different from XML
1685 
1686             // Always output a newline char if not inside of an
1687             // element. The whitespace is not significant in that
1688             // case.
1689             if (m_elemContext.m_currentElemDepth <= 0)
1690                 outputLineSep();
1691 
1692             m_startNewLine = true;
1693             }
1694             catch(IOException e)
1695             {
1696                 throw new SAXException(e);
1697             }
1698         }
1699 
1700         // now generate the PI event
1701         if (m_tracer != null)
1702             super.fireEscapingEvent(target, data);
1703      }
1704 
1705     /**
1706      * Receive notivication of a entityReference.
1707      *
1708      * @param name non-null reference to entity name string.
1709      *
1710      * @throws org.xml.sax.SAXException
1711      */
1712     public final void entityReference(String name)
1713         throws org.xml.sax.SAXException
1714     {
1715         try
1716         {
1717 
1718         final java.io.Writer writer = m_writer;
1719         writer.write('&');
1720         writer.write(name);
1721         writer.write(';');
1722 
1723         } catch(IOException e)
1724         {
1725             throw new SAXException(e);
1726         }
1727     }
1728     /**
1729      * @see ExtendedContentHandler#endElement(String)
1730      */
1731     public final void endElement(String elemName) throws SAXException
1732     {
1733         endElement(null, null, elemName);
1734     }
1735 
1736     /**
1737      * Process the attributes, which means to write out the currently
1738      * collected attributes to the writer. The attributes are not
1739      * cleared by this method
1740      *
1741      * @param writer the writer to write processed attributes to.
1742      * @param nAttrs the number of attributes in m_attributes
1743      * to be processed
1744      *
1745      * @throws org.xml.sax.SAXException
1746      */
1747     public void processAttributes(java.io.Writer writer, int nAttrs)
1748         throws IOException,SAXException
1749     {
1750             /*
1751              * process the collected attributes
1752              */
1753             for (int i = 0; i < nAttrs; i++)
1754             {
1755                 processAttribute(
1756                     writer,
1757                     m_attributes.getQName(i),
1758                     m_attributes.getValue(i),
1759                     m_elemContext.m_elementDesc);
1760             }
1761     }
1762 
1763     /**
1764      * For the enclosing elements starting tag write out out any attributes
1765      * followed by ">"
1766      *
1767      *@throws org.xml.sax.SAXException
1768      */
1769     protected void closeStartTag() throws SAXException
1770     {
1771             try
1772             {
1773 
1774             // finish processing attributes, time to fire off the start element event
1775             if (m_tracer != null)
1776                 super.fireStartElem(m_elemContext.m_elementName);
1777 
1778             int nAttrs = m_attributes.getLength();
1779             if (nAttrs>0)
1780             {
1781                 processAttributes(m_writer, nAttrs);
1782                 // clear attributes object for re-use with next element
1783                 m_attributes.clear();
1784             }
1785 
1786             m_writer.write('>');
1787 
1788             /* whether Xalan or XSLTC, we have the prefix mappings now, so
1789              * lets determine if the current element is specified in the cdata-
1790              * section-elements list.
1791              */
1792             if (m_StringOfCDATASections != null)
1793                 m_elemContext.m_isCdataSection = isCdataSection();
1794             if (m_doIndent)
1795             {
1796                 m_isprevtext = false;
1797                 m_preserves.push(m_ispreserve);
1798             }
1799 
1800             }
1801             catch(IOException e)
1802             {
1803                 throw new SAXException(e);
1804             }
1805     }
1806 
1807         /**
1808          * This method is used when a prefix/uri namespace mapping
1809          * is indicated after the element was started with a
1810          * startElement() and before and endElement().
1811          * startPrefixMapping(prefix,uri) would be used before the
1812          * startElement() call.
1813          * @param uri the URI of the namespace
1814          * @param prefix the prefix associated with the given URI.
1815          *
1816          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1817          */
1818         public void namespaceAfterStartElement(String prefix, String uri)
1819             throws SAXException
1820         {
1821             // hack for XSLTC with finding URI for default namespace
1822             if (m_elemContext.m_elementURI == null)
1823             {
1824                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1825                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1826                 {
1827                     // the elements URI is not known yet, and it
1828                     // doesn't have a prefix, and we are currently
1829                     // setting the uri for prefix "", so we have
1830                     // the uri for the element... lets remember it
1831                     m_elemContext.m_elementURI = uri;
1832                 }
1833             }
1834             startPrefixMapping(prefix,uri,false);
1835         }
1836 
1837     public void startDTD(String name, String publicId, String systemId)
1838         throws SAXException
1839     {
1840         m_inDTD = true;
1841         super.startDTD(name, publicId, systemId);
1842     }
1843 
1844     /**
1845      * Report the end of DTD declarations.
1846      * @throws org.xml.sax.SAXException The application may raise an exception.
1847      * @see #startDTD
1848      */
1849     public void endDTD() throws org.xml.sax.SAXException
1850     {
1851         m_inDTD = false;
1852         /* for ToHTMLStream the DOCTYPE is entirely output in the
1853          * startDocumentInternal() method, so don't do anything here
1854          */
1855     }
1856     /**
1857      * This method does nothing.
1858      */
1859     public void attributeDecl(
1860         String eName,
1861         String aName,
1862         String type,
1863         String valueDefault,
1864         String value)
1865         throws SAXException
1866     {
1867         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1868     }
1869 
1870     /**
1871      * This method does nothing.
1872      */
1873     public void elementDecl(String name, String model) throws SAXException
1874     {
1875         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1876     }
1877     /**
1878      * This method does nothing.
1879      */
1880     public void internalEntityDecl(String name, String value)
1881         throws SAXException
1882     {
1883         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1884     }
1885     /**
1886      * This method does nothing.
1887      */
1888     public void externalEntityDecl(
1889         String name,
1890         String publicId,
1891         String systemId)
1892         throws SAXException
1893     {
1894         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1895     }
1896 
1897     /**
1898      * This method is used to add an attribute to the currently open element.
1899      * The caller has guaranted that this attribute is unique, which means that it
1900      * not been seen before and will not be seen again.
1901      *
1902      * @param name the qualified name of the attribute
1903      * @param value the value of the attribute which can contain only
1904      * ASCII printable characters characters in the range 32 to 127 inclusive.
1905      * @param flags the bit values of this integer give optimization information.
1906      */
1907     public void addUniqueAttribute(String name, String value, int flags)
1908         throws SAXException
1909     {
1910         try
1911         {
1912             final java.io.Writer writer = m_writer;
1913             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1914             {
1915                 // "flags" has indicated that the characters
1916                 // '>'  '<'   '&'  and '"' are not in the value and
1917                 // m_htmlcharInfo has recorded that there are no other
1918                 // entities in the range 0 to 127 so we write out the
1919                 // value directly
1920                 writer.write(' ');
1921                 writer.write(name);
1922                 writer.write("=\"");
1923                 writer.write(value);
1924                 writer.write('"');
1925             }
1926             else if (
1927                 (flags & HTML_ATTREMPTY) > 0
1928                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1929             {
1930                 writer.write(' ');
1931                 writer.write(name);
1932             }
1933             else
1934             {
1935                 writer.write(' ');
1936                 writer.write(name);
1937                 writer.write("=\"");
1938                 if ((flags & HTML_ATTRURL) > 0)
1939                 {
1940                     writeAttrURI(writer, value, m_specialEscapeURLs);
1941                 }
1942                 else
1943                 {
1944                     writeAttrString(writer, value, this.getEncoding());
1945                 }
1946                 writer.write('"');
1947             }
1948         } catch (IOException e) {
1949             throw new SAXException(e);
1950         }
1951     }
1952 
1953     public void comment(char ch[], int start, int length)
1954             throws SAXException
1955     {
1956         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1957         if (m_inDTD)
1958             return;
1959         super.comment(ch, start, length);
1960     }
1961 
1962     public boolean reset()
1963     {
1964         boolean ret = super.reset();
1965         if (!ret)
1966             return false;
1967         initToHTMLStream();
1968         return true;
1969     }
1970 
1971     private void initToHTMLStream()
1972     {
1973 //        m_elementDesc = null;
1974         m_isprevblock = false;
1975         m_inDTD = false;
1976 //        m_isRawStack.clear();
1977         m_omitMetaTag = false;
1978         m_specialEscapeURLs = true;
1979     }
1980 
1981     static class Trie
1982     {
1983         /**
1984          * A digital search trie for 7-bit ASCII text
1985          * The API is a subset of java.util.Hashtable
1986          * The key must be a 7-bit ASCII string
1987          * The value may be any Java Object
1988          * One can get an object stored in a trie from its key,
1989          * but the search is either case sensitive or case
1990          * insensitive to the characters in the key, and this
1991          * choice of sensitivity or insensitivity is made when
1992          * the Trie is created, before any objects are put in it.
1993          *
1994          * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
1995          * It exists to cut the serializers dependancy on that package.
1996          *
1997          * @xsl.usage internal
1998          */
1999 
2000         /** Size of the m_nextChar array.  */
2001         public static final int ALPHA_SIZE = 128;
2002 
2003         /** The root node of the tree.    */
2004         final Node m_Root;
2005 
2006         /** helper buffer to convert Strings to char arrays */
2007         private char[] m_charBuffer = new char[0];
2008 
2009         /** true if the search for an object is lower case only with the key */
2010         private final boolean m_lowerCaseOnly;
2011 
2012         /**
2013          * Construct the trie that has a case insensitive search.
2014          */
2015         public Trie()
2016         {
2017             m_Root = new Node();
2018             m_lowerCaseOnly = false;
2019         }
2020 
2021         /**
2022          * Construct the trie given the desired case sensitivity with the key.
2023          * @param lowerCaseOnly true if the search keys are to be loser case only,
2024          * not case insensitive.
2025          */
2026         public Trie(boolean lowerCaseOnly)
2027         {
2028             m_Root = new Node();
2029             m_lowerCaseOnly = lowerCaseOnly;
2030         }
2031 
2032         /**
2033          * Put an object into the trie for lookup.
2034          *
2035          * @param key must be a 7-bit ASCII string
2036          * @param value any java object.
2037          *
2038          * @return The old object that matched key, or null.
2039          */
2040         public Object put(String key, Object value)
2041         {
2042 
2043             final int len = key.length();
2044             if (len > m_charBuffer.length)
2045             {
2046                 // make the biggest buffer ever needed in get(String)
2047                 m_charBuffer = new char[len];
2048             }
2049 
2050             Node node = m_Root;
2051 
2052             for (int i = 0; i < len; i++)
2053             {
2054                 Node nextNode =
2055                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2056 
2057                 if (nextNode != null)
2058                 {
2059                     node = nextNode;
2060                 }
2061                 else
2062                 {
2063                     for (; i < len; i++)
2064                     {
2065                         Node newNode = new Node();
2066                         if (m_lowerCaseOnly)
2067                         {
2068                             // put this value into the tree only with a lower case key
2069                             node.m_nextChar[Character.toLowerCase(
2070                                 key.charAt(i))] =
2071                                 newNode;
2072                         }
2073                         else
2074                         {
2075                             // put this value into the tree with a case insensitive key
2076                             node.m_nextChar[Character.toUpperCase(
2077                                 key.charAt(i))] =
2078                                 newNode;
2079                             node.m_nextChar[Character.toLowerCase(
2080                                 key.charAt(i))] =
2081                                 newNode;
2082                         }
2083                         node = newNode;
2084                     }
2085                     break;
2086                 }
2087             }
2088 
2089             Object ret = node.m_Value;
2090 
2091             node.m_Value = value;
2092 
2093             return ret;
2094         }
2095 
2096         /**
2097          * Get an object that matches the key.
2098          *
2099          * @param key must be a 7-bit ASCII string
2100          *
2101          * @return The object that matches the key, or null.
2102          */
2103         public Object get(final String key)
2104         {
2105 
2106             final int len = key.length();
2107 
2108             /* If the name is too long, we won't find it, this also keeps us
2109              * from overflowing m_charBuffer
2110              */
2111             if (m_charBuffer.length < len)
2112                 return null;
2113 
2114             Node node = m_Root;
2115             switch (len) // optimize the look up based on the number of chars
2116             {
2117                 // case 0 looks silly, but the generated bytecode runs
2118                 // faster for lookup of elements of length 2 with this in
2119                 // and a fair bit faster.  Don't know why.
2120                 case 0 :
2121                     {
2122                         return null;
2123                     }
2124 
2125                 case 1 :
2126                     {
2127                         final char ch = key.charAt(0);
2128                         if (ch < ALPHA_SIZE)
2129                         {
2130                             node = node.m_nextChar[ch];
2131                             if (node != null)
2132                                 return node.m_Value;
2133                         }
2134                         return null;
2135                     }
2136                     //                comment out case 2 because the default is faster
2137                     //                case 2 :
2138                     //                    {
2139                     //                        final char ch0 = key.charAt(0);
2140                     //                        final char ch1 = key.charAt(1);
2141                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2142                     //                        {
2143                     //                            node = node.m_nextChar[ch0];
2144                     //                            if (node != null)
2145                     //                            {
2146                     //
2147                     //                                if (ch1 < ALPHA_SIZE)
2148                     //                                {
2149                     //                                    node = node.m_nextChar[ch1];
2150                     //                                    if (node != null)
2151                     //                                        return node.m_Value;
2152                     //                                }
2153                     //                            }
2154                     //                        }
2155                     //                        return null;
2156                     //                   }
2157                 default :
2158                     {
2159                         for (int i = 0; i < len; i++)
2160                         {
2161                             // A thread-safe way to loop over the characters
2162                             final char ch = key.charAt(i);
2163                             if (ALPHA_SIZE <= ch)
2164                             {
2165                                 // the key is not 7-bit ASCII so we won't find it here
2166                                 return null;
2167                             }
2168 
2169                             node = node.m_nextChar[ch];
2170                             if (node == null)
2171                                 return null;
2172                         }
2173 
2174                         return node.m_Value;
2175                     }
2176             }
2177         }
2178 
2179         /**
2180          * The node representation for the trie.
2181          * @xsl.usage internal
2182          */
2183         private class Node
2184         {
2185 
2186             /**
2187              * Constructor, creates a Node[ALPHA_SIZE].
2188              */
2189             Node()
2190             {
2191                 m_nextChar = new Node[ALPHA_SIZE];
2192                 m_Value = null;
2193             }
2194 
2195             /** The next nodes.   */
2196             final Node m_nextChar[];
2197 
2198             /** The value.   */
2199             Object m_Value;
2200         }
2201         /**
2202          * Construct the trie from another Trie.
2203          * Both the existing Trie and this new one share the same table for
2204          * lookup, and it is assumed that the table is fully populated and
2205          * not changing anymore.
2206          *
2207          * @param existingTrie the Trie that this one is a copy of.
2208          */
2209         public Trie(Trie existingTrie)
2210         {
2211             // copy some fields from the existing Trie into this one.
2212             m_Root = existingTrie.m_Root;
2213             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2214 
2215             // get a buffer just big enough to hold the longest key in the table.
2216             int max = existingTrie.getLongestKeyLength();
2217             m_charBuffer = new char[max];
2218         }
2219 
2220         /**
2221          * Get an object that matches the key.
2222          * This method is faster than get(), but is not thread-safe.
2223          *
2224          * @param key must be a 7-bit ASCII string
2225          *
2226          * @return The object that matches the key, or null.
2227          */
2228         public Object get2(final String key)
2229         {
2230 
2231             final int len = key.length();
2232 
2233             /* If the name is too long, we won't find it, this also keeps us
2234              * from overflowing m_charBuffer
2235              */
2236             if (m_charBuffer.length < len)
2237                 return null;
2238 
2239             Node node = m_Root;
2240             switch (len) // optimize the look up based on the number of chars
2241             {
2242                 // case 0 looks silly, but the generated bytecode runs
2243                 // faster for lookup of elements of length 2 with this in
2244                 // and a fair bit faster.  Don't know why.
2245                 case 0 :
2246                     {
2247                         return null;
2248                     }
2249 
2250                 case 1 :
2251                     {
2252                         final char ch = key.charAt(0);
2253                         if (ch < ALPHA_SIZE)
2254                         {
2255                             node = node.m_nextChar[ch];
2256                             if (node != null)
2257                                 return node.m_Value;
2258                         }
2259                         return null;
2260                     }
2261                 default :
2262                     {
2263                         /* Copy string into array. This is not thread-safe because
2264                          * it modifies the contents of m_charBuffer. If multiple
2265                          * threads were to use this Trie they all would be
2266                          * using this same array (not good). So this
2267                          * method is not thread-safe, but it is faster because
2268                          * converting to a char[] and looping over elements of
2269                          * the array is faster than a String's charAt(i).
2270                          */
2271                         key.getChars(0, len, m_charBuffer, 0);
2272 
2273                         for (int i = 0; i < len; i++)
2274                         {
2275                             final char ch = m_charBuffer[i];
2276                             if (ALPHA_SIZE <= ch)
2277                             {
2278                                 // the key is not 7-bit ASCII so we won't find it here
2279                                 return null;
2280                             }
2281 
2282                             node = node.m_nextChar[ch];
2283                             if (node == null)
2284                                 return null;
2285                         }
2286 
2287                         return node.m_Value;
2288                     }
2289             }
2290         }
2291 
2292         /**
2293          * Get the length of the longest key used in the table.
2294          */
2295         public int getLongestKeyLength()
2296         {
2297             return m_charBuffer.length;
2298         }
2299     }
2300 }