1 /*
   2  * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Copyright 2001-2004 The Apache Software Foundation.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19 /*
  20  * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $
  21  */
  22 package com.sun.org.apache.xml.internal.serializer;
  23 
  24 import java.io.IOException;
  25 import java.io.OutputStream;
  26 import java.io.UnsupportedEncodingException;
  27 import java.util.Properties;
  28 
  29 import javax.xml.transform.Result;
  30 
  31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  32 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  33 import org.xml.sax.Attributes;
  34 import org.xml.sax.SAXException;
  35 
  36 /**
  37  * This serializer takes a series of SAX or
  38  * SAX-like events and writes its output
  39  * to the given stream.
  40  *
  41  * This class is not a public API, it is public
  42  * because it is used from another package.
  43  *
  44  * @xsl.usage internal
  45  * @LastModified: Sept 2018
  46  */
  47 public final class ToHTMLStream extends ToStream
  48 {
  49 
    /** This flag is set while receiving events from the DTD. Initially {@code false}. */
    protected boolean m_inDTD = false;

    /**
     * Block-element tracking flag consulted by the indentation logic in
     * {@code startElement}.  (seems like this needs to be a stack. -sb).
     *
     * NOTE(review): the historical comment read "True if the current element
     * is a block element", but {@code startElement} stores
     * {@code !isBlockElement} in this field, i.e. it ends up true when the
     * element just started is NOT flagged {@code ElemDesc.BLOCK}.  Confirm the
     * intended meaning before relying on the name.
     */
    private boolean m_inBlockElem = false;

    /**
     * Map that tells which XML characters should have special treatment, and it
     * provides character to entity name lookup.  It is obtained for the HTML
     * entities resource with the HTML output method, and the constructor
     * installs it as {@code m_charInfo}.
     */
    private static final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
    static final Trie m_elementFlags = new Trie();

    // Populate the shared element table once, during class initialization.
    static {
        initTagReference(m_elementFlags);
    }
  71     static void initTagReference(Trie m_elementFlags) {
  72 
  73         // HTML 4.0 loose DTD
  74         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  75         m_elementFlags.put(
  76             "FRAME",
  77             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  78         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  79         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  80         m_elementFlags.put(
  81             "ISINDEX",
  82             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  83         m_elementFlags.put(
  84             "APPLET",
  85             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  86         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  87         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  88         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  89 
  90         // HTML 4.0 strict DTD
  91         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  92         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  93         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  94         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  95         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  96         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  97         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
  98         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
  99         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
 100         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
 101         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
 102         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
 103         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
 104         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
 105         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
 106         m_elementFlags.put(
 107             "SUP",
 108             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 109         m_elementFlags.put(
 110             "SUB",
 111             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 112         m_elementFlags.put(
 113             "SPAN",
 114             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 115         m_elementFlags.put(
 116             "BDO",
 117             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 118         m_elementFlags.put(
 119             "BR",
 120             new ElemDesc(
 121                 0
 122                     | ElemDesc.SPECIAL
 123                     | ElemDesc.ASPECIAL
 124                     | ElemDesc.EMPTY
 125                     | ElemDesc.BLOCK));
 126         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
 127         m_elementFlags.put(
 128             "ADDRESS",
 129             new ElemDesc(
 130                 0
 131                     | ElemDesc.BLOCK
 132                     | ElemDesc.BLOCKFORM
 133                     | ElemDesc.BLOCKFORMFIELDSET));
 134         m_elementFlags.put(
 135             "DIV",
 136             new ElemDesc(
 137                 0
 138                     | ElemDesc.BLOCK
 139                     | ElemDesc.BLOCKFORM
 140                     | ElemDesc.BLOCKFORMFIELDSET));
 141         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
 142         m_elementFlags.put(
 143             "MAP",
 144             new ElemDesc(
 145                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
 146         m_elementFlags.put(
 147             "AREA",
 148             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 149         m_elementFlags.put(
 150             "LINK",
 151             new ElemDesc(
 152                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 153         m_elementFlags.put(
 154             "IMG",
 155             new ElemDesc(
 156                 0
 157                     | ElemDesc.SPECIAL
 158                     | ElemDesc.ASPECIAL
 159                     | ElemDesc.EMPTY
 160                     | ElemDesc.WHITESPACESENSITIVE));
 161         m_elementFlags.put(
 162             "OBJECT",
 163             new ElemDesc(
 164                 0
 165                     | ElemDesc.SPECIAL
 166                     | ElemDesc.ASPECIAL
 167                     | ElemDesc.HEADMISC
 168                     | ElemDesc.WHITESPACESENSITIVE));
 169         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
 170         m_elementFlags.put(
 171             "HR",
 172             new ElemDesc(
 173                 0
 174                     | ElemDesc.BLOCK
 175                     | ElemDesc.BLOCKFORM
 176                     | ElemDesc.BLOCKFORMFIELDSET
 177                     | ElemDesc.EMPTY));
 178         m_elementFlags.put(
 179             "P",
 180             new ElemDesc(
 181                 0
 182                     | ElemDesc.BLOCK
 183                     | ElemDesc.BLOCKFORM
 184                     | ElemDesc.BLOCKFORMFIELDSET));
 185         m_elementFlags.put(
 186             "H1",
 187             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 188         m_elementFlags.put(
 189             "H2",
 190             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 191         m_elementFlags.put(
 192             "H3",
 193             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 194         m_elementFlags.put(
 195             "H4",
 196             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 197         m_elementFlags.put(
 198             "H5",
 199             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 200         m_elementFlags.put(
 201             "H6",
 202             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 203         m_elementFlags.put(
 204             "PRE",
 205             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
 206         m_elementFlags.put(
 207             "Q",
 208             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 209         m_elementFlags.put(
 210             "BLOCKQUOTE",
 211             new ElemDesc(
 212                 0
 213                     | ElemDesc.BLOCK
 214                     | ElemDesc.BLOCKFORM
 215                     | ElemDesc.BLOCKFORMFIELDSET));
 216         m_elementFlags.put("INS", new ElemDesc(0));
 217         m_elementFlags.put("DEL", new ElemDesc(0));
 218         m_elementFlags.put(
 219             "DL",
 220             new ElemDesc(
 221                 0
 222                     | ElemDesc.BLOCK
 223                     | ElemDesc.BLOCKFORM
 224                     | ElemDesc.BLOCKFORMFIELDSET));
 225         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
 226         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
 227         m_elementFlags.put(
 228             "OL",
 229             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 230         m_elementFlags.put(
 231             "UL",
 232             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 233         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
 234         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
 235         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
 236         m_elementFlags.put(
 237             "INPUT",
 238             new ElemDesc(
 239                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
 240         m_elementFlags.put(
 241             "SELECT",
 242             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 243         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
 244         m_elementFlags.put("OPTION", new ElemDesc(0));
 245         m_elementFlags.put(
 246             "TEXTAREA",
 247             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 248         m_elementFlags.put(
 249             "FIELDSET",
 250             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
 251         m_elementFlags.put("LEGEND", new ElemDesc(0));
 252         m_elementFlags.put(
 253             "BUTTON",
 254             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 255         m_elementFlags.put(
 256             "TABLE",
 257             new ElemDesc(
 258                 0
 259                     | ElemDesc.BLOCK
 260                     | ElemDesc.BLOCKFORM
 261                     | ElemDesc.BLOCKFORMFIELDSET));
 262         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
 263         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
 264         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
 265         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
 266         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
 267         m_elementFlags.put(
 268             "COL",
 269             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 270         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
 271         m_elementFlags.put("TH", new ElemDesc(0));
 272         m_elementFlags.put("TD", new ElemDesc(0));
 273         m_elementFlags.put(
 274             "HEAD",
 275             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
 276         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
 277         m_elementFlags.put(
 278             "BASE",
 279             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 280         m_elementFlags.put(
 281             "META",
 282             new ElemDesc(
 283                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 284         m_elementFlags.put(
 285             "STYLE",
 286             new ElemDesc(
 287                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
 288         m_elementFlags.put(
 289             "SCRIPT",
 290             new ElemDesc(
 291                 0
 292                     | ElemDesc.SPECIAL
 293                     | ElemDesc.ASPECIAL
 294                     | ElemDesc.HEADMISC
 295                     | ElemDesc.RAW));
 296         m_elementFlags.put(
 297             "NOSCRIPT",
 298             new ElemDesc(
 299                 0
 300                     | ElemDesc.BLOCK
 301                     | ElemDesc.BLOCKFORM
 302                     | ElemDesc.BLOCKFORMFIELDSET));
 303         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
 304 
 305         // From "John Ky" <hand@syd.speednet.com.au
 306         // Transitional Document Type Definition ()
 307         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
 308         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 309 
 310         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
 311         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 312         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 313 
 314         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
 315         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 316 
 317         // From "John Ky" <hand@syd.speednet.com.au
 318         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 319 
 320         // HTML 4.0, section 16.5
 321         m_elementFlags.put(
 322             "IFRAME",
 323             new ElemDesc(
 324                 0
 325                     | ElemDesc.BLOCK
 326                     | ElemDesc.BLOCKFORM
 327                     | ElemDesc.BLOCKFORMFIELDSET));
 328 
 329         // Netscape 4 extension
 330         m_elementFlags.put(
 331             "LAYER",
 332             new ElemDesc(
 333                 0
 334                     | ElemDesc.BLOCK
 335                     | ElemDesc.BLOCKFORM
 336                     | ElemDesc.BLOCKFORMFIELDSET));
 337         // Netscape 4 extension
 338         m_elementFlags.put(
 339             "ILAYER",
 340             new ElemDesc(
 341                 0
 342                     | ElemDesc.BLOCK
 343                     | ElemDesc.BLOCKFORM
 344                     | ElemDesc.BLOCKFORMFIELDSET));
 345 
 346 
 347         // NOW FOR ATTRIBUTE INFORMATION . . .
 348         ElemDesc elemDesc;
 349 
 350 
 351         // ----------------------------------------------
 352         elemDesc = (ElemDesc) m_elementFlags.get("A");
 353         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 354         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
 355 
 356         // ----------------------------------------------
 357         elemDesc = (ElemDesc) m_elementFlags.get("AREA");
 358         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 359         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
 360 
 361         // ----------------------------------------------
 362         elemDesc = (ElemDesc) m_elementFlags.get("BASE");
 363         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 364 
 365         // ----------------------------------------------
 366         elemDesc = (ElemDesc) m_elementFlags.get("BUTTON");
 367         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 368 
 369         // ----------------------------------------------
 370         elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE");
 371         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 372 
 373         // ----------------------------------------------
 374         elemDesc = (ElemDesc) m_elementFlags.get("DEL");
 375         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 376 
 377         // ----------------------------------------------
 378         elemDesc = (ElemDesc) m_elementFlags.get("DIR");
 379         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 380 
 381         // ----------------------------------------------
 382 
 383         elemDesc = (ElemDesc) m_elementFlags.get("DIV");
 384         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
 385         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 386 
 387         // ----------------------------------------------
 388         elemDesc = (ElemDesc) m_elementFlags.get("DL");
 389         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 390 
 391         // ----------------------------------------------
 392         elemDesc = (ElemDesc) m_elementFlags.get("FORM");
 393         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
 394 
 395         // ----------------------------------------------
 396         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
 397         elemDesc = (ElemDesc) m_elementFlags.get("FRAME");
 398         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 399         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 400         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
 401 
 402         // ----------------------------------------------
 403         elemDesc = (ElemDesc) m_elementFlags.get("HEAD");
 404         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
 405 
 406         // ----------------------------------------------
 407         elemDesc = (ElemDesc) m_elementFlags.get("HR");
 408         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
 409 
 410         // ----------------------------------------------
 411         // HTML 4.0, section 16.5
 412         elemDesc = (ElemDesc) m_elementFlags.get("IFRAME");
 413         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 414         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 415 
 416         // ----------------------------------------------
 417         // Netscape 4 extension
 418         elemDesc = (ElemDesc) m_elementFlags.get("ILAYER");
 419         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 420 
 421         // ----------------------------------------------
 422         elemDesc = (ElemDesc) m_elementFlags.get("IMG");
 423         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 424         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 425         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 426         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 427 
 428         // ----------------------------------------------
 429         elemDesc = (ElemDesc) m_elementFlags.get("INPUT");
 430         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 431         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 432         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
 433         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 434         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 435         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 436 
 437         // ----------------------------------------------
 438         elemDesc = (ElemDesc) m_elementFlags.get("INS");
 439         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 440 
 441         // ----------------------------------------------
 442         // Netscape 4 extension
 443         elemDesc = (ElemDesc) m_elementFlags.get("LAYER");
 444         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 445 
 446         // ----------------------------------------------
 447         elemDesc = (ElemDesc) m_elementFlags.get("LINK");
 448         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 449 
 450         // ----------------------------------------------
 451         elemDesc = (ElemDesc) m_elementFlags.get("MENU");
 452         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 453 
 454         // ----------------------------------------------
 455         elemDesc = (ElemDesc) m_elementFlags.get("OBJECT");
 456         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
 457         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
 458         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
 459         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
 460         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 461         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
 462 
 463         // ----------------------------------------------
 464         elemDesc = (ElemDesc) m_elementFlags.get("OL");
 465         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 466 
 467         // ----------------------------------------------
 468         elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP");
 469         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 470 
 471         // ----------------------------------------------
 472         elemDesc = (ElemDesc) m_elementFlags.get("OPTION");
 473         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
 474         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 475 
 476         // ----------------------------------------------
 477         elemDesc = (ElemDesc) m_elementFlags.get("Q");
 478         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 479 
 480         // ----------------------------------------------
 481         elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT");
 482         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 483         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
 484         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
 485 
 486         // ----------------------------------------------
 487         elemDesc = (ElemDesc) m_elementFlags.get("SELECT");
 488         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 489         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
 490 
 491         // ----------------------------------------------
 492         elemDesc = (ElemDesc) m_elementFlags.get("TABLE");
 493         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 494 
 495         // ----------------------------------------------
 496         elemDesc = (ElemDesc) m_elementFlags.get("TD");
 497         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 498 
 499         // ----------------------------------------------
 500         elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA");
 501         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 502         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 503 
 504         // ----------------------------------------------
 505         elemDesc = (ElemDesc) m_elementFlags.get("TH");
 506         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 507 
 508         // ----------------------------------------------
 509         // The nowrap attribute of a tr element is both
 510         // a Netscape and Internet-Explorer extension
 511         elemDesc = (ElemDesc) m_elementFlags.get("TR");
 512         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 513 
 514         // ----------------------------------------------
 515         elemDesc = (ElemDesc) m_elementFlags.get("UL");
 516         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 517     }
 518 
    /**
     * Dummy element for elements not found.  {@code getElemDesc} and
     * {@code getElemDesc2} return this instance when their trie lookup
     * yields {@code null}; it carries only the {@code ElemDesc.BLOCK} flag.
     */
    static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);

    /**
     * True if URLs should be specially escaped with the %xx form.
     * Starts out {@code true}; overwritten by {@code setSpecialEscapeURLs}
     * and by {@code setOutputFormat}.
     */
    private boolean m_specialEscapeURLs = true;

    /**
     * True if the META tag should be omitted.
     * Starts out {@code false}; overwritten by {@code setOmitMetaTag}
     * and by {@code setOutputFormat}.
     */
    private boolean m_omitMetaTag = false;
 529 
 530     /**
 531      * Tells if the formatter should use special URL escaping.
 532      *
 533      * @param bool True if URLs should be specially escaped with the %xx form.
 534      */
 535     public void setSpecialEscapeURLs(boolean bool)
 536     {
 537         m_specialEscapeURLs = bool;
 538     }
 539 
 540     /**
 541      * Tells if the formatter should omit the META tag.
 542      *
 543      * @param bool True if the META tag should be omitted.
 544      */
 545     public void setOmitMetaTag(boolean bool)
 546     {
 547         m_omitMetaTag = bool;
 548     }
 549 
 550     /**
 551      * Specifies an output format for this serializer. It the
 552      * serializer has already been associated with an output format,
 553      * it will switch to the new format. This method should not be
 554      * called while the serializer is in the process of serializing
 555      * a document.
 556      *
 557      * This method can be called multiple times before starting
 558      * the serialization of a particular result-tree. In principle
 559      * all serialization parameters can be changed, with the exception
 560      * of method="html" (it must be method="html" otherwise we
 561      * shouldn't even have a ToHTMLStream object here!)
 562      *
 563      * @param format The output format or serialzation parameters
 564      * to use.
 565      */
 566     public void setOutputFormat(Properties format)
 567     {
 568 
 569         m_specialEscapeURLs =
 570             OutputPropertyUtils.getBooleanProperty(
 571                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
 572                 format);
 573 
 574         m_omitMetaTag =
 575             OutputPropertyUtils.getBooleanProperty(
 576                 OutputPropertiesFactory.S_OMIT_META_TAG,
 577                 format);
 578 
 579         super.setOutputFormat(format);
 580     }
 581 
 582     /**
 583      * Tells if the formatter should use special URL escaping.
 584      *
 585      * @return True if URLs should be specially escaped with the %xx form.
 586      */
 587     private final boolean getSpecialEscapeURLs()
 588     {
 589         return m_specialEscapeURLs;
 590     }
 591 
 592     /**
 593      * Tells if the formatter should omit the META tag.
 594      *
 595      * @return True if the META tag should be omitted.
 596      */
 597     private final boolean getOmitMetaTag()
 598     {
 599         return m_omitMetaTag;
 600     }
 601 
 602     /**
 603      * Get a description of the given element.
 604      *
 605      * @param name non-null name of element, case insensitive.
 606      *
 607      * @return non-null reference to ElemDesc, which may be m_dummy if no
 608      *         element description matches the given name.
 609      */
 610     public static final ElemDesc getElemDesc(String name)
 611     {
 612         /* this method used to return m_dummy  when name was null
 613          * but now it doesn't check and and requires non-null name.
 614          */
 615         Object obj = m_elementFlags.get(name);
 616         if (null != obj)
 617             return (ElemDesc)obj;
 618         return m_dummy;
 619     }
 620 
    /**
     * A Trie that is just a copy of the "static" one ({@code m_elementFlags}),
     * created separately for each serializer instance.
     * We need this one to be able to use the faster, but not thread-safe
     * method Trie.get2(name); it is queried by {@code getElemDesc2}.
     */
    private Trie m_htmlInfo = new Trie(m_elementFlags);
 627     /**
 628      * Calls to this method could be replaced with calls to
 629      * getElemDesc(name), but this one should be faster.
 630      */
 631     private ElemDesc getElemDesc2(String name)
 632     {
 633         Object obj = m_htmlInfo.get2(name);
 634         if (null != obj)
 635             return (ElemDesc)obj;
 636         return m_dummy;
 637     }
 638 
 639     /**
 640      * Default constructor.
 641      */
 642     public ToHTMLStream()
 643     {
 644 
 645         super();
 646         m_charInfo = m_htmlcharInfo;
 647         // initialize namespaces
 648         m_prefixMap = new NamespaceMappings();
 649 
 650     }
 651 
    // The name of the current element (this field is no longer declared; kept for reference only).
//    private String m_currentElementName = null;
 654 
 655     /**
 656      * Receive notification of the beginning of a document.
 657      *
 658      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 659      *            wrapping another exception.
 660      *
 661      * @throws org.xml.sax.SAXException
 662      */
 663     protected void startDocumentInternal() throws org.xml.sax.SAXException
 664     {
 665         super.startDocumentInternal();
 666 
 667         m_needToCallStartDocument = false;
 668         m_needToOutputDocTypeDecl = true;
 669         m_startNewLine = false;
 670         setOmitXMLDeclaration(true);
 671 
 672         if (true == m_needToOutputDocTypeDecl)
 673         {
 674             String doctypeSystem = getDoctypeSystem();
 675             String doctypePublic = getDoctypePublic();
 676             if ((null != doctypeSystem) || (null != doctypePublic))
 677             {
 678                 final java.io.Writer writer = m_writer;
 679                 try
 680                 {
 681                 writer.write("<!DOCTYPE html");
 682 
 683                 if (null != doctypePublic)
 684                 {
 685                     writer.write(" PUBLIC \"");
 686                     writer.write(doctypePublic);
 687                     writer.write('"');
 688                 }
 689 
 690                 if (null != doctypeSystem)
 691                 {
 692                     if (null == doctypePublic)
 693                         writer.write(" SYSTEM \"");
 694                     else
 695                         writer.write(" \"");
 696 
 697                     writer.write(doctypeSystem);
 698                     writer.write('"');
 699                 }
 700 
 701                 writer.write('>');
 702                 outputLineSep();
 703                 }
 704                 catch(IOException e)
 705                 {
 706                     throw new SAXException(e);
 707                 }
 708             }
 709         }
 710 
 711         m_needToOutputDocTypeDecl = false;
 712     }
 713 
 714     /**
 715      * Receive notification of the end of a document.
 716      *
 717      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 718      *            wrapping another exception.
 719      *
 720      * @throws org.xml.sax.SAXException
 721      */
 722     public final void endDocument() throws org.xml.sax.SAXException
 723     {
 724 
 725         flushPending();
 726         if (m_doIndent && !m_isprevtext)
 727         {
 728             try
 729             {
 730             outputLineSep();
 731             }
 732             catch(IOException e)
 733             {
 734                 throw new SAXException(e);
 735             }
 736         }
 737 
 738         flushWriter();
 739         if (m_tracer != null)
 740             super.fireEndDoc();
 741     }
 742 
    /**
     * Receive notification of the beginning of an element.
     *
     * <p>Any pending start tag, CDATA section or deferred start-of-document
     * is dealt with first. An element with a non-empty namespace URI is then
     * passed to {@code super.startElement} and serialized by the superclass.
     * Otherwise the element description is looked up by {@code name},
     * indentation is applied if enabled, the attributes are saved for later
     * processing, and {@code '<'} followed by {@code name} is written.
     * For the element flagged {@code ElemDesc.HEADELEM} the start tag is
     * closed immediately and, unless {@code m_omitMetaTag} is set, a
     * Content-Type {@code META} element is written.</p>
     *
     * @param namespaceURI The namespace URI of the element; when non-null and
     *             non-empty the call is delegated to the superclass.
     * @param localName The local name; forwarded to the superclass or stored
     *             in the pushed element context.
     * @param name The element type name.
     * @param atts The attributes attached to the element, if any.
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *             wrapping another exception (an {@code IOException} from
     *             the writer is wrapped).
     * @see #endElement
     * @see org.xml.sax.Attributes
     */
    public void startElement(
        String namespaceURI,
        String localName,
        String name,
        Attributes atts)
        throws org.xml.sax.SAXException
    {

        ElemContext elemContext = m_elemContext;

        // clean up any pending things first; only the first matching
        // condition of the chain is handled on this call
        if (elemContext.m_startTagOpen)
        {
            closeStartTag();
            elemContext.m_startTagOpen = false;
        }
        else if (m_cdataTagOpen)
        {
            closeCDATA();
            m_cdataTagOpen = false;
        }
        else if (m_needToCallStartDocument)
        {
            startDocumentInternal();
            m_needToCallStartDocument = false;
        }


        // if this element has a namespace then treat it like XML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.startElement(namespaceURI, localName, name, atts);

            return;
        }

        try
        {
            // getElemDesc2(name) is faster than getElemDesc(name)
            ElemDesc elemDesc = getElemDesc2(name);
            int elemFlags = elemDesc.getFlags();

            // deal with indentation issues first
            if (m_doIndent)
            {

                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                if (m_ispreserve)
                    // the preserve flag is consumed by this element: no indent
                    m_ispreserve = false;
                else if (
                    (null != elemContext.m_elementName)
                    && (!m_inBlockElem
                        || isBlockElement) /* && !isWhiteSpaceSensitive */
                    )
                {
                    // there is an enclosing named element and either this is
                    // a block element or m_inBlockElem is currently false
                    m_startNewLine = true;

                    indent();

                }
                // note: the flag is set to the negation of "is block element"
                m_inBlockElem = !isBlockElement;
            }

            // save any attributes for later processing
            if (atts != null)
                addAttributes(atts);

            m_isprevtext = false;
            final java.io.Writer writer = m_writer;
            // the start tag is left open here; '>' is written later
            writer.write('<');
            writer.write(name);



            if (m_tracer != null)
                firePseudoAttributes();

            if ((elemFlags & ElemDesc.EMPTY) != 0)
            {
                // OPTIMIZE-EMPTY
                // an optimization for elements which are expected
                // to be empty: push a context via the no-argument push()
                // and return before the HEAD handling below.
                m_elemContext = elemContext.push();
                /* XSLTC sometimes calls namespaceAfterStartElement()
                 * so we need to remember the name
                 */
                m_elemContext.m_elementName = name;
                m_elemContext.m_elementDesc = elemDesc;
                return;
            }
            else
            {
                // regular element: push a fully described context and record
                // whether its content is flagged RAW
                elemContext = elemContext.push(namespaceURI,localName,name);
                m_elemContext = elemContext;
                elemContext.m_elementDesc = elemDesc;
                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
            }


            if ((elemFlags & ElemDesc.HEADELEM) != 0)
            {
                // This is the <HEAD> element, do some special processing:
                // close its start tag now so the META element can follow it
                closeStartTag();
                elemContext.m_startTagOpen = false;
                if (!m_omitMetaTag)
                {
                    if (m_doIndent)
                        indent();
                    writer.write(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
                    // charset value is the MIME name of the output encoding
                    String encoding = getEncoding();
                    String encode = Encodings.getMimeEncoding(encoding);
                    writer.write(encode);
                    writer.write("\">");
                }
            }
        }
        catch (IOException e)
        {
            // writer failures are reported through the SAX contract
            throw new SAXException(e);
        }
    }
 877 
 878     /**
 879      *  Receive notification of the end of an element.
 880      *
 881      *
 882      *  @param namespaceURI
 883      *  @param localName
 884      *  @param name The element type name
 885      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 886      *             wrapping another exception.
 887      */
 888     public final void endElement(
 889         final String namespaceURI,
 890         final String localName,
 891         final String name)
 892         throws org.xml.sax.SAXException
 893     {
 894         // deal with any pending issues
 895         if (m_cdataTagOpen)
 896             closeCDATA();
 897 
 898         // if the element has a namespace, treat it like XML, not HTML
 899         if (null != namespaceURI && namespaceURI.length() > 0)
 900         {
 901             super.endElement(namespaceURI, localName, name);
 902 
 903             return;
 904         }
 905 
 906         try
 907         {
 908 
 909             ElemContext elemContext = m_elemContext;
 910             final ElemDesc elemDesc = elemContext.m_elementDesc;
 911             final int elemFlags = elemDesc.getFlags();
 912             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
 913 
 914             // deal with any indentation issues
 915             if (m_doIndent)
 916             {
 917                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
 918                 boolean shouldIndent = false;
 919 
 920                 if (m_ispreserve)
 921                 {
 922                     m_ispreserve = false;
 923                 }
 924                 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
 925                 {
 926                     m_startNewLine = true;
 927                     shouldIndent = true;
 928                 }
 929                 if (!elemContext.m_startTagOpen && shouldIndent)
 930                     indent(elemContext.m_currentElemDepth - 1);
 931                 m_inBlockElem = !isBlockElement;
 932             }
 933 
 934             final java.io.Writer writer = m_writer;
 935             if (!elemContext.m_startTagOpen)
 936             {
 937                 writer.write("</");
 938                 writer.write(name);
 939                 writer.write('>');
 940             }
 941             else
 942             {
 943                 // the start-tag open when this method was called,
 944                 // so we need to process it now.
 945 
 946                 if (m_tracer != null)
 947                     super.fireStartElem(name);
 948 
 949                 // the starting tag was still open when we received this endElement() call
 950                 // so we need to process any gathered attributes NOW, before they go away.
 951                 int nAttrs = m_attributes.getLength();
 952                 if (nAttrs > 0)
 953                 {
 954                     processAttributes(m_writer, nAttrs);
 955                     // clear attributes object for re-use with next element
 956                     m_attributes.clear();
 957                 }
 958                 if (!elemEmpty)
 959                 {
 960                     // As per Dave/Paul recommendation 12/06/2000
 961                     // if (shouldIndent)
 962                     // writer.write('>');
 963                     //  indent(m_currentIndent);
 964 
 965                     writer.write("></");
 966                     writer.write(name);
 967                     writer.write('>');
 968                 }
 969                 else
 970                 {
 971                     writer.write('>');
 972                 }
 973             }
 974 
 975             // clean up because the element has ended
 976             if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
 977                 m_ispreserve = true;
 978             m_isprevtext = false;
 979 
 980             // fire off the end element event
 981             if (m_tracer != null)
 982                 super.fireEndElem(name);
 983 
 984             // OPTIMIZE-EMPTY
 985             if (elemEmpty)
 986             {
 987                 // a quick exit if the HTML element had no children.
 988                 // This block of code can be removed if the corresponding block of code
 989                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
 990                 m_elemContext = elemContext.m_prev;
 991                 return;
 992             }
 993 
 994             // some more clean because the element has ended.
 995             if (!elemContext.m_startTagOpen)
 996             {
 997                 if (m_doIndent && !m_preserves.isEmpty())
 998                     m_preserves.pop();
 999             }
1000             m_elemContext = elemContext.m_prev;
1001 //            m_isRawStack.pop();
1002         }
1003         catch (IOException e)
1004         {
1005             throw new SAXException(e);
1006         }
1007     }
1008 
1009     /**
1010      * Process an attribute.
1011      * @param   writer The writer to write the processed output to.
1012      * @param   name   The name of the attribute.
1013      * @param   value   The value of the attribute.
1014      * @param   elemDesc The description of the HTML element
1015      *           that has this attribute.
1016      *
1017      * @throws org.xml.sax.SAXException
1018      */
1019     protected void processAttribute(
1020         java.io.Writer writer,
1021         String name,
1022         String value,
1023         ElemDesc elemDesc)
1024         throws IOException, SAXException
1025     {
1026         writer.write(' ');
1027 
1028         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1029             && elemDesc != null
1030             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1031         {
1032             writer.write(name);
1033         }
1034         else
1035         {
1036             // %REVIEW% %OPT%
1037             // Two calls to single-char write may NOT
1038             // be more efficient than one to string-write...
1039             writer.write(name);
1040             writer.write("=\"");
1041             if (   elemDesc != null
1042                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1043                 writeAttrURI(writer, value, m_specialEscapeURLs);
1044             else
1045                 writeAttrString(writer, value, this.getEncoding());
1046             writer.write('"');
1047 
1048         }
1049     }
1050 
1051     /**
1052      * Tell if a character is an ASCII digit.
1053      */
1054     private boolean isASCIIDigit(char c)
1055     {
1056         return (c >= '0' && c <= '9');
1057     }
1058 
1059     /**
1060      * Make an integer into an HH hex value.
1061      * Does no checking on the size of the input, since this
1062      * is only meant to be used locally by writeAttrURI.
1063      *
1064      * @param i must be a value less than 255.
1065      *
1066      * @return should be a two character string.
1067      */
1068     private static String makeHHString(int i)
1069     {
1070         String s = Integer.toHexString(i).toUpperCase();
1071         if (s.length() == 1)
1072         {
1073             s = "0" + s;
1074         }
1075         return s;
1076     }
1077 
1078     /**
1079     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1080     * @param str must be 2 characters long
1081     *
1082     * @return true or false
1083     */
1084     private boolean isHHSign(String str)
1085     {
1086         boolean sign = true;
1087         try
1088         {
1089             char r = (char) Integer.parseInt(str, 16);
1090         }
1091         catch (NumberFormatException e)
1092         {
1093             sign = false;
1094         }
1095         return sign;
1096     }
1097 
1098     /**
1099      * Write the specified <var>string</var> after substituting non ASCII characters,
1100      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1101      *
1102      * @param   string      String to convert to XML format.
1103      * @param doURLEscaping True if we should try to encode as
1104      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1105      *
1106      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1107      */
1108     public void writeAttrURI(
1109         final java.io.Writer writer, String string, boolean doURLEscaping)
1110         throws IOException
1111     {
1112         // http://www.ietf.org/rfc/rfc2396.txt says:
1113         // A URI is always in an "escaped" form, since escaping or unescaping a
1114         // completed URI might change its semantics.  Normally, the only time
1115         // escape encodings can safely be made is when the URI is being created
1116         // from its component parts; each component may have its own set of
1117         // characters that are reserved, so only the mechanism responsible for
1118         // generating or interpreting that component can determine whether or
1119         // not escaping a character will change its semantics. Likewise, a URI
1120         // must be separated into its components before the escaped characters
1121         // within those components can be safely decoded.
1122         //
1123         // ...So we do our best to do limited escaping of the URL, without
1124         // causing damage.  If the URL is already properly escaped, in theory, this
1125         // function should not change the string value.
1126 
1127         final int end = string.length();
1128         if (end > m_attrBuff.length)
1129         {
1130            m_attrBuff = new char[end*2 + 1];
1131         }
1132         string.getChars(0,end, m_attrBuff, 0);
1133         final char[] chars = m_attrBuff;
1134 
1135         int cleanStart = 0;
1136         int cleanLength = 0;
1137 
1138 
1139         char ch = 0;
1140         for (int i = 0; i < end; i++)
1141         {
1142             ch = chars[i];
1143 
1144             if ((ch < 32) || (ch > 126))
1145             {
1146                 if (cleanLength > 0)
1147                 {
1148                     writer.write(chars, cleanStart, cleanLength);
1149                     cleanLength = 0;
1150                 }
1151                 if (doURLEscaping)
1152                 {
1153                     // Encode UTF16 to UTF8.
1154                     // Reference is Unicode, A Primer, by Tony Graham.
1155                     // Page 92.
1156 
1157                     // Note that Kay doesn't escape 0x20...
1158                     //  if(ch == 0x20) // Not sure about this... -sb
1159                     //  {
1160                     //    writer.write(ch);
1161                     //  }
1162                     //  else
1163                     if (ch <= 0x7F)
1164                     {
1165                         writer.write('%');
1166                         writer.write(makeHHString(ch));
1167                     }
1168                     else if (ch <= 0x7FF)
1169                     {
1170                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1171                         // and set two high bits.
1172                         int high = (ch >> 6) | 0xC0;
1173                         int low = (ch & 0x3F) | 0x80;
1174                         // First 6 bits, + high bit
1175                         writer.write('%');
1176                         writer.write(makeHHString(high));
1177                         writer.write('%');
1178                         writer.write(makeHHString(low));
1179                     }
1180                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1181                     {
1182                         // I'm sure this can be done in 3 instructions, but I choose
1183                         // to try and do it exactly like it is done in the book, at least
1184                         // until we are sure this is totally clean.  I don't think performance
1185                         // is a big issue with this particular function, though I could be
1186                         // wrong.  Also, the stuff below clearly does more masking than
1187                         // it needs to do.
1188 
1189                         // Clear high 6 bits.
1190                         int highSurrogate = ((int) ch) & 0x03FF;
1191 
1192                         // Middle 4 bits (wwww) + 1
1193                         // "Note that the value of wwww from the high surrogate bit pattern
1194                         // is incremented to make the uuuuu bit pattern in the scalar value
1195                         // so the surrogate pair don't address the BMP."
1196                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1197                         int uuuuu = wwww + 1;
1198 
1199                         // next 4 bits
1200                         int zzzz = (highSurrogate & 0x003C) >> 2;
1201 
1202                         // low 2 bits
1203                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1204 
1205                         // Get low surrogate character.
1206                         ch = chars[++i];
1207 
1208                         // Clear high 6 bits.
1209                         int lowSurrogate = ((int) ch) & 0x03FF;
1210 
1211                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1212                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1213 
1214                         // bottom 6 bits.
1215                         int xxxxxx = (lowSurrogate & 0x003F);
1216 
1217                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1218                         int byte2 =
1219                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1220                         int byte3 = 0x80 | yyyyyy;
1221                         int byte4 = 0x80 | xxxxxx;
1222 
1223                         writer.write('%');
1224                         writer.write(makeHHString(byte1));
1225                         writer.write('%');
1226                         writer.write(makeHHString(byte2));
1227                         writer.write('%');
1228                         writer.write(makeHHString(byte3));
1229                         writer.write('%');
1230                         writer.write(makeHHString(byte4));
1231                     }
1232                     else
1233                     {
1234                         int high = (ch >> 12) | 0xE0; // top 4 bits
1235                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1236                         // middle 6 bits
1237                         int low = (ch & 0x3F) | 0x80;
1238                         // First 6 bits, + high bit
1239                         writer.write('%');
1240                         writer.write(makeHHString(high));
1241                         writer.write('%');
1242                         writer.write(makeHHString(middle));
1243                         writer.write('%');
1244                         writer.write(makeHHString(low));
1245                     }
1246 
1247                 }
1248                 else if (escapingNotNeeded(ch))
1249                 {
1250                     writer.write(ch);
1251                 }
1252                 else
1253                 {
1254                     writer.write("&#");
1255                     writer.write(Integer.toString(ch));
1256                     writer.write(';');
1257                 }
1258                 // In this character range we have first written out any previously accumulated
1259                 // "clean" characters, then processed the current more complicated character,
1260                 // which may have incremented "i".
1261                 // We now we reset the next possible clean character.
1262                 cleanStart = i + 1;
1263             }
1264             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1265             // not allowing quotes in the URI proper syntax, nor in the fragment
1266             // identifier, we believe that it's OK to double escape quotes.
1267             else if (ch == '"')
1268             {
1269                 // If the character is a '%' number number, try to avoid double-escaping.
1270                 // There is a question if this is legal behavior.
1271 
1272                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1273                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1274 
1275                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1276 
1277                 // We are no longer escaping '%'
1278 
1279                 if (cleanLength > 0)
1280                 {
1281                     writer.write(chars, cleanStart, cleanLength);
1282                     cleanLength = 0;
1283                 }
1284 
1285 
1286                 // Mike Kay encodes this as ", so he may know something I don't?
1287                 if (doURLEscaping)
1288                     writer.write("%22");
1289                 else
1290                     writer.write("&quot;"); // we have to escape this, I guess.
1291 
1292                 // We have written out any clean characters, then the escaped '%' and now we
1293                 // We now we reset the next possible clean character.
1294                 cleanStart = i + 1;
1295             }
1296             else if (ch == '&')
1297             {
1298                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1299                 // instead of "&" to avoid confusion with the beginning of a character
1300                 // reference (entity reference open delimiter).
1301                 if (cleanLength > 0)
1302                 {
1303                     writer.write(chars, cleanStart, cleanLength);
1304                     cleanLength = 0;
1305                 }
1306                 writer.write("&amp;");
1307                 cleanStart = i + 1;
1308             }
1309             else
1310             {
1311                 // no processing for this character, just count how
1312                 // many characters in a row that we have that need no processing
1313                 cleanLength++;
1314             }
1315         }
1316 
1317         // are there any clean characters at the end of the array
1318         // that we haven't processed yet?
1319         if (cleanLength > 1)
1320         {
1321             // if the whole string can be written out as-is do so
1322             // otherwise write out the clean chars at the end of the
1323             // array
1324             if (cleanStart == 0)
1325                 writer.write(string);
1326             else
1327                 writer.write(chars, cleanStart, cleanLength);
1328         }
1329         else if (cleanLength == 1)
1330         {
1331             // a little optimization for 1 clean character
1332             // (we could have let the previous if(...) handle them all)
1333             writer.write(ch);
1334         }
1335     }
1336 
1337     /**
1338      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1339      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1340      *
1341      * @param   string      String to convert to XML format.
1342      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1343      *
1344      * @throws org.xml.sax.SAXException
1345      */
1346     public void writeAttrString(
1347         final java.io.Writer writer, String string, String encoding)
1348         throws IOException, SAXException
1349     {
1350         final int end = string.length();
1351         if (end > m_attrBuff.length)
1352         {
1353             m_attrBuff = new char[end * 2 + 1];
1354         }
1355         string.getChars(0, end, m_attrBuff, 0);
1356         final char[] chars = m_attrBuff;
1357 
1358 
1359 
1360         int cleanStart = 0;
1361         int cleanLength = 0;
1362 
1363         char ch = 0;
1364         for (int i = 0; i < end; i++)
1365         {
1366             ch = chars[i];
1367 
1368             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1369             // System.out.println("ch: "+(int)ch);
1370             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1371             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1372             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1373             {
1374                 cleanLength++;
1375             }
1376             else if ('<' == ch || '>' == ch)
1377             {
1378                 cleanLength++; // no escaping in this case, as specified in 15.2
1379             }
1380             else if (
1381                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1382             {
1383                 cleanLength++; // no escaping in this case, as specified in 15.2
1384             }
1385             else
1386             {
1387                 if (cleanLength > 0)
1388                 {
1389                     writer.write(chars,cleanStart,cleanLength);
1390                     cleanLength = 0;
1391                 }
1392                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1393 
1394                 if (i != pos)
1395                 {
1396                     i = pos - 1;
1397                 }
1398                 else
1399                 {
1400                     if (Encodings.isHighUTF16Surrogate(ch) ||
1401                             Encodings.isLowUTF16Surrogate(ch))
1402                     {
1403                         if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
1404                             // move the index if the low surrogate is consumed
1405                             // as writeUTF16Surrogate has written the pair
1406                             if (Encodings.isHighUTF16Surrogate(ch)) {
1407                                 i++;
1408                             }
1409                         }
1410                     }
1411 
1412                     // The next is kind of a hack to keep from escaping in the case
1413                     // of Shift_JIS and the like.
1414 
1415                     /*
1416                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1417                     && (ch != 160))
1418                     {
1419                     writer.write(ch);  // no escaping in this case
1420                     }
1421                     else
1422                     */
1423                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1424                     if (null != outputStringForChar)
1425                     {
1426                         writer.write(outputStringForChar);
1427                     }
1428                     else if (escapingNotNeeded(ch))
1429                     {
1430                         writer.write(ch); // no escaping in this case
1431                     }
1432                     else
1433                     {
1434                         writer.write("&#");
1435                         writer.write(Integer.toString(ch));
1436                         writer.write(';');
1437                     }
1438                 }
1439                 cleanStart = i + 1;
1440             }
1441         } // end of for()
1442 
1443         // are there any clean characters at the end of the array
1444         // that we haven't processed yet?
1445         if (cleanLength > 1)
1446         {
1447             // if the whole string can be written out as-is do so
1448             // otherwise write out the clean chars at the end of the
1449             // array
1450             if (cleanStart == 0)
1451                 writer.write(string);
1452             else
1453                 writer.write(chars, cleanStart, cleanLength);
1454         }
1455         else if (cleanLength == 1)
1456         {
1457             // a little optimization for 1 clean character
1458             // (we could have let the previous if(...) handle them all)
1459             writer.write(ch);
1460         }
1461     }
1462 
1463 
1464 
1465     /**
1466      * Receive notification of character data.
1467      *
1468      * <p>The Parser will call this method to report each chunk of
1469      * character data.  SAX parsers may return all contiguous character
1470      * data in a single chunk, or they may split it into several
1471      * chunks; however, all of the characters in any single event
1472      * must come from the same external entity, so that the Locator
1473      * provides useful information.</p>
1474      *
1475      * <p>The application must not attempt to read from the array
1476      * outside of the specified range.</p>
1477      *
1478      * <p>Note that some parsers will report whitespace using the
1479      * ignorableWhitespace() method rather than this one (validating
1480      * parsers must do so).</p>
1481      *
1482      * @param chars The characters from the XML document.
1483      * @param start The start position in the array.
1484      * @param length The number of characters to read from the array.
1485      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1486      *            wrapping another exception.
1487      * @see #ignorableWhitespace
1488      * @see org.xml.sax.Locator
1489      *
1490      * @throws org.xml.sax.SAXException
1491      */
1492     public final void characters(char chars[], int start, int length)
1493         throws org.xml.sax.SAXException
1494     {
1495 
1496         if (m_elemContext.m_isRaw)
1497         {
1498             try
1499             {
1500                 if (m_elemContext.m_startTagOpen)
1501                 {
1502                     closeStartTag();
1503                     m_elemContext.m_startTagOpen = false;
1504                 }
1505                 m_ispreserve = true;
1506 
1507 //              With m_ispreserve just set true it looks like shouldIndent()
1508 //              will always return false, so drop any possible indentation.
1509 //              if (shouldIndent())
1510 //                  indent();
1511 
1512                 // writer.write("<![CDATA[");
1513                 // writer.write(chars, start, length);
1514                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1515 
1516                 // writer.write("]]>");
1517 
1518                 // time to generate characters event
1519                 if (m_tracer != null)
1520                     super.fireCharEvent(chars, start, length);
1521 
1522                 return;
1523             }
1524             catch (IOException ioe)
1525             {
1526                 throw new org.xml.sax.SAXException(
1527                     Utils.messages.createMessage(
1528                         MsgKey.ER_OIERROR,
1529                         null),
1530                     ioe);
1531                 //"IO error", ioe);
1532             }
1533         }
1534         else
1535         {
1536             super.characters(chars, start, length);
1537         }
1538     }
1539 
1540     /**
1541      *  Receive notification of cdata.
1542      *
1543      *  <p>The Parser will call this method to report each chunk of
1544      *  character data.  SAX parsers may return all contiguous character
1545      *  data in a single chunk, or they may split it into several
1546      *  chunks; however, all of the characters in any single event
1547      *  must come from the same external entity, so that the Locator
1548      *  provides useful information.</p>
1549      *
1550      *  <p>The application must not attempt to read from the array
1551      *  outside of the specified range.</p>
1552      *
1553      *  <p>Note that some parsers will report whitespace using the
1554      *  ignorableWhitespace() method rather than this one (validating
1555      *  parsers must do so).</p>
1556      *
1557      *  @param ch The characters from the XML document.
1558      *  @param start The start position in the array.
1559      *  @param length The number of characters to read from the array.
1560      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1561      *             wrapping another exception.
1562      *  @see #ignorableWhitespace
1563      *  @see org.xml.sax.Locator
1564      *
1565      * @throws org.xml.sax.SAXException
1566      */
1567     public final void cdata(char ch[], int start, int length)
1568         throws org.xml.sax.SAXException
1569     {
1570 
1571         if ((null != m_elemContext.m_elementName)
1572             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1573                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1574         {
1575             try
1576             {
1577                 if (m_elemContext.m_startTagOpen)
1578                 {
1579                     closeStartTag();
1580                     m_elemContext.m_startTagOpen = false;
1581                 }
1582 
1583                 m_ispreserve = true;
1584 
1585                 if (shouldIndent())
1586                     indent();
1587 
1588                 // writer.write(ch, start, length);
1589                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1590             }
1591             catch (IOException ioe)
1592             {
1593                 throw new org.xml.sax.SAXException(
1594                     Utils.messages.createMessage(
1595                         MsgKey.ER_OIERROR,
1596                         null),
1597                     ioe);
1598                 //"IO error", ioe);
1599             }
1600         }
1601         else
1602         {
1603             super.cdata(ch, start, length);
1604         }
1605     }
1606 
1607     /**
1608      *  Receive notification of a processing instruction.
1609      *
1610      *  @param target The processing instruction target.
1611      *  @param data The processing instruction data, or null if
1612      *         none was supplied.
1613      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1614      *             wrapping another exception.
1615      *
1616      * @throws org.xml.sax.SAXException
1617      */
1618     public void processingInstruction(String target, String data)
1619         throws org.xml.sax.SAXException
1620     {
1621 
1622         // Process any pending starDocument and startElement first.
1623         flushPending();
1624 
1625         // Use a fairly nasty hack to tell if the next node is supposed to be
1626         // unescaped text.
1627         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1628         {
1629             startNonEscaping();
1630         }
1631         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1632         {
1633             endNonEscaping();
1634         }
1635         else
1636         {
1637             try
1638             {
1639             if (m_elemContext.m_startTagOpen)
1640             {
1641                 closeStartTag();
1642                 m_elemContext.m_startTagOpen = false;
1643             }
1644             else if (m_needToCallStartDocument)
1645                 startDocumentInternal();
1646 
1647             if (shouldIndent())
1648                 indent();
1649 
1650             final java.io.Writer writer = m_writer;
1651             //writer.write("<?" + target);
1652             writer.write("<?");
1653             writer.write(target);
1654 
1655             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1656                 writer.write(' ');
1657 
1658             //writer.write(data + ">"); // different from XML
1659             writer.write(data); // different from XML
1660             writer.write('>'); // different from XML
1661 
1662             // Always output a newline char if not inside of an
1663             // element. The whitespace is not significant in that
1664             // case.
1665             if (m_elemContext.m_currentElemDepth <= 0)
1666                 outputLineSep();
1667 
1668             m_startNewLine = true;
1669             }
1670             catch(IOException e)
1671             {
1672                 throw new SAXException(e);
1673             }
1674         }
1675 
1676         // now generate the PI event
1677         if (m_tracer != null)
1678             super.fireEscapingEvent(target, data);
1679      }
1680 
1681     /**
     * Receive notification of an entity reference.
1683      *
1684      * @param name non-null reference to entity name string.
1685      *
1686      * @throws org.xml.sax.SAXException
1687      */
1688     public final void entityReference(String name)
1689         throws org.xml.sax.SAXException
1690     {
1691         try
1692         {
1693 
1694         final java.io.Writer writer = m_writer;
1695         writer.write('&');
1696         writer.write(name);
1697         writer.write(';');
1698 
1699         } catch(IOException e)
1700         {
1701             throw new SAXException(e);
1702         }
1703     }
1704     /**
1705      * @see ExtendedContentHandler#endElement(String)
1706      */
1707     public final void endElement(String elemName) throws SAXException
1708     {
1709         endElement(null, null, elemName);
1710     }
1711 
1712     /**
1713      * Process the attributes, which means to write out the currently
1714      * collected attributes to the writer. The attributes are not
1715      * cleared by this method
1716      *
1717      * @param writer the writer to write processed attributes to.
1718      * @param nAttrs the number of attributes in m_attributes
1719      * to be processed
1720      *
1721      * @throws org.xml.sax.SAXException
1722      */
1723     public void processAttributes(java.io.Writer writer, int nAttrs)
1724         throws IOException,SAXException
1725     {
1726             /*
1727              * process the collected attributes
1728              */
1729             for (int i = 0; i < nAttrs; i++)
1730             {
1731                 processAttribute(
1732                     writer,
1733                     m_attributes.getQName(i),
1734                     m_attributes.getValue(i),
1735                     m_elemContext.m_elementDesc);
1736             }
1737     }
1738 
1739     /**
     * For the enclosing element's start tag, write out any attributes
1741      * followed by ">"
1742      *
1743      *@throws org.xml.sax.SAXException
1744      */
1745     protected void closeStartTag() throws SAXException
1746     {
1747             try
1748             {
1749 
1750             // finish processing attributes, time to fire off the start element event
1751             if (m_tracer != null)
1752                 super.fireStartElem(m_elemContext.m_elementName);
1753 
1754             int nAttrs = m_attributes.getLength();
1755             if (nAttrs>0)
1756             {
1757                 processAttributes(m_writer, nAttrs);
1758                 // clear attributes object for re-use with next element
1759                 m_attributes.clear();
1760             }
1761 
1762             m_writer.write('>');
1763 
1764             /* whether Xalan or XSLTC, we have the prefix mappings now, so
1765              * lets determine if the current element is specified in the cdata-
1766              * section-elements list.
1767              */
1768             if (m_cdataSectionElements != null)
1769                 m_elemContext.m_isCdataSection = isCdataSection();
1770             if (m_doIndent)
1771             {
1772                 m_isprevtext = false;
1773                 m_preserves.push(m_ispreserve);
1774             }
1775 
1776             }
1777             catch(IOException e)
1778             {
1779                 throw new SAXException(e);
1780             }
1781     }
1782     /**
1783      * Initialize the serializer with the specified output stream and output
1784      * format. Must be called before calling any of the serialize methods.
1785      *
1786      * @param output The output stream to use
1787      * @param format The output format
1788      * @throws UnsupportedEncodingException The encoding specified   in the
1789      * output format is not supported
1790      */
1791     protected synchronized void init(OutputStream output, Properties format)
1792         throws UnsupportedEncodingException
1793     {
1794         if (null == format)
1795         {
1796             format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
1797          }
1798         super.init(output,format, false);
1799     }
1800 
1801         /**
1802          * Specifies an output stream to which the document should be
1803          * serialized. This method should not be called while the
1804          * serializer is in the process of serializing a document.
1805          * <p>
1806          * The encoding specified in the output properties is used, or
1807          * if no encoding was specified, the default for the selected
1808          * output method.
1809          *
1810          * @param output The output stream
1811          */
1812         public void setOutputStream(OutputStream output)
1813         {
1814 
1815             try
1816             {
1817                 Properties format;
1818                 if (null == m_format)
1819                     format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
1820                 else
1821                     format = m_format;
1822                 init(output, format, true);
1823             }
1824             catch (UnsupportedEncodingException uee)
1825             {
1826 
1827                 // Should have been warned in init, I guess...
1828             }
1829         }
1830         /**
1831          * This method is used when a prefix/uri namespace mapping
1832          * is indicated after the element was started with a
         * startElement() and before an endElement().
1834          * startPrefixMapping(prefix,uri) would be used before the
1835          * startElement() call.
1836          * @param uri the URI of the namespace
1837          * @param prefix the prefix associated with the given URI.
1838          *
1839          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1840          */
1841         public void namespaceAfterStartElement(String prefix, String uri)
1842             throws SAXException
1843         {
1844             // hack for XSLTC with finding URI for default namespace
1845             if (m_elemContext.m_elementURI == null)
1846             {
1847                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1848                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1849                 {
1850                     // the elements URI is not known yet, and it
1851                     // doesn't have a prefix, and we are currently
1852                     // setting the uri for prefix "", so we have
1853                     // the uri for the element... lets remember it
1854                     m_elemContext.m_elementURI = uri;
1855                 }
1856             }
1857             startPrefixMapping(prefix,uri,false);
1858         }
1859 
1860     public void startDTD(String name, String publicId, String systemId)
1861         throws SAXException
1862     {
1863         m_inDTD = true;
1864         super.startDTD(name, publicId, systemId);
1865     }
1866 
1867     /**
1868      * Report the end of DTD declarations.
1869      * @throws org.xml.sax.SAXException The application may raise an exception.
1870      * @see #startDTD
1871      */
1872     public void endDTD() throws org.xml.sax.SAXException
1873     {
1874         m_inDTD = false;
1875         /* for ToHTMLStream the DOCTYPE is entirely output in the
1876          * startDocumentInternal() method, so don't do anything here
1877          */
1878     }
1879     /**
1880      * This method does nothing.
1881      */
1882     public void attributeDecl(
1883         String eName,
1884         String aName,
1885         String type,
1886         String valueDefault,
1887         String value)
1888         throws SAXException
1889     {
1890         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1891     }
1892 
1893     /**
1894      * This method does nothing.
1895      */
1896     public void elementDecl(String name, String model) throws SAXException
1897     {
1898         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1899     }
1900     /**
1901      * This method does nothing.
1902      */
1903     public void internalEntityDecl(String name, String value)
1904         throws SAXException
1905     {
1906         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1907     }
1908     /**
1909      * This method does nothing.
1910      */
1911     public void externalEntityDecl(
1912         String name,
1913         String publicId,
1914         String systemId)
1915         throws SAXException
1916     {
1917         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1918     }
1919 
1920     /**
1921      * This method is used to add an attribute to the currently open element.
     * The caller has guaranteed that this attribute is unique, which means that it
     * has not been seen before and will not be seen again.
1924      *
1925      * @param name the qualified name of the attribute
1926      * @param value the value of the attribute which can contain only
     * ASCII printable characters in the range 32 to 127 inclusive.
1928      * @param flags the bit values of this integer give optimization information.
1929      */
1930     public void addUniqueAttribute(String name, String value, int flags)
1931         throws SAXException
1932     {
1933         try
1934         {
1935             final java.io.Writer writer = m_writer;
1936             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1937             {
1938                 // "flags" has indicated that the characters
1939                 // '>'  '<'   '&'  and '"' are not in the value and
1940                 // m_htmlcharInfo has recorded that there are no other
1941                 // entities in the range 0 to 127 so we write out the
1942                 // value directly
1943                 writer.write(' ');
1944                 writer.write(name);
1945                 writer.write("=\"");
1946                 writer.write(value);
1947                 writer.write('"');
1948             }
1949             else if (
1950                 (flags & HTML_ATTREMPTY) > 0
1951                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1952             {
1953                 writer.write(' ');
1954                 writer.write(name);
1955             }
1956             else
1957             {
1958                 writer.write(' ');
1959                 writer.write(name);
1960                 writer.write("=\"");
1961                 if ((flags & HTML_ATTRURL) > 0)
1962                 {
1963                     writeAttrURI(writer, value, m_specialEscapeURLs);
1964                 }
1965                 else
1966                 {
1967                     writeAttrString(writer, value, this.getEncoding());
1968                 }
1969                 writer.write('"');
1970             }
1971         } catch (IOException e) {
1972             throw new SAXException(e);
1973         }
1974     }
1975 
1976     public void comment(char ch[], int start, int length)
1977             throws SAXException
1978     {
1979         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1980         if (m_inDTD)
1981             return;
1982         super.comment(ch, start, length);
1983     }
1984 
1985     public boolean reset()
1986     {
1987         boolean ret = super.reset();
1988         if (!ret)
1989             return false;
1990         initToHTMLStream();
1991         return true;
1992     }
1993 
1994     private void initToHTMLStream()
1995     {
1996 //        m_elementDesc = null;
1997         m_inBlockElem = false;
1998         m_inDTD = false;
1999 //        m_isRawStack.clear();
2000         m_omitMetaTag = false;
2001         m_specialEscapeURLs = true;
2002     }
2003 
2004     static class Trie
2005     {
2006         /**
2007          * A digital search trie for 7-bit ASCII text
2008          * The API is a subset of java.util.Hashtable
2009          * The key must be a 7-bit ASCII string
2010          * The value may be any Java Object
2011          * One can get an object stored in a trie from its key,
2012          * but the search is either case sensitive or case
2013          * insensitive to the characters in the key, and this
2014          * choice of sensitivity or insensitivity is made when
2015          * the Trie is created, before any objects are put in it.
2016          *
2017          * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
2018          * It exists to cut the serializers dependancy on that package.
2019          *
2020          * @xsl.usage internal
2021          */
2022 
2023         /** Size of the m_nextChar array.  */
2024         public static final int ALPHA_SIZE = 128;
2025 
2026         /** The root node of the tree.    */
2027         final Node m_Root;
2028 
2029         /** helper buffer to convert Strings to char arrays */
2030         private char[] m_charBuffer = new char[0];
2031 
2032         /** true if the search for an object is lower case only with the key */
2033         private final boolean m_lowerCaseOnly;
2034 
2035         /**
2036          * Construct the trie that has a case insensitive search.
2037          */
2038         public Trie()
2039         {
2040             m_Root = new Node();
2041             m_lowerCaseOnly = false;
2042         }
2043 
2044         /**
2045          * Construct the trie given the desired case sensitivity with the key.
2046          * @param lowerCaseOnly true if the search keys are to be loser case only,
2047          * not case insensitive.
2048          */
2049         public Trie(boolean lowerCaseOnly)
2050         {
2051             m_Root = new Node();
2052             m_lowerCaseOnly = lowerCaseOnly;
2053         }
2054 
2055         /**
2056          * Put an object into the trie for lookup.
2057          *
2058          * @param key must be a 7-bit ASCII string
2059          * @param value any java object.
2060          *
2061          * @return The old object that matched key, or null.
2062          */
2063         public Object put(String key, Object value)
2064         {
2065 
2066             final int len = key.length();
2067             if (len > m_charBuffer.length)
2068             {
2069                 // make the biggest buffer ever needed in get(String)
2070                 m_charBuffer = new char[len];
2071             }
2072 
2073             Node node = m_Root;
2074 
2075             for (int i = 0; i < len; i++)
2076             {
2077                 Node nextNode =
2078                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2079 
2080                 if (nextNode != null)
2081                 {
2082                     node = nextNode;
2083                 }
2084                 else
2085                 {
2086                     for (; i < len; i++)
2087                     {
2088                         Node newNode = new Node();
2089                         if (m_lowerCaseOnly)
2090                         {
2091                             // put this value into the tree only with a lower case key
2092                             node.m_nextChar[Character.toLowerCase(
2093                                 key.charAt(i))] =
2094                                 newNode;
2095                         }
2096                         else
2097                         {
2098                             // put this value into the tree with a case insensitive key
2099                             node.m_nextChar[Character.toUpperCase(
2100                                 key.charAt(i))] =
2101                                 newNode;
2102                             node.m_nextChar[Character.toLowerCase(
2103                                 key.charAt(i))] =
2104                                 newNode;
2105                         }
2106                         node = newNode;
2107                     }
2108                     break;
2109                 }
2110             }
2111 
2112             Object ret = node.m_Value;
2113 
2114             node.m_Value = value;
2115 
2116             return ret;
2117         }
2118 
2119         /**
2120          * Get an object that matches the key.
2121          *
2122          * @param key must be a 7-bit ASCII string
2123          *
2124          * @return The object that matches the key, or null.
2125          */
2126         public Object get(final String key)
2127         {
2128 
2129             final int len = key.length();
2130 
2131             /* If the name is too long, we won't find it, this also keeps us
2132              * from overflowing m_charBuffer
2133              */
2134             if (m_charBuffer.length < len)
2135                 return null;
2136 
2137             Node node = m_Root;
2138             switch (len) // optimize the look up based on the number of chars
2139             {
2140                 // case 0 looks silly, but the generated bytecode runs
2141                 // faster for lookup of elements of length 2 with this in
2142                 // and a fair bit faster.  Don't know why.
2143                 case 0 :
2144                     {
2145                         return null;
2146                     }
2147 
2148                 case 1 :
2149                     {
2150                         final char ch = key.charAt(0);
2151                         if (ch < ALPHA_SIZE)
2152                         {
2153                             node = node.m_nextChar[ch];
2154                             if (node != null)
2155                                 return node.m_Value;
2156                         }
2157                         return null;
2158                     }
2159                     //                comment out case 2 because the default is faster
2160                     //                case 2 :
2161                     //                    {
2162                     //                        final char ch0 = key.charAt(0);
2163                     //                        final char ch1 = key.charAt(1);
2164                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2165                     //                        {
2166                     //                            node = node.m_nextChar[ch0];
2167                     //                            if (node != null)
2168                     //                            {
2169                     //
2170                     //                                if (ch1 < ALPHA_SIZE)
2171                     //                                {
2172                     //                                    node = node.m_nextChar[ch1];
2173                     //                                    if (node != null)
2174                     //                                        return node.m_Value;
2175                     //                                }
2176                     //                            }
2177                     //                        }
2178                     //                        return null;
2179                     //                   }
2180                 default :
2181                     {
2182                         for (int i = 0; i < len; i++)
2183                         {
2184                             // A thread-safe way to loop over the characters
2185                             final char ch = key.charAt(i);
2186                             if (ALPHA_SIZE <= ch)
2187                             {
2188                                 // the key is not 7-bit ASCII so we won't find it here
2189                                 return null;
2190                             }
2191 
2192                             node = node.m_nextChar[ch];
2193                             if (node == null)
2194                                 return null;
2195                         }
2196 
2197                         return node.m_Value;
2198                     }
2199             }
2200         }
2201 
2202         /**
2203          * The node representation for the trie.
2204          * @xsl.usage internal
2205          */
2206         private class Node
2207         {
2208 
2209             /**
2210              * Constructor, creates a Node[ALPHA_SIZE].
2211              */
2212             Node()
2213             {
2214                 m_nextChar = new Node[ALPHA_SIZE];
2215                 m_Value = null;
2216             }
2217 
2218             /** The next nodes.   */
2219             final Node m_nextChar[];
2220 
2221             /** The value.   */
2222             Object m_Value;
2223         }
2224         /**
2225          * Construct the trie from another Trie.
2226          * Both the existing Trie and this new one share the same table for
2227          * lookup, and it is assumed that the table is fully populated and
2228          * not changing anymore.
2229          *
2230          * @param existingTrie the Trie that this one is a copy of.
2231          */
2232         public Trie(Trie existingTrie)
2233         {
2234             // copy some fields from the existing Trie into this one.
2235             m_Root = existingTrie.m_Root;
2236             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2237 
2238             // get a buffer just big enough to hold the longest key in the table.
2239             int max = existingTrie.getLongestKeyLength();
2240             m_charBuffer = new char[max];
2241         }
2242 
2243         /**
2244          * Get an object that matches the key.
2245          * This method is faster than get(), but is not thread-safe.
2246          *
2247          * @param key must be a 7-bit ASCII string
2248          *
2249          * @return The object that matches the key, or null.
2250          */
2251         public Object get2(final String key)
2252         {
2253 
2254             final int len = key.length();
2255 
2256             /* If the name is too long, we won't find it, this also keeps us
2257              * from overflowing m_charBuffer
2258              */
2259             if (m_charBuffer.length < len)
2260                 return null;
2261 
2262             Node node = m_Root;
2263             switch (len) // optimize the look up based on the number of chars
2264             {
2265                 // case 0 looks silly, but the generated bytecode runs
2266                 // faster for lookup of elements of length 2 with this in
2267                 // and a fair bit faster.  Don't know why.
2268                 case 0 :
2269                     {
2270                         return null;
2271                     }
2272 
2273                 case 1 :
2274                     {
2275                         final char ch = key.charAt(0);
2276                         if (ch < ALPHA_SIZE)
2277                         {
2278                             node = node.m_nextChar[ch];
2279                             if (node != null)
2280                                 return node.m_Value;
2281                         }
2282                         return null;
2283                     }
2284                 default :
2285                     {
2286                         /* Copy string into array. This is not thread-safe because
2287                          * it modifies the contents of m_charBuffer. If multiple
2288                          * threads were to use this Trie they all would be
2289                          * using this same array (not good). So this
2290                          * method is not thread-safe, but it is faster because
2291                          * converting to a char[] and looping over elements of
2292                          * the array is faster than a String's charAt(i).
2293                          */
2294                         key.getChars(0, len, m_charBuffer, 0);
2295 
2296                         for (int i = 0; i < len; i++)
2297                         {
2298                             final char ch = m_charBuffer[i];
2299                             if (ALPHA_SIZE <= ch)
2300                             {
2301                                 // the key is not 7-bit ASCII so we won't find it here
2302                                 return null;
2303                             }
2304 
2305                             node = node.m_nextChar[ch];
2306                             if (node == null)
2307                                 return null;
2308                         }
2309 
2310                         return node.m_Value;
2311                     }
2312             }
2313         }
2314 
2315         /**
2316          * Get the length of the longest key used in the table.
2317          */
2318         public int getLongestKeyLength()
2319         {
2320             return m_charBuffer.length;
2321         }
2322     }
2323 }