1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one
   7  * or more contributor license agreements. See the NOTICE file
   8  * distributed with this work for additional information
   9  * regarding copyright ownership. The ASF licenses this file
  10  * to you under the Apache License, Version 2.0 (the  "License");
  11  * you may not use this file except in compliance with the License.
  12  * You may obtain a copy of the License at
  13  *
  14  *     http://www.apache.org/licenses/LICENSE-2.0
  15  *
  16  * Unless required by applicable law or agreed to in writing, software
  17  * distributed under the License is distributed on an "AS IS" BASIS,
  18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19  * See the License for the specific language governing permissions and
  20  * limitations under the License.
  21  */
  22 /*
  23  * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $
  24  */
  25 package com.sun.org.apache.xml.internal.serializer;
  26 
  27 import java.io.IOException;
  28 import java.io.OutputStream;
  29 import java.io.UnsupportedEncodingException;
  30 import java.util.Properties;
  31 
  32 import javax.xml.transform.Result;
  33 
  34 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  35 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  36 import org.xml.sax.Attributes;
  37 import org.xml.sax.SAXException;
  38 
  39 /**
  40  * This serializer takes a series of SAX or
  41  * SAX-like events and writes its output
  42  * to the given stream.
  43  *
  44  * This class is not a public API, it is public
  45  * because it is used from another package.
  46  *
  47  * @xsl.usage internal
  48  */
  49 public final class ToHTMLStream extends ToStream
  50 {
  51 
    /** This flag is set while receiving events from the DTD */
    protected boolean m_inDTD = false;

    /**
     * Indentation state carried from one startElement call to the next.
     *
     * The historical description of this field was "True if the current
     * element is a block element. (seems like this needs to be a stack. -sb)".
     *
     * NOTE(review): when indenting is enabled, startElement stores the
     * negation of "this element has the BLOCK flag" here, and on the next
     * call starts a new indented line only if this field is false or the new
     * element is itself a block element. The field therefore appears to mean
     * "the most recently started element was NOT a block element" -- confirm
     * against the remaining users of the field before relying on either
     * reading.
     */
    private boolean m_inBlockElem = false;
  58 
    /**
     * Map that tells which XML characters should have special treatment, and it
     * provides character to entity name lookup.
     *
     * It is obtained once, for the HTML entities resource and the HTML output
     * method, and the constructor of this class installs it as m_charInfo.
     */
    private static final CharInfo m_htmlcharInfo =
//        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * A digital search trie for fast, case insensitive lookup of ElemDesc objects.
     *
     * It is filled by the static initializer below (through initTagReference),
     * is what the static getElemDesc method consults, and is the source from
     * which each instance builds its own m_htmlInfo copy.
     */
    static final Trie m_elementFlags = new Trie();

    // Populate the shared element table once, when the class is initialized.
    static {
        initTagReference(m_elementFlags);
    }
  73     static void initTagReference(Trie m_elementFlags) {
  74 
  75         // HTML 4.0 loose DTD
  76         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  77         m_elementFlags.put(
  78             "FRAME",
  79             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  80         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  81         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  82         m_elementFlags.put(
  83             "ISINDEX",
  84             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  85         m_elementFlags.put(
  86             "APPLET",
  87             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  88         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  89         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  90         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  91 
  92         // HTML 4.0 strict DTD
  93         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  94         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  95         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  96         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  97         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  98         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  99         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
 100         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
 101         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
 102         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
 103         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
 104         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
 105         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
 106         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
 107         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
 108         m_elementFlags.put(
 109             "SUP",
 110             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 111         m_elementFlags.put(
 112             "SUB",
 113             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 114         m_elementFlags.put(
 115             "SPAN",
 116             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 117         m_elementFlags.put(
 118             "BDO",
 119             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 120         m_elementFlags.put(
 121             "BR",
 122             new ElemDesc(
 123                 0
 124                     | ElemDesc.SPECIAL
 125                     | ElemDesc.ASPECIAL
 126                     | ElemDesc.EMPTY
 127                     | ElemDesc.BLOCK));
 128         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
 129         m_elementFlags.put(
 130             "ADDRESS",
 131             new ElemDesc(
 132                 0
 133                     | ElemDesc.BLOCK
 134                     | ElemDesc.BLOCKFORM
 135                     | ElemDesc.BLOCKFORMFIELDSET));
 136         m_elementFlags.put(
 137             "DIV",
 138             new ElemDesc(
 139                 0
 140                     | ElemDesc.BLOCK
 141                     | ElemDesc.BLOCKFORM
 142                     | ElemDesc.BLOCKFORMFIELDSET));
 143         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
 144         m_elementFlags.put(
 145             "MAP",
 146             new ElemDesc(
 147                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
 148         m_elementFlags.put(
 149             "AREA",
 150             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 151         m_elementFlags.put(
 152             "LINK",
 153             new ElemDesc(
 154                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 155         m_elementFlags.put(
 156             "IMG",
 157             new ElemDesc(
 158                 0
 159                     | ElemDesc.SPECIAL
 160                     | ElemDesc.ASPECIAL
 161                     | ElemDesc.EMPTY
 162                     | ElemDesc.WHITESPACESENSITIVE));
 163         m_elementFlags.put(
 164             "OBJECT",
 165             new ElemDesc(
 166                 0
 167                     | ElemDesc.SPECIAL
 168                     | ElemDesc.ASPECIAL
 169                     | ElemDesc.HEADMISC
 170                     | ElemDesc.WHITESPACESENSITIVE));
 171         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
 172         m_elementFlags.put(
 173             "HR",
 174             new ElemDesc(
 175                 0
 176                     | ElemDesc.BLOCK
 177                     | ElemDesc.BLOCKFORM
 178                     | ElemDesc.BLOCKFORMFIELDSET
 179                     | ElemDesc.EMPTY));
 180         m_elementFlags.put(
 181             "P",
 182             new ElemDesc(
 183                 0
 184                     | ElemDesc.BLOCK
 185                     | ElemDesc.BLOCKFORM
 186                     | ElemDesc.BLOCKFORMFIELDSET));
 187         m_elementFlags.put(
 188             "H1",
 189             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 190         m_elementFlags.put(
 191             "H2",
 192             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 193         m_elementFlags.put(
 194             "H3",
 195             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 196         m_elementFlags.put(
 197             "H4",
 198             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 199         m_elementFlags.put(
 200             "H5",
 201             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 202         m_elementFlags.put(
 203             "H6",
 204             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 205         m_elementFlags.put(
 206             "PRE",
 207             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
 208         m_elementFlags.put(
 209             "Q",
 210             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 211         m_elementFlags.put(
 212             "BLOCKQUOTE",
 213             new ElemDesc(
 214                 0
 215                     | ElemDesc.BLOCK
 216                     | ElemDesc.BLOCKFORM
 217                     | ElemDesc.BLOCKFORMFIELDSET));
 218         m_elementFlags.put("INS", new ElemDesc(0));
 219         m_elementFlags.put("DEL", new ElemDesc(0));
 220         m_elementFlags.put(
 221             "DL",
 222             new ElemDesc(
 223                 0
 224                     | ElemDesc.BLOCK
 225                     | ElemDesc.BLOCKFORM
 226                     | ElemDesc.BLOCKFORMFIELDSET));
 227         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
 228         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
 229         m_elementFlags.put(
 230             "OL",
 231             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 232         m_elementFlags.put(
 233             "UL",
 234             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 235         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
 236         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
 237         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
 238         m_elementFlags.put(
 239             "INPUT",
 240             new ElemDesc(
 241                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
 242         m_elementFlags.put(
 243             "SELECT",
 244             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 245         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
 246         m_elementFlags.put("OPTION", new ElemDesc(0));
 247         m_elementFlags.put(
 248             "TEXTAREA",
 249             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 250         m_elementFlags.put(
 251             "FIELDSET",
 252             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
 253         m_elementFlags.put("LEGEND", new ElemDesc(0));
 254         m_elementFlags.put(
 255             "BUTTON",
 256             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 257         m_elementFlags.put(
 258             "TABLE",
 259             new ElemDesc(
 260                 0
 261                     | ElemDesc.BLOCK
 262                     | ElemDesc.BLOCKFORM
 263                     | ElemDesc.BLOCKFORMFIELDSET));
 264         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
 265         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
 266         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
 267         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
 268         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
 269         m_elementFlags.put(
 270             "COL",
 271             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 272         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
 273         m_elementFlags.put("TH", new ElemDesc(0));
 274         m_elementFlags.put("TD", new ElemDesc(0));
 275         m_elementFlags.put(
 276             "HEAD",
 277             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
 278         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
 279         m_elementFlags.put(
 280             "BASE",
 281             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 282         m_elementFlags.put(
 283             "META",
 284             new ElemDesc(
 285                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 286         m_elementFlags.put(
 287             "STYLE",
 288             new ElemDesc(
 289                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
 290         m_elementFlags.put(
 291             "SCRIPT",
 292             new ElemDesc(
 293                 0
 294                     | ElemDesc.SPECIAL
 295                     | ElemDesc.ASPECIAL
 296                     | ElemDesc.HEADMISC
 297                     | ElemDesc.RAW));
 298         m_elementFlags.put(
 299             "NOSCRIPT",
 300             new ElemDesc(
 301                 0
 302                     | ElemDesc.BLOCK
 303                     | ElemDesc.BLOCKFORM
 304                     | ElemDesc.BLOCKFORMFIELDSET));
 305         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
 306 
 307         // From "John Ky" <hand@syd.speednet.com.au
 308         // Transitional Document Type Definition ()
 309         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
 310         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 311 
 312         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
 313         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 314         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 315 
 316         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
 317         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 318 
 319         // From "John Ky" <hand@syd.speednet.com.au
 320         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 321 
 322         // HTML 4.0, section 16.5
 323         m_elementFlags.put(
 324             "IFRAME",
 325             new ElemDesc(
 326                 0
 327                     | ElemDesc.BLOCK
 328                     | ElemDesc.BLOCKFORM
 329                     | ElemDesc.BLOCKFORMFIELDSET));
 330 
 331         // Netscape 4 extension
 332         m_elementFlags.put(
 333             "LAYER",
 334             new ElemDesc(
 335                 0
 336                     | ElemDesc.BLOCK
 337                     | ElemDesc.BLOCKFORM
 338                     | ElemDesc.BLOCKFORMFIELDSET));
 339         // Netscape 4 extension
 340         m_elementFlags.put(
 341             "ILAYER",
 342             new ElemDesc(
 343                 0
 344                     | ElemDesc.BLOCK
 345                     | ElemDesc.BLOCKFORM
 346                     | ElemDesc.BLOCKFORMFIELDSET));
 347 
 348 
 349         // NOW FOR ATTRIBUTE INFORMATION . . .
 350         ElemDesc elemDesc;
 351 
 352 
 353         // ----------------------------------------------
 354         elemDesc = (ElemDesc) m_elementFlags.get("a");
 355         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 356         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
 357 
 358         // ----------------------------------------------
 359         elemDesc = (ElemDesc) m_elementFlags.get("area");
 360         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 361         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
 362 
 363         // ----------------------------------------------
 364         elemDesc = (ElemDesc) m_elementFlags.get("base");
 365         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 366 
 367         // ----------------------------------------------
 368         elemDesc = (ElemDesc) m_elementFlags.get("button");
 369         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 370 
 371         // ----------------------------------------------
 372         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
 373         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 374 
 375         // ----------------------------------------------
 376         elemDesc = (ElemDesc) m_elementFlags.get("del");
 377         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 378 
 379         // ----------------------------------------------
 380         elemDesc = (ElemDesc) m_elementFlags.get("dir");
 381         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 382 
 383         // ----------------------------------------------
 384 
 385         elemDesc = (ElemDesc) m_elementFlags.get("div");
 386         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
 387         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 388 
 389         // ----------------------------------------------
 390         elemDesc = (ElemDesc) m_elementFlags.get("dl");
 391         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 392 
 393         // ----------------------------------------------
 394         elemDesc = (ElemDesc) m_elementFlags.get("form");
 395         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
 396 
 397         // ----------------------------------------------
 398         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
 399         elemDesc = (ElemDesc) m_elementFlags.get("frame");
 400         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 401         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 402         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
 403 
 404         // ----------------------------------------------
 405         elemDesc = (ElemDesc) m_elementFlags.get("head");
 406         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
 407 
 408         // ----------------------------------------------
 409         elemDesc = (ElemDesc) m_elementFlags.get("hr");
 410         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
 411 
 412         // ----------------------------------------------
 413         // HTML 4.0, section 16.5
 414         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
 415         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 416         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 417 
 418         // ----------------------------------------------
 419         // Netscape 4 extension
 420         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
 421         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 422 
 423         // ----------------------------------------------
 424         elemDesc = (ElemDesc) m_elementFlags.get("img");
 425         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 426         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 427         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 428         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 429 
 430         // ----------------------------------------------
 431         elemDesc = (ElemDesc) m_elementFlags.get("input");
 432         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 433         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 434         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
 435         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 436         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 437         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 438 
 439         // ----------------------------------------------
 440         elemDesc = (ElemDesc) m_elementFlags.get("ins");
 441         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 442 
 443         // ----------------------------------------------
 444         // Netscape 4 extension
 445         elemDesc = (ElemDesc) m_elementFlags.get("layer");
 446         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 447 
 448         // ----------------------------------------------
 449         elemDesc = (ElemDesc) m_elementFlags.get("link");
 450         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 451 
 452         // ----------------------------------------------
 453         elemDesc = (ElemDesc) m_elementFlags.get("menu");
 454         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 455 
 456         // ----------------------------------------------
 457         elemDesc = (ElemDesc) m_elementFlags.get("object");
 458         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
 459         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
 460         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
 461         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
 462         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 463         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
 464 
 465         // ----------------------------------------------
 466         elemDesc = (ElemDesc) m_elementFlags.get("ol");
 467         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 468 
 469         // ----------------------------------------------
 470         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
 471         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 472 
 473         // ----------------------------------------------
 474         elemDesc = (ElemDesc) m_elementFlags.get("option");
 475         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
 476         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 477 
 478         // ----------------------------------------------
 479         elemDesc = (ElemDesc) m_elementFlags.get("q");
 480         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 481 
 482         // ----------------------------------------------
 483         elemDesc = (ElemDesc) m_elementFlags.get("script");
 484         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 485         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
 486         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
 487 
 488         // ----------------------------------------------
 489         elemDesc = (ElemDesc) m_elementFlags.get("select");
 490         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 491         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
 492 
 493         // ----------------------------------------------
 494         elemDesc = (ElemDesc) m_elementFlags.get("table");
 495         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 496 
 497         // ----------------------------------------------
 498         elemDesc = (ElemDesc) m_elementFlags.get("td");
 499         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 500 
 501         // ----------------------------------------------
 502         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
 503         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 504         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 505 
 506         // ----------------------------------------------
 507         elemDesc = (ElemDesc) m_elementFlags.get("th");
 508         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 509 
 510         // ----------------------------------------------
 511         // The nowrap attribute of a tr element is both
 512         // a Netscape and Internet-Explorer extension
 513         elemDesc = (ElemDesc) m_elementFlags.get("tr");
 514         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 515 
 516         // ----------------------------------------------
 517         elemDesc = (ElemDesc) m_elementFlags.get("ul");
 518         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 519     }
 520 
    /**
     * Dummy element for elements not found: getElemDesc and getElemDesc2
     * return this shared descriptor when the element table has no entry for
     * the requested name. It carries only the BLOCK flag.
     */
    static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);

    /**
     * True if URLs should be specially escaped with the %xx form.
     * Starts out true; replaced by setSpecialEscapeURLs and by setOutputFormat.
     */
    private boolean m_specialEscapeURLs = true;

    /**
     * True if the META tag should be omitted.
     * Starts out false; replaced by setOmitMetaTag and by setOutputFormat.
     */
    private boolean m_omitMetaTag = false;
 531 
 532     /**
 533      * Tells if the formatter should use special URL escaping.
 534      *
 535      * @param bool True if URLs should be specially escaped with the %xx form.
 536      */
 537     public void setSpecialEscapeURLs(boolean bool)
 538     {
 539         m_specialEscapeURLs = bool;
 540     }
 541 
 542     /**
 543      * Tells if the formatter should omit the META tag.
 544      *
 545      * @param bool True if the META tag should be omitted.
 546      */
 547     public void setOmitMetaTag(boolean bool)
 548     {
 549         m_omitMetaTag = bool;
 550     }
 551 
 552     /**
 553      * Specifies an output format for this serializer. It the
 554      * serializer has already been associated with an output format,
 555      * it will switch to the new format. This method should not be
 556      * called while the serializer is in the process of serializing
 557      * a document.
 558      *
 559      * This method can be called multiple times before starting
 560      * the serialization of a particular result-tree. In principle
 561      * all serialization parameters can be changed, with the exception
 562      * of method="html" (it must be method="html" otherwise we
 563      * shouldn't even have a ToHTMLStream object here!)
 564      *
 565      * @param format The output format or serialzation parameters
 566      * to use.
 567      */
 568     public void setOutputFormat(Properties format)
 569     {
 570 
 571         m_specialEscapeURLs =
 572             OutputPropertyUtils.getBooleanProperty(
 573                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
 574                 format);
 575 
 576         m_omitMetaTag =
 577             OutputPropertyUtils.getBooleanProperty(
 578                 OutputPropertiesFactory.S_OMIT_META_TAG,
 579                 format);
 580 
 581         super.setOutputFormat(format);
 582     }
 583 
 584     /**
 585      * Tells if the formatter should use special URL escaping.
 586      *
 587      * @return True if URLs should be specially escaped with the %xx form.
 588      */
 589     private final boolean getSpecialEscapeURLs()
 590     {
 591         return m_specialEscapeURLs;
 592     }
 593 
 594     /**
 595      * Tells if the formatter should omit the META tag.
 596      *
 597      * @return True if the META tag should be omitted.
 598      */
 599     private final boolean getOmitMetaTag()
 600     {
 601         return m_omitMetaTag;
 602     }
 603 
 604     /**
 605      * Get a description of the given element.
 606      *
 607      * @param name non-null name of element, case insensitive.
 608      *
 609      * @return non-null reference to ElemDesc, which may be m_dummy if no
 610      *         element description matches the given name.
 611      */
 612     public static final ElemDesc getElemDesc(String name)
 613     {
 614         /* this method used to return m_dummy  when name was null
 615          * but now it doesn't check and and requires non-null name.
 616          */
 617         Object obj = m_elementFlags.get(name);
 618         if (null != obj)
 619             return (ElemDesc)obj;
 620         return m_dummy;
 621     }
 622 
 623     /**
 624      * A Trie that is just a copy of the "static" one.
 625      * We need this one to be able to use the faster, but not thread-safe
 626      * method Trie.get2(name)
 627      */
 628     private Trie m_htmlInfo = new Trie(m_elementFlags);
 629     /**
 630      * Calls to this method could be replaced with calls to
 631      * getElemDesc(name), but this one should be faster.
 632      */
 633     private ElemDesc getElemDesc2(String name)
 634     {
 635         Object obj = m_htmlInfo.get2(name);
 636         if (null != obj)
 637             return (ElemDesc)obj;
 638         return m_dummy;
 639     }
 640 
 641     /**
 642      * Default constructor.
 643      */
 644     public ToHTMLStream()
 645     {
 646 
 647         super();
 648         m_charInfo = m_htmlcharInfo;
 649         // initialize namespaces
 650         m_prefixMap = new NamespaceMappings();
 651 
 652     }
 653 
 654     /** The name of the current element. */
 655 //    private String m_currentElementName = null;
 656 
 657     /**
 658      * Receive notification of the beginning of a document.
 659      *
 660      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 661      *            wrapping another exception.
 662      *
 663      * @throws org.xml.sax.SAXException
 664      */
 665     protected void startDocumentInternal() throws org.xml.sax.SAXException
 666     {
 667         super.startDocumentInternal();
 668 
 669         m_needToCallStartDocument = false;
 670         m_needToOutputDocTypeDecl = true;
 671         m_startNewLine = false;
 672         setOmitXMLDeclaration(true);
 673 
 674         if (true == m_needToOutputDocTypeDecl)
 675         {
 676             String doctypeSystem = getDoctypeSystem();
 677             String doctypePublic = getDoctypePublic();
 678             if ((null != doctypeSystem) || (null != doctypePublic))
 679             {
 680                 final java.io.Writer writer = m_writer;
 681                 try
 682                 {
 683                 writer.write("<!DOCTYPE html");
 684 
 685                 if (null != doctypePublic)
 686                 {
 687                     writer.write(" PUBLIC \"");
 688                     writer.write(doctypePublic);
 689                     writer.write('"');
 690                 }
 691 
 692                 if (null != doctypeSystem)
 693                 {
 694                     if (null == doctypePublic)
 695                         writer.write(" SYSTEM \"");
 696                     else
 697                         writer.write(" \"");
 698 
 699                     writer.write(doctypeSystem);
 700                     writer.write('"');
 701                 }
 702 
 703                 writer.write('>');
 704                 outputLineSep();
 705                 }
 706                 catch(IOException e)
 707                 {
 708                     throw new SAXException(e);
 709                 }
 710             }
 711         }
 712 
 713         m_needToOutputDocTypeDecl = false;
 714     }
 715 
 716     /**
 717      * Receive notification of the end of a document.
 718      *
 719      * @throws org.xml.sax.SAXException Any SAX exception, possibly
 720      *            wrapping another exception.
 721      *
 722      * @throws org.xml.sax.SAXException
 723      */
 724     public final void endDocument() throws org.xml.sax.SAXException
 725     {
 726 
 727         flushPending();
 728         if (m_doIndent && !m_isprevtext)
 729         {
 730             try
 731             {
 732             outputLineSep();
 733             }
 734             catch(IOException e)
 735             {
 736                 throw new SAXException(e);
 737             }
 738         }
 739 
 740         flushWriter();
 741         if (m_tracer != null)
 742             super.fireEndDoc();
 743     }
 744 
 745     /**
 746      *  Receive notification of the beginning of an element.
 747      *
 748      *
 749      *  @param namespaceURI
 750      *  @param localName
 751      *  @param name The element type name.
 752      *  @param atts The attributes attached to the element, if any.
 753      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 754      *             wrapping another exception.
 755      *  @see #endElement
 756      *  @see org.xml.sax.AttributeList
 757      */
 758     public void startElement(
 759         String namespaceURI,
 760         String localName,
 761         String name,
 762         Attributes atts)
 763         throws org.xml.sax.SAXException
 764     {
 765 
 766         ElemContext elemContext = m_elemContext;
 767 
 768         // clean up any pending things first
 769         if (elemContext.m_startTagOpen)
 770         {
 771             closeStartTag();
 772             elemContext.m_startTagOpen = false;
 773         }
 774         else if (m_cdataTagOpen)
 775         {
 776             closeCDATA();
 777             m_cdataTagOpen = false;
 778         }
 779         else if (m_needToCallStartDocument)
 780         {
 781             startDocumentInternal();
 782             m_needToCallStartDocument = false;
 783         }
 784 
 785 
 786         // if this element has a namespace then treat it like XML
 787         if (null != namespaceURI && namespaceURI.length() > 0)
 788         {
 789             super.startElement(namespaceURI, localName, name, atts);
 790 
 791             return;
 792         }
 793 
 794         try
 795         {
 796             // getElemDesc2(name) is faster than getElemDesc(name)
 797             ElemDesc elemDesc = getElemDesc2(name);
 798             int elemFlags = elemDesc.getFlags();
 799 
 800             // deal with indentation issues first
 801             if (m_doIndent)
 802             {
 803 
 804                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
 805                 if (m_ispreserve)
 806                     m_ispreserve = false;
 807                 else if (
 808                     (null != elemContext.m_elementName)
 809                     && (!m_inBlockElem
 810                         || isBlockElement) /* && !isWhiteSpaceSensitive */
 811                     )
 812                 {
 813                     m_startNewLine = true;
 814 
 815                     indent();
 816 
 817                 }
 818                 m_inBlockElem = !isBlockElement;
 819             }
 820 
 821             // save any attributes for later processing
 822             if (atts != null)
 823                 addAttributes(atts);
 824 
 825             m_isprevtext = false;
 826             final java.io.Writer writer = m_writer;
 827             writer.write('<');
 828             writer.write(name);
 829 
 830 
 831 
 832             if (m_tracer != null)
 833                 firePseudoAttributes();
 834 
 835             if ((elemFlags & ElemDesc.EMPTY) != 0)
 836             {
 837                 // an optimization for elements which are expected
 838                 // to be empty.
 839                 m_elemContext = elemContext.push();
 840                 /* XSLTC sometimes calls namespaceAfterStartElement()
 841                  * so we need to remember the name
 842                  */
 843                 m_elemContext.m_elementName = name;
 844                 m_elemContext.m_elementDesc = elemDesc;
 845                 return;
 846             }
 847             else
 848             {
 849                 elemContext = elemContext.push(namespaceURI,localName,name);
 850                 m_elemContext = elemContext;
 851                 elemContext.m_elementDesc = elemDesc;
 852                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
 853             }
 854 
 855 
 856             if ((elemFlags & ElemDesc.HEADELEM) != 0)
 857             {
 858                 // This is the <HEAD> element, do some special processing
 859                 closeStartTag();
 860                 elemContext.m_startTagOpen = false;
 861                 if (!m_omitMetaTag)
 862                 {
 863                     if (m_doIndent)
 864                         indent();
 865                     writer.write(
 866                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
 867                     String encoding = getEncoding();
 868                     String encode = Encodings.getMimeEncoding(encoding);
 869                     writer.write(encode);
 870                     writer.write("\">");
 871                 }
 872             }
 873         }
 874         catch (IOException e)
 875         {
 876             throw new SAXException(e);
 877         }
 878     }
 879 
 880     /**
 881      *  Receive notification of the end of an element.
 882      *
 883      *
 884      *  @param namespaceURI
 885      *  @param localName
 886      *  @param name The element type name
 887      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 888      *             wrapping another exception.
 889      */
 890     public final void endElement(
 891         final String namespaceURI,
 892         final String localName,
 893         final String name)
 894         throws org.xml.sax.SAXException
 895     {
 896         // deal with any pending issues
 897         if (m_cdataTagOpen)
 898             closeCDATA();
 899 
 900         // if the element has a namespace, treat it like XML, not HTML
 901         if (null != namespaceURI && namespaceURI.length() > 0)
 902         {
 903             super.endElement(namespaceURI, localName, name);
 904 
 905             return;
 906         }
 907 
 908         try
 909         {
 910 
 911             ElemContext elemContext = m_elemContext;
 912             final ElemDesc elemDesc = elemContext.m_elementDesc;
 913             final int elemFlags = elemDesc.getFlags();
 914             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
 915 
 916             // deal with any indentation issues
 917             if (m_doIndent)
 918             {
 919                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
 920                 boolean shouldIndent = false;
 921 
 922                 if (m_ispreserve)
 923                 {
 924                     m_ispreserve = false;
 925                 }
 926                 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
 927                 {
 928                     m_startNewLine = true;
 929                     shouldIndent = true;
 930                 }
 931                 if (!elemContext.m_startTagOpen && shouldIndent)
 932                     indent(elemContext.m_currentElemDepth - 1);
 933                 m_inBlockElem = !isBlockElement;
 934             }
 935 
 936             final java.io.Writer writer = m_writer;
 937             if (!elemContext.m_startTagOpen)
 938             {
 939                 writer.write("</");
 940                 writer.write(name);
 941                 writer.write('>');
 942             }
 943             else
 944             {
 945                 // the start-tag open when this method was called,
 946                 // so we need to process it now.
 947 
 948                 if (m_tracer != null)
 949                     super.fireStartElem(name);
 950 
 951                 // the starting tag was still open when we received this endElement() call
 952                 // so we need to process any gathered attributes NOW, before they go away.
 953                 int nAttrs = m_attributes.getLength();
 954                 if (nAttrs > 0)
 955                 {
 956                     processAttributes(m_writer, nAttrs);
 957                     // clear attributes object for re-use with next element
 958                     m_attributes.clear();
 959                 }
 960                 if (!elemEmpty)
 961                 {
 962                     // As per Dave/Paul recommendation 12/06/2000
 963                     // if (shouldIndent)
 964                     // writer.write('>');
 965                     //  indent(m_currentIndent);
 966 
 967                     writer.write("></");
 968                     writer.write(name);
 969                     writer.write('>');
 970                 }
 971                 else
 972                 {
 973                     writer.write('>');
 974                 }
 975             }
 976 
 977             // clean up because the element has ended
 978             if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
 979                 m_ispreserve = true;
 980             m_isprevtext = false;
 981 
 982             // fire off the end element event
 983             if (m_tracer != null)
 984                 super.fireEndElem(name);
 985 
 986             // OPTIMIZE-EMPTY
 987             if (elemEmpty)
 988             {
 989                 // a quick exit if the HTML element had no children.
 990                 // This block of code can be removed if the corresponding block of code
 991                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
 992                 m_elemContext = elemContext.m_prev;
 993                 return;
 994             }
 995 
 996             // some more clean because the element has ended.
 997             if (!elemContext.m_startTagOpen)
 998             {
 999                 if (m_doIndent && !m_preserves.isEmpty())
1000                     m_preserves.pop();
1001             }
1002             m_elemContext = elemContext.m_prev;
1003 //            m_isRawStack.pop();
1004         }
1005         catch (IOException e)
1006         {
1007             throw new SAXException(e);
1008         }
1009     }
1010 
1011     /**
1012      * Process an attribute.
1013      * @param   writer The writer to write the processed output to.
1014      * @param   name   The name of the attribute.
1015      * @param   value   The value of the attribute.
1016      * @param   elemDesc The description of the HTML element
1017      *           that has this attribute.
1018      *
1019      * @throws org.xml.sax.SAXException
1020      */
1021     protected void processAttribute(
1022         java.io.Writer writer,
1023         String name,
1024         String value,
1025         ElemDesc elemDesc)
1026         throws IOException
1027     {
1028         writer.write(' ');
1029 
1030         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1031             && elemDesc != null
1032             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1033         {
1034             writer.write(name);
1035         }
1036         else
1037         {
1038             // %REVIEW% %OPT%
1039             // Two calls to single-char write may NOT
1040             // be more efficient than one to string-write...
1041             writer.write(name);
1042             writer.write("=\"");
1043             if (   elemDesc != null
1044                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1045                 writeAttrURI(writer, value, m_specialEscapeURLs);
1046             else
1047                 writeAttrString(writer, value, this.getEncoding());
1048             writer.write('"');
1049 
1050         }
1051     }
1052 
1053     /**
1054      * Tell if a character is an ASCII digit.
1055      */
1056     private boolean isASCIIDigit(char c)
1057     {
1058         return (c >= '0' && c <= '9');
1059     }
1060 
1061     /**
1062      * Make an integer into an HH hex value.
1063      * Does no checking on the size of the input, since this
1064      * is only meant to be used locally by writeAttrURI.
1065      *
1066      * @param i must be a value less than 255.
1067      *
1068      * @return should be a two character string.
1069      */
1070     private static String makeHHString(int i)
1071     {
1072         String s = Integer.toHexString(i).toUpperCase();
1073         if (s.length() == 1)
1074         {
1075             s = "0" + s;
1076         }
1077         return s;
1078     }
1079 
1080     /**
1081     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1082     * @param str must be 2 characters long
1083     *
1084     * @return true or false
1085     */
1086     private boolean isHHSign(String str)
1087     {
1088         boolean sign = true;
1089         try
1090         {
1091             char r = (char) Integer.parseInt(str, 16);
1092         }
1093         catch (NumberFormatException e)
1094         {
1095             sign = false;
1096         }
1097         return sign;
1098     }
1099 
1100     /**
1101      * Write the specified <var>string</var> after substituting non ASCII characters,
1102      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1103      *
1104      * @param   string      String to convert to XML format.
1105      * @param doURLEscaping True if we should try to encode as
1106      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1107      *
1108      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1109      */
1110     public void writeAttrURI(
1111         final java.io.Writer writer, String string, boolean doURLEscaping)
1112         throws IOException
1113     {
1114         // http://www.ietf.org/rfc/rfc2396.txt says:
1115         // A URI is always in an "escaped" form, since escaping or unescaping a
1116         // completed URI might change its semantics.  Normally, the only time
1117         // escape encodings can safely be made is when the URI is being created
1118         // from its component parts; each component may have its own set of
1119         // characters that are reserved, so only the mechanism responsible for
1120         // generating or interpreting that component can determine whether or
1121         // not escaping a character will change its semantics. Likewise, a URI
1122         // must be separated into its components before the escaped characters
1123         // within those components can be safely decoded.
1124         //
1125         // ...So we do our best to do limited escaping of the URL, without
1126         // causing damage.  If the URL is already properly escaped, in theory, this
1127         // function should not change the string value.
1128 
1129         final int end = string.length();
1130         if (end > m_attrBuff.length)
1131         {
1132            m_attrBuff = new char[end*2 + 1];
1133         }
1134         string.getChars(0,end, m_attrBuff, 0);
1135         final char[] chars = m_attrBuff;
1136 
1137         int cleanStart = 0;
1138         int cleanLength = 0;
1139 
1140 
1141         char ch = 0;
1142         for (int i = 0; i < end; i++)
1143         {
1144             ch = chars[i];
1145 
1146             if ((ch < 32) || (ch > 126))
1147             {
1148                 if (cleanLength > 0)
1149                 {
1150                     writer.write(chars, cleanStart, cleanLength);
1151                     cleanLength = 0;
1152                 }
1153                 if (doURLEscaping)
1154                 {
1155                     // Encode UTF16 to UTF8.
1156                     // Reference is Unicode, A Primer, by Tony Graham.
1157                     // Page 92.
1158 
1159                     // Note that Kay doesn't escape 0x20...
1160                     //  if(ch == 0x20) // Not sure about this... -sb
1161                     //  {
1162                     //    writer.write(ch);
1163                     //  }
1164                     //  else
1165                     if (ch <= 0x7F)
1166                     {
1167                         writer.write('%');
1168                         writer.write(makeHHString(ch));
1169                     }
1170                     else if (ch <= 0x7FF)
1171                     {
1172                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1173                         // and set two high bits.
1174                         int high = (ch >> 6) | 0xC0;
1175                         int low = (ch & 0x3F) | 0x80;
1176                         // First 6 bits, + high bit
1177                         writer.write('%');
1178                         writer.write(makeHHString(high));
1179                         writer.write('%');
1180                         writer.write(makeHHString(low));
1181                     }
1182                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1183                     {
1184                         // I'm sure this can be done in 3 instructions, but I choose
1185                         // to try and do it exactly like it is done in the book, at least
1186                         // until we are sure this is totally clean.  I don't think performance
1187                         // is a big issue with this particular function, though I could be
1188                         // wrong.  Also, the stuff below clearly does more masking than
1189                         // it needs to do.
1190 
1191                         // Clear high 6 bits.
1192                         int highSurrogate = ((int) ch) & 0x03FF;
1193 
1194                         // Middle 4 bits (wwww) + 1
1195                         // "Note that the value of wwww from the high surrogate bit pattern
1196                         // is incremented to make the uuuuu bit pattern in the scalar value
1197                         // so the surrogate pair don't address the BMP."
1198                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1199                         int uuuuu = wwww + 1;
1200 
1201                         // next 4 bits
1202                         int zzzz = (highSurrogate & 0x003C) >> 2;
1203 
1204                         // low 2 bits
1205                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1206 
1207                         // Get low surrogate character.
1208                         ch = chars[++i];
1209 
1210                         // Clear high 6 bits.
1211                         int lowSurrogate = ((int) ch) & 0x03FF;
1212 
1213                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1214                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1215 
1216                         // bottom 6 bits.
1217                         int xxxxxx = (lowSurrogate & 0x003F);
1218 
1219                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1220                         int byte2 =
1221                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1222                         int byte3 = 0x80 | yyyyyy;
1223                         int byte4 = 0x80 | xxxxxx;
1224 
1225                         writer.write('%');
1226                         writer.write(makeHHString(byte1));
1227                         writer.write('%');
1228                         writer.write(makeHHString(byte2));
1229                         writer.write('%');
1230                         writer.write(makeHHString(byte3));
1231                         writer.write('%');
1232                         writer.write(makeHHString(byte4));
1233                     }
1234                     else
1235                     {
1236                         int high = (ch >> 12) | 0xE0; // top 4 bits
1237                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1238                         // middle 6 bits
1239                         int low = (ch & 0x3F) | 0x80;
1240                         // First 6 bits, + high bit
1241                         writer.write('%');
1242                         writer.write(makeHHString(high));
1243                         writer.write('%');
1244                         writer.write(makeHHString(middle));
1245                         writer.write('%');
1246                         writer.write(makeHHString(low));
1247                     }
1248 
1249                 }
1250                 else if (escapingNotNeeded(ch))
1251                 {
1252                     writer.write(ch);
1253                 }
1254                 else
1255                 {
1256                     writer.write("&#");
1257                     writer.write(Integer.toString(ch));
1258                     writer.write(';');
1259                 }
1260                 // In this character range we have first written out any previously accumulated
1261                 // "clean" characters, then processed the current more complicated character,
1262                 // which may have incremented "i".
1263                 // We now we reset the next possible clean character.
1264                 cleanStart = i + 1;
1265             }
1266             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1267             // not allowing quotes in the URI proper syntax, nor in the fragment
1268             // identifier, we believe that it's OK to double escape quotes.
1269             else if (ch == '"')
1270             {
1271                 // If the character is a '%' number number, try to avoid double-escaping.
1272                 // There is a question if this is legal behavior.
1273 
1274                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1275                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1276 
1277                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1278 
1279                 // We are no longer escaping '%'
1280 
1281                 if (cleanLength > 0)
1282                 {
1283                     writer.write(chars, cleanStart, cleanLength);
1284                     cleanLength = 0;
1285                 }
1286 
1287 
1288                 // Mike Kay encodes this as ", so he may know something I don't?
1289                 if (doURLEscaping)
1290                     writer.write("%22");
1291                 else
1292                     writer.write("&quot;"); // we have to escape this, I guess.
1293 
1294                 // We have written out any clean characters, then the escaped '%' and now we
1295                 // We now we reset the next possible clean character.
1296                 cleanStart = i + 1;
1297             }
1298             else if (ch == '&')
1299             {
1300                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1301                 // instead of "&" to avoid confusion with the beginning of a character
1302                 // reference (entity reference open delimiter).
1303                 if (cleanLength > 0)
1304                 {
1305                     writer.write(chars, cleanStart, cleanLength);
1306                     cleanLength = 0;
1307                 }
1308                 writer.write("&amp;");
1309                 cleanStart = i + 1;
1310             }
1311             else
1312             {
1313                 // no processing for this character, just count how
1314                 // many characters in a row that we have that need no processing
1315                 cleanLength++;
1316             }
1317         }
1318 
1319         // are there any clean characters at the end of the array
1320         // that we haven't processed yet?
1321         if (cleanLength > 1)
1322         {
1323             // if the whole string can be written out as-is do so
1324             // otherwise write out the clean chars at the end of the
1325             // array
1326             if (cleanStart == 0)
1327                 writer.write(string);
1328             else
1329                 writer.write(chars, cleanStart, cleanLength);
1330         }
1331         else if (cleanLength == 1)
1332         {
1333             // a little optimization for 1 clean character
1334             // (we could have let the previous if(...) handle them all)
1335             writer.write(ch);
1336         }
1337     }
1338 
1339     /**
1340      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1341      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1342      *
1343      * @param   string      String to convert to XML format.
1344      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1345      *
1346      * @throws org.xml.sax.SAXException
1347      */
1348     public void writeAttrString(
1349         final java.io.Writer writer, String string, String encoding)
1350         throws IOException
1351     {
1352         final int end = string.length();
1353         if (end > m_attrBuff.length)
1354         {
1355             m_attrBuff = new char[end * 2 + 1];
1356         }
1357         string.getChars(0, end, m_attrBuff, 0);
1358         final char[] chars = m_attrBuff;
1359 
1360 
1361 
1362         int cleanStart = 0;
1363         int cleanLength = 0;
1364 
1365         char ch = 0;
1366         for (int i = 0; i < end; i++)
1367         {
1368             ch = chars[i];
1369 
1370             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1371             // System.out.println("ch: "+(int)ch);
1372             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1373             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1374             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1375             {
1376                 cleanLength++;
1377             }
1378             else if ('<' == ch || '>' == ch)
1379             {
1380                 cleanLength++; // no escaping in this case, as specified in 15.2
1381             }
1382             else if (
1383                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1384             {
1385                 cleanLength++; // no escaping in this case, as specified in 15.2
1386             }
1387             else
1388             {
1389                 if (cleanLength > 0)
1390                 {
1391                     writer.write(chars,cleanStart,cleanLength);
1392                     cleanLength = 0;
1393                 }
1394                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1395 
1396                 if (i != pos)
1397                 {
1398                     i = pos - 1;
1399                 }
1400                 else
1401                 {
1402                     if (Encodings.isHighUTF16Surrogate(ch))
1403                     {
1404 
1405                             writeUTF16Surrogate(ch, chars, i, end);
1406                             i++; // two input characters processed
1407                                  // this increments by one and the for()
1408                                  // loop itself increments by another one.
1409                     }
1410 
1411                     // The next is kind of a hack to keep from escaping in the case
1412                     // of Shift_JIS and the like.
1413 
1414                     /*
1415                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1416                     && (ch != 160))
1417                     {
1418                     writer.write(ch);  // no escaping in this case
1419                     }
1420                     else
1421                     */
1422                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1423                     if (null != outputStringForChar)
1424                     {
1425                         writer.write(outputStringForChar);
1426                     }
1427                     else if (escapingNotNeeded(ch))
1428                     {
1429                         writer.write(ch); // no escaping in this case
1430                     }
1431                     else
1432                     {
1433                         writer.write("&#");
1434                         writer.write(Integer.toString(ch));
1435                         writer.write(';');
1436                     }
1437                 }
1438                 cleanStart = i + 1;
1439             }
1440         } // end of for()
1441 
1442         // are there any clean characters at the end of the array
1443         // that we haven't processed yet?
1444         if (cleanLength > 1)
1445         {
1446             // if the whole string can be written out as-is do so
1447             // otherwise write out the clean chars at the end of the
1448             // array
1449             if (cleanStart == 0)
1450                 writer.write(string);
1451             else
1452                 writer.write(chars, cleanStart, cleanLength);
1453         }
1454         else if (cleanLength == 1)
1455         {
1456             // a little optimization for 1 clean character
1457             // (we could have let the previous if(...) handle them all)
1458             writer.write(ch);
1459         }
1460     }
1461 
1462 
1463 
1464     /**
1465      * Receive notification of character data.
1466      *
1467      * <p>The Parser will call this method to report each chunk of
1468      * character data.  SAX parsers may return all contiguous character
1469      * data in a single chunk, or they may split it into several
1470      * chunks; however, all of the characters in any single event
1471      * must come from the same external entity, so that the Locator
1472      * provides useful information.</p>
1473      *
1474      * <p>The application must not attempt to read from the array
1475      * outside of the specified range.</p>
1476      *
1477      * <p>Note that some parsers will report whitespace using the
1478      * ignorableWhitespace() method rather than this one (validating
1479      * parsers must do so).</p>
1480      *
1481      * @param chars The characters from the XML document.
1482      * @param start The start position in the array.
1483      * @param length The number of characters to read from the array.
1484      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1485      *            wrapping another exception.
1486      * @see #ignorableWhitespace
1487      * @see org.xml.sax.Locator
1488      *
1489      * @throws org.xml.sax.SAXException
1490      */
1491     public final void characters(char chars[], int start, int length)
1492         throws org.xml.sax.SAXException
1493     {
1494 
1495         if (m_elemContext.m_isRaw)
1496         {
1497             try
1498             {
1499                 if (m_elemContext.m_startTagOpen)
1500                 {
1501                     closeStartTag();
1502                     m_elemContext.m_startTagOpen = false;
1503                 }
1504                 m_ispreserve = true;
1505 
1506 //              With m_ispreserve just set true it looks like shouldIndent()
1507 //              will always return false, so drop any possible indentation.
1508 //              if (shouldIndent())
1509 //                  indent();
1510 
1511                 // writer.write("<![CDATA[");
1512                 // writer.write(chars, start, length);
1513                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1514 
1515                 // writer.write("]]>");
1516 
1517                 // time to generate characters event
1518                 if (m_tracer != null)
1519                     super.fireCharEvent(chars, start, length);
1520 
1521                 return;
1522             }
1523             catch (IOException ioe)
1524             {
1525                 throw new org.xml.sax.SAXException(
1526                     Utils.messages.createMessage(
1527                         MsgKey.ER_OIERROR,
1528                         null),
1529                     ioe);
1530                 //"IO error", ioe);
1531             }
1532         }
1533         else
1534         {
1535             super.characters(chars, start, length);
1536         }
1537     }
1538 
1539     /**
1540      *  Receive notification of cdata.
1541      *
1542      *  <p>The Parser will call this method to report each chunk of
1543      *  character data.  SAX parsers may return all contiguous character
1544      *  data in a single chunk, or they may split it into several
1545      *  chunks; however, all of the characters in any single event
1546      *  must come from the same external entity, so that the Locator
1547      *  provides useful information.</p>
1548      *
1549      *  <p>The application must not attempt to read from the array
1550      *  outside of the specified range.</p>
1551      *
1552      *  <p>Note that some parsers will report whitespace using the
1553      *  ignorableWhitespace() method rather than this one (validating
1554      *  parsers must do so).</p>
1555      *
1556      *  @param ch The characters from the XML document.
1557      *  @param start The start position in the array.
1558      *  @param length The number of characters to read from the array.
1559      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1560      *             wrapping another exception.
1561      *  @see #ignorableWhitespace
1562      *  @see org.xml.sax.Locator
1563      *
1564      * @throws org.xml.sax.SAXException
1565      */
1566     public final void cdata(char ch[], int start, int length)
1567         throws org.xml.sax.SAXException
1568     {
1569 
1570         if ((null != m_elemContext.m_elementName)
1571             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1572                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1573         {
1574             try
1575             {
1576                 if (m_elemContext.m_startTagOpen)
1577                 {
1578                     closeStartTag();
1579                     m_elemContext.m_startTagOpen = false;
1580                 }
1581 
1582                 m_ispreserve = true;
1583 
1584                 if (shouldIndent())
1585                     indent();
1586 
1587                 // writer.write(ch, start, length);
1588                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1589             }
1590             catch (IOException ioe)
1591             {
1592                 throw new org.xml.sax.SAXException(
1593                     Utils.messages.createMessage(
1594                         MsgKey.ER_OIERROR,
1595                         null),
1596                     ioe);
1597                 //"IO error", ioe);
1598             }
1599         }
1600         else
1601         {
1602             super.cdata(ch, start, length);
1603         }
1604     }
1605 
1606     /**
1607      *  Receive notification of a processing instruction.
1608      *
1609      *  @param target The processing instruction target.
1610      *  @param data The processing instruction data, or null if
1611      *         none was supplied.
1612      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1613      *             wrapping another exception.
1614      *
1615      * @throws org.xml.sax.SAXException
1616      */
1617     public void processingInstruction(String target, String data)
1618         throws org.xml.sax.SAXException
1619     {
1620 
1621         // Process any pending starDocument and startElement first.
1622         flushPending();
1623 
1624         // Use a fairly nasty hack to tell if the next node is supposed to be
1625         // unescaped text.
1626         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1627         {
1628             startNonEscaping();
1629         }
1630         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1631         {
1632             endNonEscaping();
1633         }
1634         else
1635         {
1636             try
1637             {
1638             if (m_elemContext.m_startTagOpen)
1639             {
1640                 closeStartTag();
1641                 m_elemContext.m_startTagOpen = false;
1642             }
1643             else if (m_needToCallStartDocument)
1644                 startDocumentInternal();
1645 
1646             if (shouldIndent())
1647                 indent();
1648 
1649             final java.io.Writer writer = m_writer;
1650             //writer.write("<?" + target);
1651             writer.write("<?");
1652             writer.write(target);
1653 
1654             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1655                 writer.write(' ');
1656 
1657             //writer.write(data + ">"); // different from XML
1658             writer.write(data); // different from XML
1659             writer.write('>'); // different from XML
1660 
1661             // Always output a newline char if not inside of an
1662             // element. The whitespace is not significant in that
1663             // case.
1664             if (m_elemContext.m_currentElemDepth <= 0)
1665                 outputLineSep();
1666 
1667             m_startNewLine = true;
1668             }
1669             catch(IOException e)
1670             {
1671                 throw new SAXException(e);
1672             }
1673         }
1674 
1675         // now generate the PI event
1676         if (m_tracer != null)
1677             super.fireEscapingEvent(target, data);
1678      }
1679 
1680     /**
1681      * Receive notivication of a entityReference.
1682      *
1683      * @param name non-null reference to entity name string.
1684      *
1685      * @throws org.xml.sax.SAXException
1686      */
1687     public final void entityReference(String name)
1688         throws org.xml.sax.SAXException
1689     {
1690         try
1691         {
1692 
1693         final java.io.Writer writer = m_writer;
1694         writer.write('&');
1695         writer.write(name);
1696         writer.write(';');
1697 
1698         } catch(IOException e)
1699         {
1700             throw new SAXException(e);
1701         }
1702     }
1703     /**
1704      * @see ExtendedContentHandler#endElement(String)
1705      */
1706     public final void endElement(String elemName) throws SAXException
1707     {
1708         endElement(null, null, elemName);
1709     }
1710 
1711     /**
1712      * Process the attributes, which means to write out the currently
1713      * collected attributes to the writer. The attributes are not
1714      * cleared by this method
1715      *
1716      * @param writer the writer to write processed attributes to.
1717      * @param nAttrs the number of attributes in m_attributes
1718      * to be processed
1719      *
1720      * @throws org.xml.sax.SAXException
1721      */
1722     public void processAttributes(java.io.Writer writer, int nAttrs)
1723         throws IOException,SAXException
1724     {
1725             /*
1726              * process the collected attributes
1727              */
1728             for (int i = 0; i < nAttrs; i++)
1729             {
1730                 processAttribute(
1731                     writer,
1732                     m_attributes.getQName(i),
1733                     m_attributes.getValue(i),
1734                     m_elemContext.m_elementDesc);
1735             }
1736     }
1737 
1738     /**
1739      * For the enclosing elements starting tag write out out any attributes
1740      * followed by ">"
1741      *
1742      *@throws org.xml.sax.SAXException
1743      */
1744     protected void closeStartTag() throws SAXException
1745     {
1746             try
1747             {
1748 
1749             // finish processing attributes, time to fire off the start element event
1750             if (m_tracer != null)
1751                 super.fireStartElem(m_elemContext.m_elementName);
1752 
1753             int nAttrs = m_attributes.getLength();
1754             if (nAttrs>0)
1755             {
1756                 processAttributes(m_writer, nAttrs);
1757                 // clear attributes object for re-use with next element
1758                 m_attributes.clear();
1759             }
1760 
1761             m_writer.write('>');
1762 
1763             /* whether Xalan or XSLTC, we have the prefix mappings now, so
1764              * lets determine if the current element is specified in the cdata-
1765              * section-elements list.
1766              */
1767             if (m_StringOfCDATASections != null)
1768                 m_elemContext.m_isCdataSection = isCdataSection();
1769             if (m_doIndent)
1770             {
1771                 m_isprevtext = false;
1772                 m_preserves.push(m_ispreserve);
1773             }
1774 
1775             }
1776             catch(IOException e)
1777             {
1778                 throw new SAXException(e);
1779             }
1780     }
1781 
1782         /**
1783          * This method is used when a prefix/uri namespace mapping
1784          * is indicated after the element was started with a
1785          * startElement() and before and endElement().
1786          * startPrefixMapping(prefix,uri) would be used before the
1787          * startElement() call.
1788          * @param uri the URI of the namespace
1789          * @param prefix the prefix associated with the given URI.
1790          *
1791          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1792          */
1793         public void namespaceAfterStartElement(String prefix, String uri)
1794             throws SAXException
1795         {
1796             // hack for XSLTC with finding URI for default namespace
1797             if (m_elemContext.m_elementURI == null)
1798             {
1799                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1800                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1801                 {
1802                     // the elements URI is not known yet, and it
1803                     // doesn't have a prefix, and we are currently
1804                     // setting the uri for prefix "", so we have
1805                     // the uri for the element... lets remember it
1806                     m_elemContext.m_elementURI = uri;
1807                 }
1808             }
1809             startPrefixMapping(prefix,uri,false);
1810         }
1811 
1812     public void startDTD(String name, String publicId, String systemId)
1813         throws SAXException
1814     {
1815         m_inDTD = true;
1816         super.startDTD(name, publicId, systemId);
1817     }
1818 
1819     /**
1820      * Report the end of DTD declarations.
1821      * @throws org.xml.sax.SAXException The application may raise an exception.
1822      * @see #startDTD
1823      */
1824     public void endDTD() throws org.xml.sax.SAXException
1825     {
1826         m_inDTD = false;
1827         /* for ToHTMLStream the DOCTYPE is entirely output in the
1828          * startDocumentInternal() method, so don't do anything here
1829          */
1830     }
1831     /**
1832      * This method does nothing.
1833      */
1834     public void attributeDecl(
1835         String eName,
1836         String aName,
1837         String type,
1838         String valueDefault,
1839         String value)
1840         throws SAXException
1841     {
1842         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1843     }
1844 
1845     /**
1846      * This method does nothing.
1847      */
1848     public void elementDecl(String name, String model) throws SAXException
1849     {
1850         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1851     }
1852     /**
1853      * This method does nothing.
1854      */
1855     public void internalEntityDecl(String name, String value)
1856         throws SAXException
1857     {
1858         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1859     }
1860     /**
1861      * This method does nothing.
1862      */
1863     public void externalEntityDecl(
1864         String name,
1865         String publicId,
1866         String systemId)
1867         throws SAXException
1868     {
1869         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1870     }
1871 
1872     /**
1873      * This method is used to add an attribute to the currently open element.
1874      * The caller has guaranted that this attribute is unique, which means that it
1875      * not been seen before and will not be seen again.
1876      *
1877      * @param name the qualified name of the attribute
1878      * @param value the value of the attribute which can contain only
1879      * ASCII printable characters characters in the range 32 to 127 inclusive.
1880      * @param flags the bit values of this integer give optimization information.
1881      */
1882     public void addUniqueAttribute(String name, String value, int flags)
1883         throws SAXException
1884     {
1885         try
1886         {
1887             final java.io.Writer writer = m_writer;
1888             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1889             {
1890                 // "flags" has indicated that the characters
1891                 // '>'  '<'   '&'  and '"' are not in the value and
1892                 // m_htmlcharInfo has recorded that there are no other
1893                 // entities in the range 0 to 127 so we write out the
1894                 // value directly
1895                 writer.write(' ');
1896                 writer.write(name);
1897                 writer.write("=\"");
1898                 writer.write(value);
1899                 writer.write('"');
1900             }
1901             else if (
1902                 (flags & HTML_ATTREMPTY) > 0
1903                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1904             {
1905                 writer.write(' ');
1906                 writer.write(name);
1907             }
1908             else
1909             {
1910                 writer.write(' ');
1911                 writer.write(name);
1912                 writer.write("=\"");
1913                 if ((flags & HTML_ATTRURL) > 0)
1914                 {
1915                     writeAttrURI(writer, value, m_specialEscapeURLs);
1916                 }
1917                 else
1918                 {
1919                     writeAttrString(writer, value, this.getEncoding());
1920                 }
1921                 writer.write('"');
1922             }
1923         } catch (IOException e) {
1924             throw new SAXException(e);
1925         }
1926     }
1927 
1928     public void comment(char ch[], int start, int length)
1929             throws SAXException
1930     {
1931         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1932         if (m_inDTD)
1933             return;
1934         super.comment(ch, start, length);
1935     }
1936 
1937     public boolean reset()
1938     {
1939         boolean ret = super.reset();
1940         if (!ret)
1941             return false;
1942         initToHTMLStream();
1943         return true;
1944     }
1945 
1946     private void initToHTMLStream()
1947     {
1948 //        m_elementDesc = null;
1949         m_inBlockElem = false;
1950         m_inDTD = false;
1951 //        m_isRawStack.clear();
1952         m_omitMetaTag = false;
1953         m_specialEscapeURLs = true;
1954     }
1955 
1956     static class Trie
1957     {
1958         /**
1959          * A digital search trie for 7-bit ASCII text
1960          * The API is a subset of java.util.Hashtable
1961          * The key must be a 7-bit ASCII string
1962          * The value may be any Java Object
1963          * One can get an object stored in a trie from its key,
1964          * but the search is either case sensitive or case
1965          * insensitive to the characters in the key, and this
1966          * choice of sensitivity or insensitivity is made when
1967          * the Trie is created, before any objects are put in it.
1968          *
1969          * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
1970          * It exists to cut the serializers dependancy on that package.
1971          *
1972          * @xsl.usage internal
1973          */
1974 
1975         /** Size of the m_nextChar array.  */
1976         public static final int ALPHA_SIZE = 128;
1977 
1978         /** The root node of the tree.    */
1979         final Node m_Root;
1980 
1981         /** helper buffer to convert Strings to char arrays */
1982         private char[] m_charBuffer = new char[0];
1983 
1984         /** true if the search for an object is lower case only with the key */
1985         private final boolean m_lowerCaseOnly;
1986 
1987         /**
1988          * Construct the trie that has a case insensitive search.
1989          */
1990         public Trie()
1991         {
1992             m_Root = new Node();
1993             m_lowerCaseOnly = false;
1994         }
1995 
1996         /**
1997          * Construct the trie given the desired case sensitivity with the key.
1998          * @param lowerCaseOnly true if the search keys are to be loser case only,
1999          * not case insensitive.
2000          */
2001         public Trie(boolean lowerCaseOnly)
2002         {
2003             m_Root = new Node();
2004             m_lowerCaseOnly = lowerCaseOnly;
2005         }
2006 
2007         /**
2008          * Put an object into the trie for lookup.
2009          *
2010          * @param key must be a 7-bit ASCII string
2011          * @param value any java object.
2012          *
2013          * @return The old object that matched key, or null.
2014          */
2015         public Object put(String key, Object value)
2016         {
2017 
2018             final int len = key.length();
2019             if (len > m_charBuffer.length)
2020             {
2021                 // make the biggest buffer ever needed in get(String)
2022                 m_charBuffer = new char[len];
2023             }
2024 
2025             Node node = m_Root;
2026 
2027             for (int i = 0; i < len; i++)
2028             {
2029                 Node nextNode =
2030                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2031 
2032                 if (nextNode != null)
2033                 {
2034                     node = nextNode;
2035                 }
2036                 else
2037                 {
2038                     for (; i < len; i++)
2039                     {
2040                         Node newNode = new Node();
2041                         if (m_lowerCaseOnly)
2042                         {
2043                             // put this value into the tree only with a lower case key
2044                             node.m_nextChar[Character.toLowerCase(
2045                                 key.charAt(i))] =
2046                                 newNode;
2047                         }
2048                         else
2049                         {
2050                             // put this value into the tree with a case insensitive key
2051                             node.m_nextChar[Character.toUpperCase(
2052                                 key.charAt(i))] =
2053                                 newNode;
2054                             node.m_nextChar[Character.toLowerCase(
2055                                 key.charAt(i))] =
2056                                 newNode;
2057                         }
2058                         node = newNode;
2059                     }
2060                     break;
2061                 }
2062             }
2063 
2064             Object ret = node.m_Value;
2065 
2066             node.m_Value = value;
2067 
2068             return ret;
2069         }
2070 
2071         /**
2072          * Get an object that matches the key.
2073          *
2074          * @param key must be a 7-bit ASCII string
2075          *
2076          * @return The object that matches the key, or null.
2077          */
2078         public Object get(final String key)
2079         {
2080 
2081             final int len = key.length();
2082 
2083             /* If the name is too long, we won't find it, this also keeps us
2084              * from overflowing m_charBuffer
2085              */
2086             if (m_charBuffer.length < len)
2087                 return null;
2088 
2089             Node node = m_Root;
2090             switch (len) // optimize the look up based on the number of chars
2091             {
2092                 // case 0 looks silly, but the generated bytecode runs
2093                 // faster for lookup of elements of length 2 with this in
2094                 // and a fair bit faster.  Don't know why.
2095                 case 0 :
2096                     {
2097                         return null;
2098                     }
2099 
2100                 case 1 :
2101                     {
2102                         final char ch = key.charAt(0);
2103                         if (ch < ALPHA_SIZE)
2104                         {
2105                             node = node.m_nextChar[ch];
2106                             if (node != null)
2107                                 return node.m_Value;
2108                         }
2109                         return null;
2110                     }
2111                     //                comment out case 2 because the default is faster
2112                     //                case 2 :
2113                     //                    {
2114                     //                        final char ch0 = key.charAt(0);
2115                     //                        final char ch1 = key.charAt(1);
2116                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2117                     //                        {
2118                     //                            node = node.m_nextChar[ch0];
2119                     //                            if (node != null)
2120                     //                            {
2121                     //
2122                     //                                if (ch1 < ALPHA_SIZE)
2123                     //                                {
2124                     //                                    node = node.m_nextChar[ch1];
2125                     //                                    if (node != null)
2126                     //                                        return node.m_Value;
2127                     //                                }
2128                     //                            }
2129                     //                        }
2130                     //                        return null;
2131                     //                   }
2132                 default :
2133                     {
2134                         for (int i = 0; i < len; i++)
2135                         {
2136                             // A thread-safe way to loop over the characters
2137                             final char ch = key.charAt(i);
2138                             if (ALPHA_SIZE <= ch)
2139                             {
2140                                 // the key is not 7-bit ASCII so we won't find it here
2141                                 return null;
2142                             }
2143 
2144                             node = node.m_nextChar[ch];
2145                             if (node == null)
2146                                 return null;
2147                         }
2148 
2149                         return node.m_Value;
2150                     }
2151             }
2152         }
2153 
2154         /**
2155          * The node representation for the trie.
2156          * @xsl.usage internal
2157          */
2158         private class Node
2159         {
2160 
2161             /**
2162              * Constructor, creates a Node[ALPHA_SIZE].
2163              */
2164             Node()
2165             {
2166                 m_nextChar = new Node[ALPHA_SIZE];
2167                 m_Value = null;
2168             }
2169 
2170             /** The next nodes.   */
2171             final Node m_nextChar[];
2172 
2173             /** The value.   */
2174             Object m_Value;
2175         }
2176         /**
2177          * Construct the trie from another Trie.
2178          * Both the existing Trie and this new one share the same table for
2179          * lookup, and it is assumed that the table is fully populated and
2180          * not changing anymore.
2181          *
2182          * @param existingTrie the Trie that this one is a copy of.
2183          */
2184         public Trie(Trie existingTrie)
2185         {
2186             // copy some fields from the existing Trie into this one.
2187             m_Root = existingTrie.m_Root;
2188             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2189 
2190             // get a buffer just big enough to hold the longest key in the table.
2191             int max = existingTrie.getLongestKeyLength();
2192             m_charBuffer = new char[max];
2193         }
2194 
2195         /**
2196          * Get an object that matches the key.
2197          * This method is faster than get(), but is not thread-safe.
2198          *
2199          * @param key must be a 7-bit ASCII string
2200          *
2201          * @return The object that matches the key, or null.
2202          */
2203         public Object get2(final String key)
2204         {
2205 
2206             final int len = key.length();
2207 
2208             /* If the name is too long, we won't find it, this also keeps us
2209              * from overflowing m_charBuffer
2210              */
2211             if (m_charBuffer.length < len)
2212                 return null;
2213 
2214             Node node = m_Root;
2215             switch (len) // optimize the look up based on the number of chars
2216             {
2217                 // case 0 looks silly, but the generated bytecode runs
2218                 // faster for lookup of elements of length 2 with this in
2219                 // and a fair bit faster.  Don't know why.
2220                 case 0 :
2221                     {
2222                         return null;
2223                     }
2224 
2225                 case 1 :
2226                     {
2227                         final char ch = key.charAt(0);
2228                         if (ch < ALPHA_SIZE)
2229                         {
2230                             node = node.m_nextChar[ch];
2231                             if (node != null)
2232                                 return node.m_Value;
2233                         }
2234                         return null;
2235                     }
2236                 default :
2237                     {
2238                         /* Copy string into array. This is not thread-safe because
2239                          * it modifies the contents of m_charBuffer. If multiple
2240                          * threads were to use this Trie they all would be
2241                          * using this same array (not good). So this
2242                          * method is not thread-safe, but it is faster because
2243                          * converting to a char[] and looping over elements of
2244                          * the array is faster than a String's charAt(i).
2245                          */
2246                         key.getChars(0, len, m_charBuffer, 0);
2247 
2248                         for (int i = 0; i < len; i++)
2249                         {
2250                             final char ch = m_charBuffer[i];
2251                             if (ALPHA_SIZE <= ch)
2252                             {
2253                                 // the key is not 7-bit ASCII so we won't find it here
2254                                 return null;
2255                             }
2256 
2257                             node = node.m_nextChar[ch];
2258                             if (node == null)
2259                                 return null;
2260                         }
2261 
2262                         return node.m_Value;
2263                     }
2264             }
2265         }
2266 
2267         /**
2268          * Get the length of the longest key used in the table.
2269          */
2270         public int getLongestKeyLength()
2271         {
2272             return m_charBuffer.length;
2273         }
2274     }
2275 }