1 /*
   2  * Copyright (c) 2014, 2016 Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.serializer;
  22 
  23 import java.io.IOException;
  24 import java.util.Properties;
  25 
  26 import javax.xml.transform.Result;
  27 
  28 import org.xml.sax.Attributes;
  29 import org.xml.sax.SAXException;
  30 
  31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  32 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  33 
  34 /**
  35  * This serializer takes a series of SAX or
  36  * SAX-like events and writes its output
  37  * to the given stream.
  38  *
  39  * This class is not a public API, it is public
  40  * because it is used from another package.
  41  *
  42  * @xsl.usage internal
  43  */
  44 public final class ToHTMLStream extends ToStream
  45 {
  46 
    /** Set while events coming from the DTD are being received. */
    protected boolean m_inDTD = false;

    /** True if the previous element is a block element. */
    private boolean m_isprevblock = false;

    /**
     * Character table for HTML output: it tells which XML characters need
     * special treatment and provides character-to-entity-name lookup.
     * Obtained from CharInfo for the HTML entities resource and the HTML
     * output method.
     */
    private static final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * A digital search trie for fast, case insensitive lookup of ElemDesc
     * objects, keyed by element name. Filled by the static initializer below.
     */
    static final Trie m_elementFlags = new Trie();

    // Fill the shared element table when the class is initialized.
    static {
        initTagReference(m_elementFlags);
    }
  67     static void initTagReference(Trie m_elementFlags) {
  68 
  69         // HTML 4.0 loose DTD
  70         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
  71         m_elementFlags.put(
  72             "FRAME",
  73             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  74         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
  75         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
  76         m_elementFlags.put(
  77             "ISINDEX",
  78             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
  79         m_elementFlags.put(
  80             "APPLET",
  81             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
  82         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
  83         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
  84         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
  85 
  86         // HTML 4.0 strict DTD
  87         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  88         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  89         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  90         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  91         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
  92         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
  93         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
  94         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
  95         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
  96         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
  97         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
  98         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
  99         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
 100         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
 101         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
 102         m_elementFlags.put(
 103             "SUP",
 104             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 105         m_elementFlags.put(
 106             "SUB",
 107             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 108         m_elementFlags.put(
 109             "SPAN",
 110             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 111         m_elementFlags.put(
 112             "BDO",
 113             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 114         m_elementFlags.put(
 115             "BR",
 116             new ElemDesc(
 117                 0
 118                     | ElemDesc.SPECIAL
 119                     | ElemDesc.ASPECIAL
 120                     | ElemDesc.EMPTY
 121                     | ElemDesc.BLOCK));
 122         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
 123         m_elementFlags.put(
 124             "ADDRESS",
 125             new ElemDesc(
 126                 0
 127                     | ElemDesc.BLOCK
 128                     | ElemDesc.BLOCKFORM
 129                     | ElemDesc.BLOCKFORMFIELDSET));
 130         m_elementFlags.put(
 131             "DIV",
 132             new ElemDesc(
 133                 0
 134                     | ElemDesc.BLOCK
 135                     | ElemDesc.BLOCKFORM
 136                     | ElemDesc.BLOCKFORMFIELDSET));
 137         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
 138         m_elementFlags.put(
 139             "MAP",
 140             new ElemDesc(
 141                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
 142         m_elementFlags.put(
 143             "AREA",
 144             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 145         m_elementFlags.put(
 146             "LINK",
 147             new ElemDesc(
 148                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 149         m_elementFlags.put(
 150             "IMG",
 151             new ElemDesc(
 152                 0
 153                     | ElemDesc.SPECIAL
 154                     | ElemDesc.ASPECIAL
 155                     | ElemDesc.EMPTY
 156                     | ElemDesc.WHITESPACESENSITIVE));
 157         m_elementFlags.put(
 158             "OBJECT",
 159             new ElemDesc(
 160                 0
 161                     | ElemDesc.SPECIAL
 162                     | ElemDesc.ASPECIAL
 163                     | ElemDesc.HEADMISC
 164                     | ElemDesc.WHITESPACESENSITIVE));
 165         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
 166         m_elementFlags.put(
 167             "HR",
 168             new ElemDesc(
 169                 0
 170                     | ElemDesc.BLOCK
 171                     | ElemDesc.BLOCKFORM
 172                     | ElemDesc.BLOCKFORMFIELDSET
 173                     | ElemDesc.EMPTY));
 174         m_elementFlags.put(
 175             "P",
 176             new ElemDesc(
 177                 0
 178                     | ElemDesc.BLOCK
 179                     | ElemDesc.BLOCKFORM
 180                     | ElemDesc.BLOCKFORMFIELDSET));
 181         m_elementFlags.put(
 182             "H1",
 183             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 184         m_elementFlags.put(
 185             "H2",
 186             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 187         m_elementFlags.put(
 188             "H3",
 189             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 190         m_elementFlags.put(
 191             "H4",
 192             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 193         m_elementFlags.put(
 194             "H5",
 195             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 196         m_elementFlags.put(
 197             "H6",
 198             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
 199         m_elementFlags.put(
 200             "PRE",
 201             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
 202         m_elementFlags.put(
 203             "Q",
 204             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
 205         m_elementFlags.put(
 206             "BLOCKQUOTE",
 207             new ElemDesc(
 208                 0
 209                     | ElemDesc.BLOCK
 210                     | ElemDesc.BLOCKFORM
 211                     | ElemDesc.BLOCKFORMFIELDSET));
 212         m_elementFlags.put("INS", new ElemDesc(0));
 213         m_elementFlags.put("DEL", new ElemDesc(0));
 214         m_elementFlags.put(
 215             "DL",
 216             new ElemDesc(
 217                 0
 218                     | ElemDesc.BLOCK
 219                     | ElemDesc.BLOCKFORM
 220                     | ElemDesc.BLOCKFORMFIELDSET));
 221         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
 222         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
 223         m_elementFlags.put(
 224             "OL",
 225             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 226         m_elementFlags.put(
 227             "UL",
 228             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
 229         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
 230         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
 231         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
 232         m_elementFlags.put(
 233             "INPUT",
 234             new ElemDesc(
 235                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
 236         m_elementFlags.put(
 237             "SELECT",
 238             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 239         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
 240         m_elementFlags.put("OPTION", new ElemDesc(0));
 241         m_elementFlags.put(
 242             "TEXTAREA",
 243             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 244         m_elementFlags.put(
 245             "FIELDSET",
 246             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
 247         m_elementFlags.put("LEGEND", new ElemDesc(0));
 248         m_elementFlags.put(
 249             "BUTTON",
 250             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
 251         m_elementFlags.put(
 252             "TABLE",
 253             new ElemDesc(
 254                 0
 255                     | ElemDesc.BLOCK
 256                     | ElemDesc.BLOCKFORM
 257                     | ElemDesc.BLOCKFORMFIELDSET));
 258         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
 259         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
 260         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
 261         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
 262         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
 263         m_elementFlags.put(
 264             "COL",
 265             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 266         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
 267         m_elementFlags.put("TH", new ElemDesc(0));
 268         m_elementFlags.put("TD", new ElemDesc(0));
 269         m_elementFlags.put(
 270             "HEAD",
 271             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
 272         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
 273         m_elementFlags.put(
 274             "BASE",
 275             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
 276         m_elementFlags.put(
 277             "META",
 278             new ElemDesc(
 279                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
 280         m_elementFlags.put(
 281             "STYLE",
 282             new ElemDesc(
 283                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
 284         m_elementFlags.put(
 285             "SCRIPT",
 286             new ElemDesc(
 287                 0
 288                     | ElemDesc.SPECIAL
 289                     | ElemDesc.ASPECIAL
 290                     | ElemDesc.HEADMISC
 291                     | ElemDesc.RAW));
 292         m_elementFlags.put(
 293             "NOSCRIPT",
 294             new ElemDesc(
 295                 0
 296                     | ElemDesc.BLOCK
 297                     | ElemDesc.BLOCKFORM
 298                     | ElemDesc.BLOCKFORMFIELDSET));
 299         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
 300 
 301         // From "John Ky" <hand@syd.speednet.com.au
 302         // Transitional Document Type Definition ()
 303         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
 304         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 305 
 306         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
 307         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 308         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 309 
 310         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
 311         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 312 
 313         // From "John Ky" <hand@syd.speednet.com.au
 314         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
 315 
 316         // HTML 4.0, section 16.5
 317         m_elementFlags.put(
 318             "IFRAME",
 319             new ElemDesc(
 320                 0
 321                     | ElemDesc.BLOCK
 322                     | ElemDesc.BLOCKFORM
 323                     | ElemDesc.BLOCKFORMFIELDSET));
 324 
 325         // Netscape 4 extension
 326         m_elementFlags.put(
 327             "LAYER",
 328             new ElemDesc(
 329                 0
 330                     | ElemDesc.BLOCK
 331                     | ElemDesc.BLOCKFORM
 332                     | ElemDesc.BLOCKFORMFIELDSET));
 333         // Netscape 4 extension
 334         m_elementFlags.put(
 335             "ILAYER",
 336             new ElemDesc(
 337                 0
 338                     | ElemDesc.BLOCK
 339                     | ElemDesc.BLOCKFORM
 340                     | ElemDesc.BLOCKFORMFIELDSET));
 341 
 342 
 343         // NOW FOR ATTRIBUTE INFORMATION . . .
 344         ElemDesc elemDesc;
 345 
 346 
 347         // ----------------------------------------------
 348         elemDesc = (ElemDesc) m_elementFlags.get("a");
 349         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 350         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
 351 
 352         // ----------------------------------------------
 353         elemDesc = (ElemDesc) m_elementFlags.get("area");
 354         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 355         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
 356 
 357         // ----------------------------------------------
 358         elemDesc = (ElemDesc) m_elementFlags.get("base");
 359         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 360 
 361         // ----------------------------------------------
 362         elemDesc = (ElemDesc) m_elementFlags.get("button");
 363         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 364 
 365         // ----------------------------------------------
 366         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
 367         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 368 
 369         // ----------------------------------------------
 370         elemDesc = (ElemDesc) m_elementFlags.get("del");
 371         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 372 
 373         // ----------------------------------------------
 374         elemDesc = (ElemDesc) m_elementFlags.get("dir");
 375         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 376 
 377         // ----------------------------------------------
 378 
 379         elemDesc = (ElemDesc) m_elementFlags.get("div");
 380         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
 381         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 382 
 383         // ----------------------------------------------
 384         elemDesc = (ElemDesc) m_elementFlags.get("dl");
 385         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 386 
 387         // ----------------------------------------------
 388         elemDesc = (ElemDesc) m_elementFlags.get("form");
 389         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
 390 
 391         // ----------------------------------------------
 392         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
 393         elemDesc = (ElemDesc) m_elementFlags.get("frame");
 394         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 395         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 396         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
 397 
 398         // ----------------------------------------------
 399         elemDesc = (ElemDesc) m_elementFlags.get("head");
 400         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
 401 
 402         // ----------------------------------------------
 403         elemDesc = (ElemDesc) m_elementFlags.get("hr");
 404         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
 405 
 406         // ----------------------------------------------
 407         // HTML 4.0, section 16.5
 408         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
 409         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 410         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 411 
 412         // ----------------------------------------------
 413         // Netscape 4 extension
 414         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
 415         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 416 
 417         // ----------------------------------------------
 418         elemDesc = (ElemDesc) m_elementFlags.get("img");
 419         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 420         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
 421         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 422         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 423 
 424         // ----------------------------------------------
 425         elemDesc = (ElemDesc) m_elementFlags.get("input");
 426         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 427         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 428         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
 429         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 430         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
 431         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 432 
 433         // ----------------------------------------------
 434         elemDesc = (ElemDesc) m_elementFlags.get("ins");
 435         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 436 
 437         // ----------------------------------------------
 438         // Netscape 4 extension
 439         elemDesc = (ElemDesc) m_elementFlags.get("layer");
 440         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 441 
 442         // ----------------------------------------------
 443         elemDesc = (ElemDesc) m_elementFlags.get("link");
 444         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
 445 
 446         // ----------------------------------------------
 447         elemDesc = (ElemDesc) m_elementFlags.get("menu");
 448         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 449 
 450         // ----------------------------------------------
 451         elemDesc = (ElemDesc) m_elementFlags.get("object");
 452         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
 453         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
 454         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
 455         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
 456         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
 457         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
 458 
 459         // ----------------------------------------------
 460         elemDesc = (ElemDesc) m_elementFlags.get("ol");
 461         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 462 
 463         // ----------------------------------------------
 464         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
 465         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 466 
 467         // ----------------------------------------------
 468         elemDesc = (ElemDesc) m_elementFlags.get("option");
 469         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
 470         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 471 
 472         // ----------------------------------------------
 473         elemDesc = (ElemDesc) m_elementFlags.get("q");
 474         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
 475 
 476         // ----------------------------------------------
 477         elemDesc = (ElemDesc) m_elementFlags.get("script");
 478         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
 479         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
 480         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
 481 
 482         // ----------------------------------------------
 483         elemDesc = (ElemDesc) m_elementFlags.get("select");
 484         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 485         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
 486 
 487         // ----------------------------------------------
 488         elemDesc = (ElemDesc) m_elementFlags.get("table");
 489         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
 490 
 491         // ----------------------------------------------
 492         elemDesc = (ElemDesc) m_elementFlags.get("td");
 493         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 494 
 495         // ----------------------------------------------
 496         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
 497         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
 498         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
 499 
 500         // ----------------------------------------------
 501         elemDesc = (ElemDesc) m_elementFlags.get("th");
 502         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 503 
 504         // ----------------------------------------------
 505         // The nowrap attribute of a tr element is both
 506         // a Netscape and Internet-Explorer extension
 507         elemDesc = (ElemDesc) m_elementFlags.get("tr");
 508         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
 509 
 510         // ----------------------------------------------
 511         elemDesc = (ElemDesc) m_elementFlags.get("ul");
 512         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
 513     }
 514 
 515     /**
 516      * Dummy element for elements not found.
 517      */
 518     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
 519 
 520     /** True if URLs should be specially escaped with the %xx form. */
 521     private boolean m_specialEscapeURLs = true;
 522 
 523     /** True if the META tag should be omitted. */
 524     private boolean m_omitMetaTag = false;
 525 
 526     /**
 527      * Tells if the formatter should use special URL escaping.
 528      *
 529      * @param bool True if URLs should be specially escaped with the %xx form.
 530      */
 531     public void setSpecialEscapeURLs(boolean bool)
 532     {
 533         m_specialEscapeURLs = bool;
 534     }
 535 
 536     /**
 537      * Tells if the formatter should omit the META tag.
 538      *
 539      * @param bool True if the META tag should be omitted.
 540      */
 541     public void setOmitMetaTag(boolean bool)
 542     {
 543         m_omitMetaTag = bool;
 544     }
 545 
 546     /**
     * Specifies an output format for this serializer. If the
 548      * serializer has already been associated with an output format,
 549      * it will switch to the new format. This method should not be
 550      * called while the serializer is in the process of serializing
 551      * a document.
 552      *
 553      * This method can be called multiple times before starting
 554      * the serialization of a particular result-tree. In principle
 555      * all serialization parameters can be changed, with the exception
 556      * of method="html" (it must be method="html" otherwise we
 557      * shouldn't even have a ToHTMLStream object here!)
 558      *
     * @param format The output format or serialization parameters
     * to use.
 561      */
 562     public void setOutputFormat(Properties format)
 563     {
 564 
 565         m_specialEscapeURLs =
 566             OutputPropertyUtils.getBooleanProperty(
 567                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
 568                 format);
 569 
 570         m_omitMetaTag =
 571             OutputPropertyUtils.getBooleanProperty(
 572                 OutputPropertiesFactory.S_OMIT_META_TAG,
 573                 format);
 574 
 575         super.setOutputFormat(format);
 576     }
 577 
 578     /**
 579      * Tells if the formatter should use special URL escaping.
 580      *
 581      * @return True if URLs should be specially escaped with the %xx form.
 582      */
 583     private final boolean getSpecialEscapeURLs()
 584     {
 585         return m_specialEscapeURLs;
 586     }
 587 
 588     /**
 589      * Tells if the formatter should omit the META tag.
 590      *
 591      * @return True if the META tag should be omitted.
 592      */
 593     private final boolean getOmitMetaTag()
 594     {
 595         return m_omitMetaTag;
 596     }
 597 
 598     /**
 599      * Get a description of the given element.
 600      *
 601      * @param name non-null name of element, case insensitive.
 602      *
 603      * @return non-null reference to ElemDesc, which may be m_dummy if no
 604      *         element description matches the given name.
 605      */
 606     public static final ElemDesc getElemDesc(String name)
 607     {
 608         /* this method used to return m_dummy  when name was null
 609          * but now it doesn't check and and requires non-null name.
 610          */
 611         Object obj = m_elementFlags.get(name);
 612         if (null != obj)
 613             return (ElemDesc)obj;
 614         return m_dummy;
 615     }
 616 
 617     /**
 618      * A Trie that is just a copy of the "static" one.
 619      * We need this one to be able to use the faster, but not thread-safe
 620      * method Trie.get2(name)
 621      */
 622     private Trie m_htmlInfo = new Trie(m_elementFlags);
 623     /**
 624      * Calls to this method could be replaced with calls to
 625      * getElemDesc(name), but this one should be faster.
 626      */
 627     private ElemDesc getElemDesc2(String name)
 628     {
 629         Object obj = m_htmlInfo.get2(name);
 630         if (null != obj)
 631             return (ElemDesc)obj;
 632         return m_dummy;
 633     }
 634 
 635     /**
 636      * Default constructor.
 637      */
 638     public ToHTMLStream()
 639     {
 640 
 641         super();
 642         m_charInfo = m_htmlcharInfo;
 643         // initialize namespaces
 644         m_prefixMap = new NamespaceMappings();
 645 
 646     }
 647 
 648     /** The name of the current element. */
 649 //    private String m_currentElementName = null;
 650 
 651     /**
 652      * Receive notification of the beginning of a document.
 653      *
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *            wrapping another exception.
 658      */
 659     protected void startDocumentInternal() throws org.xml.sax.SAXException
 660     {
 661         super.startDocumentInternal();
 662 
 663         m_needToCallStartDocument = false;
 664         m_needToOutputDocTypeDecl = true;
 665         m_startNewLine = false;
 666         setOmitXMLDeclaration(true);
 667 
 668         if (true == m_needToOutputDocTypeDecl)
 669         {
 670             String doctypeSystem = getDoctypeSystem();
 671             String doctypePublic = getDoctypePublic();
 672             if ((null != doctypeSystem) || (null != doctypePublic))
 673             {
 674                 final java.io.Writer writer = m_writer;
 675                 try
 676                 {
 677                 writer.write("<!DOCTYPE html");
 678 
 679                 if (null != doctypePublic)
 680                 {
 681                     writer.write(" PUBLIC \"");
 682                     writer.write(doctypePublic);
 683                     writer.write('"');
 684                 }
 685 
 686                 if (null != doctypeSystem)
 687                 {
 688                     if (null == doctypePublic)
 689                         writer.write(" SYSTEM \"");
 690                     else
 691                         writer.write(" \"");
 692 
 693                     writer.write(doctypeSystem);
 694                     writer.write('"');
 695                 }
 696 
 697                 writer.write('>');
 698                 outputLineSep();
 699                 }
 700                 catch(IOException e)
 701                 {
 702                     throw new SAXException(e);
 703                 }
 704             }
 705         }
 706 
 707         m_needToOutputDocTypeDecl = false;
 708     }
 709 
 710     /**
 711      * Receive notification of the end of a document.
 712      *
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *            wrapping another exception.
 717      */
 718     public final void endDocument() throws org.xml.sax.SAXException
 719     {
 720         flushCharactersBuffer();
 721         flushPending();
 722         if (m_doIndent && !m_isprevtext)
 723         {
 724             try
 725             {
 726             outputLineSep();
 727             }
 728             catch(IOException e)
 729             {
 730                 throw new SAXException(e);
 731             }
 732         }
 733 
 734         flushWriter();
 735         if (m_tracer != null)
 736             super.fireEndDoc();
 737     }
 738 
 739     /**
 740      * If the previous is an inline element, won't insert a new line before the
 741      * text.
 742      *
 743      */
 744     protected boolean shouldIndentForText() {
 745         return super.shouldIndentForText() && m_isprevblock;
 746     }
 747 
 748     /**
 749      * Only check m_doIndent, disregard m_ispreserveSpace.
 750      *
 751      * @return True if the content should be formatted.
 752      */
 753     protected boolean shouldFormatOutput() {
 754         return m_doIndent;
 755     }
 756 
 757     /**
 758      * Receive notification of the beginning of an element.
 759      *
 760      *
 761      * @param namespaceURI
 762      * @param localName
 763      * @param name
 764      *            The element type name.
 765      * @param atts
 766      *            The attributes attached to the element, if any.
 767      * @throws org.xml.sax.SAXException
 768      *             Any SAX exception, possibly wrapping another exception.
 769      * @see #endElement
 770      * @see org.xml.sax.AttributeList
 771      */
 772     public void startElement(
 773         String namespaceURI,
 774         String localName,
 775         String name,
 776         Attributes atts)
 777         throws SAXException
 778     {
 779         // will add extra one if having namespace but no matter
 780         m_childNodeNum++;
 781         flushCharactersBuffer();
 782         ElemContext elemContext = m_elemContext;
 783 
 784         // clean up any pending things first
 785         if (elemContext.m_startTagOpen)
 786         {
 787             closeStartTag();
 788             elemContext.m_startTagOpen = false;
 789         }
 790         else if (m_cdataTagOpen)
 791         {
 792             closeCDATA();
 793             m_cdataTagOpen = false;
 794         }
 795         else if (m_needToCallStartDocument)
 796         {
 797             startDocumentInternal();
 798             m_needToCallStartDocument = false;
 799         }
 800 
 801 
 802         // if this element has a namespace then treat it like XML
 803         if (null != namespaceURI && namespaceURI.length() > 0)
 804         {
 805             super.startElement(namespaceURI, localName, name, atts);
 806 
 807             return;
 808         }
 809 
 810         try
 811         {
 812             // getElemDesc2(name) is faster than getElemDesc(name)
 813             ElemDesc elemDesc = getElemDesc2(name);
 814             int elemFlags = elemDesc.getFlags();
 815 
 816             // deal with indentation issues first
 817             if (m_doIndent)
 818             {
 819                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
 820                 if ((elemContext.m_elementName != null)
 821                         // If this element is a block element,
 822                         // or if this is not a block element, then if the
 823                         // previous is neither a text nor an inline
 824                         && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
 825                 {
 826                     m_startNewLine = true;
 827 
 828                     indent();
 829                 }
 830                 m_isprevblock = isBlockElement;
 831             }
 832 
 833             // save any attributes for later processing
 834             if (atts != null)
 835                 addAttributes(atts);
 836 
 837             m_isprevtext = false;
 838             final java.io.Writer writer = m_writer;
 839             writer.write('<');
 840             writer.write(name);
 841 
 842             m_childNodeNumStack.push(m_childNodeNum);
 843             m_childNodeNum = 0;
 844 
 845             if (m_tracer != null)
 846                 firePseudoAttributes();
 847 
 848             if ((elemFlags & ElemDesc.EMPTY) != 0)
 849             {
 850                 // an optimization for elements which are expected
 851                 // to be empty.
 852                 m_elemContext = elemContext.push();
 853                 /* XSLTC sometimes calls namespaceAfterStartElement()
 854                  * so we need to remember the name
 855                  */
 856                 m_elemContext.m_elementName = name;
 857                 m_elemContext.m_elementDesc = elemDesc;
 858                 return;
 859             }
 860             else
 861             {
 862                 elemContext = elemContext.push(namespaceURI,localName,name);
 863                 m_elemContext = elemContext;
 864                 elemContext.m_elementDesc = elemDesc;
 865                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
 866 
 867                 // set m_startNewLine for the next element
 868                 if (m_doIndent) {
 869                     // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
 870                     // in this branch m_elemContext.m_elementName is not null
 871                     boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
 872                     if (isBlockElement)
 873                         m_startNewLine = true;
 874                 }
 875             }
 876 
 877 
 878             if ((elemFlags & ElemDesc.HEADELEM) != 0)
 879             {
 880                 // This is the <HEAD> element, do some special processing
 881                 closeStartTag();
 882                 elemContext.m_startTagOpen = false;
 883                 if (!m_omitMetaTag)
 884                 {
 885                     if (m_doIndent)
 886                         indent();
 887                     writer.write(
 888                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
 889                     String encoding = getEncoding();
 890                     String encode = Encodings.getMimeEncoding(encoding);
 891                     writer.write(encode);
 892                     writer.write("\">");
 893                 }
 894             }
 895         }
 896         catch (IOException e)
 897         {
 898             throw new SAXException(e);
 899         }
 900     }
 901 
 902     /**
 903      *  Receive notification of the end of an element.
 904      *
 905      *
 906      *  @param namespaceURI
 907      *  @param localName
 908      *  @param name The element type name
 909      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
 910      *             wrapping another exception.
 911      */
 912     public final void endElement(
 913         final String namespaceURI,
 914         final String localName,
 915         final String name)
 916         throws org.xml.sax.SAXException
 917     {
 918         flushCharactersBuffer();
 919         // deal with any pending issues
 920         if (m_cdataTagOpen)
 921             closeCDATA();
 922 
 923         // if the element has a namespace, treat it like XML, not HTML
 924         if (null != namespaceURI && namespaceURI.length() > 0)
 925         {
 926             super.endElement(namespaceURI, localName, name);
 927 
 928             return;
 929         }
 930 
 931         try
 932         {
 933 
 934             ElemContext elemContext = m_elemContext;
 935             final ElemDesc elemDesc = elemContext.m_elementDesc;
 936             final int elemFlags = elemDesc.getFlags();
 937             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
 938 
 939             // deal with any indentation issues
 940             if (m_doIndent)
 941             {
 942                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
 943                 boolean shouldIndent = false;
 944 
 945                 // If this element is a block element,
 946                 // or if this is not a block element, then if the previous is
 947                 // neither a text nor an inline
 948                 if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
 949                 {
 950                     m_startNewLine = true;
 951                     shouldIndent = true;
 952                 }
 953                 if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
 954                     indent(elemContext.m_currentElemDepth - 1);
 955 
 956                 m_isprevblock = isBlockElement;
 957             }
 958 
 959             final java.io.Writer writer = m_writer;
 960             if (!elemContext.m_startTagOpen)
 961             {
 962                 writer.write("</");
 963                 writer.write(name);
 964                 writer.write('>');
 965             }
 966             else
 967             {
 968                 // the start-tag open when this method was called,
 969                 // so we need to process it now.
 970 
 971                 if (m_tracer != null)
 972                     super.fireStartElem(name);
 973 
 974                 // the starting tag was still open when we received this endElement() call
 975                 // so we need to process any gathered attributes NOW, before they go away.
 976                 int nAttrs = m_attributes.getLength();
 977                 if (nAttrs > 0)
 978                 {
 979                     processAttributes(m_writer, nAttrs);
 980                     // clear attributes object for re-use with next element
 981                     m_attributes.clear();
 982                 }
 983                 if (!elemEmpty)
 984                 {
 985                     // As per Dave/Paul recommendation 12/06/2000
 986                     // if (shouldIndent)
 987                     // writer.write('>');
 988                     //  indent(m_currentIndent);
 989 
 990                     writer.write("></");
 991                     writer.write(name);
 992                     writer.write('>');
 993                 }
 994                 else
 995                 {
 996                     writer.write('>');
 997                 }
 998             }
 999 
1000             m_childNodeNum = m_childNodeNumStack.pop();
1001             // clean up because the element has ended
1002             if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
1003                 m_ispreserve = true;
1004             m_isprevtext = false;
1005 
1006             // fire off the end element event
1007             if (m_tracer != null)
1008                 super.fireEndElem(name);
1009 
1010             // OPTIMIZE-EMPTY
1011             if (elemEmpty)
1012             {
1013                 // a quick exit if the HTML element had no children.
1014                 // This block of code can be removed if the corresponding block of code
1015                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1016                 m_elemContext = elemContext.m_prev;
1017                 return;
1018             }
1019 
1020             // some more clean because the element has ended.
1021             if (!elemContext.m_startTagOpen)
1022             {
1023                 if (m_doIndent && !m_preserves.isEmpty())
1024                     m_preserves.pop();
1025             }
1026             m_elemContext = elemContext.m_prev;
1027 //            m_isRawStack.pop();
1028         }
1029         catch (IOException e)
1030         {
1031             throw new SAXException(e);
1032         }
1033     }
1034 
1035     /**
1036      * Process an attribute.
1037      * @param   writer The writer to write the processed output to.
1038      * @param   name   The name of the attribute.
1039      * @param   value   The value of the attribute.
1040      * @param   elemDesc The description of the HTML element
1041      *           that has this attribute.
1042      *
1043      * @throws org.xml.sax.SAXException
1044      */
1045     protected void processAttribute(
1046         java.io.Writer writer,
1047         String name,
1048         String value,
1049         ElemDesc elemDesc)
1050         throws IOException
1051     {
1052         writer.write(' ');
1053 
1054         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1055             && elemDesc != null
1056             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1057         {
1058             writer.write(name);
1059         }
1060         else
1061         {
1062             // %REVIEW% %OPT%
1063             // Two calls to single-char write may NOT
1064             // be more efficient than one to string-write...
1065             writer.write(name);
1066             writer.write("=\"");
1067             if (   elemDesc != null
1068                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1069                 writeAttrURI(writer, value, m_specialEscapeURLs);
1070             else
1071                 writeAttrString(writer, value, this.getEncoding());
1072             writer.write('"');
1073 
1074         }
1075     }
1076 
1077     /**
1078      * Tell if a character is an ASCII digit.
1079      */
1080     private boolean isASCIIDigit(char c)
1081     {
1082         return (c >= '0' && c <= '9');
1083     }
1084 
1085     /**
1086      * Make an integer into an HH hex value.
1087      * Does no checking on the size of the input, since this
1088      * is only meant to be used locally by writeAttrURI.
1089      *
1090      * @param i must be a value less than 255.
1091      *
1092      * @return should be a two character string.
1093      */
1094     private static String makeHHString(int i)
1095     {
1096         String s = Integer.toHexString(i).toUpperCase();
1097         if (s.length() == 1)
1098         {
1099             s = "0" + s;
1100         }
1101         return s;
1102     }
1103 
1104     /**
1105     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1106     * @param str must be 2 characters long
1107     *
1108     * @return true or false
1109     */
1110     private boolean isHHSign(String str)
1111     {
1112         boolean sign = true;
1113         try
1114         {
1115             char r = (char) Integer.parseInt(str, 16);
1116         }
1117         catch (NumberFormatException e)
1118         {
1119             sign = false;
1120         }
1121         return sign;
1122     }
1123 
1124     /**
1125      * Write the specified <var>string</var> after substituting non ASCII characters,
1126      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1127      *
1128      * @param   string      String to convert to XML format.
1129      * @param doURLEscaping True if we should try to encode as
1130      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1131      *
1132      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1133      */
1134     public void writeAttrURI(
1135         final java.io.Writer writer, String string, boolean doURLEscaping)
1136         throws IOException
1137     {
1138         // http://www.ietf.org/rfc/rfc2396.txt says:
1139         // A URI is always in an "escaped" form, since escaping or unescaping a
1140         // completed URI might change its semantics.  Normally, the only time
1141         // escape encodings can safely be made is when the URI is being created
1142         // from its component parts; each component may have its own set of
1143         // characters that are reserved, so only the mechanism responsible for
1144         // generating or interpreting that component can determine whether or
1145         // not escaping a character will change its semantics. Likewise, a URI
1146         // must be separated into its components before the escaped characters
1147         // within those components can be safely decoded.
1148         //
1149         // ...So we do our best to do limited escaping of the URL, without
1150         // causing damage.  If the URL is already properly escaped, in theory, this
1151         // function should not change the string value.
1152 
1153         final int end = string.length();
1154         if (end > m_attrBuff.length)
1155         {
1156            m_attrBuff = new char[end*2 + 1];
1157         }
1158         string.getChars(0,end, m_attrBuff, 0);
1159         final char[] chars = m_attrBuff;
1160 
1161         int cleanStart = 0;
1162         int cleanLength = 0;
1163 
1164 
1165         char ch = 0;
1166         for (int i = 0; i < end; i++)
1167         {
1168             ch = chars[i];
1169 
1170             if ((ch < 32) || (ch > 126))
1171             {
1172                 if (cleanLength > 0)
1173                 {
1174                     writer.write(chars, cleanStart, cleanLength);
1175                     cleanLength = 0;
1176                 }
1177                 if (doURLEscaping)
1178                 {
1179                     // Encode UTF16 to UTF8.
1180                     // Reference is Unicode, A Primer, by Tony Graham.
1181                     // Page 92.
1182 
1183                     // Note that Kay doesn't escape 0x20...
1184                     //  if(ch == 0x20) // Not sure about this... -sb
1185                     //  {
1186                     //    writer.write(ch);
1187                     //  }
1188                     //  else
1189                     if (ch <= 0x7F)
1190                     {
1191                         writer.write('%');
1192                         writer.write(makeHHString(ch));
1193                     }
1194                     else if (ch <= 0x7FF)
1195                     {
1196                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1197                         // and set two high bits.
1198                         int high = (ch >> 6) | 0xC0;
1199                         int low = (ch & 0x3F) | 0x80;
1200                         // First 6 bits, + high bit
1201                         writer.write('%');
1202                         writer.write(makeHHString(high));
1203                         writer.write('%');
1204                         writer.write(makeHHString(low));
1205                     }
1206                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1207                     {
1208                         // I'm sure this can be done in 3 instructions, but I choose
1209                         // to try and do it exactly like it is done in the book, at least
1210                         // until we are sure this is totally clean.  I don't think performance
1211                         // is a big issue with this particular function, though I could be
1212                         // wrong.  Also, the stuff below clearly does more masking than
1213                         // it needs to do.
1214 
1215                         // Clear high 6 bits.
1216                         int highSurrogate = ((int) ch) & 0x03FF;
1217 
1218                         // Middle 4 bits (wwww) + 1
1219                         // "Note that the value of wwww from the high surrogate bit pattern
1220                         // is incremented to make the uuuuu bit pattern in the scalar value
1221                         // so the surrogate pair don't address the BMP."
1222                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1223                         int uuuuu = wwww + 1;
1224 
1225                         // next 4 bits
1226                         int zzzz = (highSurrogate & 0x003C) >> 2;
1227 
1228                         // low 2 bits
1229                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1230 
1231                         // Get low surrogate character.
1232                         ch = chars[++i];
1233 
1234                         // Clear high 6 bits.
1235                         int lowSurrogate = ((int) ch) & 0x03FF;
1236 
1237                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1238                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1239 
1240                         // bottom 6 bits.
1241                         int xxxxxx = (lowSurrogate & 0x003F);
1242 
1243                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1244                         int byte2 =
1245                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1246                         int byte3 = 0x80 | yyyyyy;
1247                         int byte4 = 0x80 | xxxxxx;
1248 
1249                         writer.write('%');
1250                         writer.write(makeHHString(byte1));
1251                         writer.write('%');
1252                         writer.write(makeHHString(byte2));
1253                         writer.write('%');
1254                         writer.write(makeHHString(byte3));
1255                         writer.write('%');
1256                         writer.write(makeHHString(byte4));
1257                     }
1258                     else
1259                     {
1260                         int high = (ch >> 12) | 0xE0; // top 4 bits
1261                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1262                         // middle 6 bits
1263                         int low = (ch & 0x3F) | 0x80;
1264                         // First 6 bits, + high bit
1265                         writer.write('%');
1266                         writer.write(makeHHString(high));
1267                         writer.write('%');
1268                         writer.write(makeHHString(middle));
1269                         writer.write('%');
1270                         writer.write(makeHHString(low));
1271                     }
1272 
1273                 }
1274                 else if (escapingNotNeeded(ch))
1275                 {
1276                     writer.write(ch);
1277                 }
1278                 else
1279                 {
1280                     writer.write("&#");
1281                     writer.write(Integer.toString(ch));
1282                     writer.write(';');
1283                 }
1284                 // In this character range we have first written out any previously accumulated
1285                 // "clean" characters, then processed the current more complicated character,
1286                 // which may have incremented "i".
1287                 // We now we reset the next possible clean character.
1288                 cleanStart = i + 1;
1289             }
1290             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1291             // not allowing quotes in the URI proper syntax, nor in the fragment
1292             // identifier, we believe that it's OK to double escape quotes.
1293             else if (ch == '"')
1294             {
1295                 // If the character is a '%' number number, try to avoid double-escaping.
1296                 // There is a question if this is legal behavior.
1297 
1298                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1299                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1300 
1301                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1302 
1303                 // We are no longer escaping '%'
1304 
1305                 if (cleanLength > 0)
1306                 {
1307                     writer.write(chars, cleanStart, cleanLength);
1308                     cleanLength = 0;
1309                 }
1310 
1311 
1312                 // Mike Kay encodes this as ", so he may know something I don't?
1313                 if (doURLEscaping)
1314                     writer.write("%22");
1315                 else
1316                     writer.write("&quot;"); // we have to escape this, I guess.
1317 
1318                 // We have written out any clean characters, then the escaped '%' and now we
1319                 // We now we reset the next possible clean character.
1320                 cleanStart = i + 1;
1321             }
1322             else if (ch == '&')
1323             {
1324                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1325                 // instead of "&" to avoid confusion with the beginning of a character
1326                 // reference (entity reference open delimiter).
1327                 if (cleanLength > 0)
1328                 {
1329                     writer.write(chars, cleanStart, cleanLength);
1330                     cleanLength = 0;
1331                 }
1332                 writer.write("&amp;");
1333                 cleanStart = i + 1;
1334             }
1335             else
1336             {
1337                 // no processing for this character, just count how
1338                 // many characters in a row that we have that need no processing
1339                 cleanLength++;
1340             }
1341         }
1342 
1343         // are there any clean characters at the end of the array
1344         // that we haven't processed yet?
1345         if (cleanLength > 1)
1346         {
1347             // if the whole string can be written out as-is do so
1348             // otherwise write out the clean chars at the end of the
1349             // array
1350             if (cleanStart == 0)
1351                 writer.write(string);
1352             else
1353                 writer.write(chars, cleanStart, cleanLength);
1354         }
1355         else if (cleanLength == 1)
1356         {
1357             // a little optimization for 1 clean character
1358             // (we could have let the previous if(...) handle them all)
1359             writer.write(ch);
1360         }
1361     }
1362 
1363     /**
1364      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1365      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1366      *
1367      * @param   string      String to convert to XML format.
1368      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1369      *
1370      * @throws org.xml.sax.SAXException
1371      */
1372     public void writeAttrString(
1373         final java.io.Writer writer, String string, String encoding)
1374         throws IOException
1375     {
1376         final int end = string.length();
1377         if (end > m_attrBuff.length)
1378         {
1379             m_attrBuff = new char[end * 2 + 1];
1380         }
1381         string.getChars(0, end, m_attrBuff, 0);
1382         final char[] chars = m_attrBuff;
1383 
1384 
1385 
1386         int cleanStart = 0;
1387         int cleanLength = 0;
1388 
1389         char ch = 0;
1390         for (int i = 0; i < end; i++)
1391         {
1392             ch = chars[i];
1393 
1394             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1395             // System.out.println("ch: "+(int)ch);
1396             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1397             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1398             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1399             {
1400                 cleanLength++;
1401             }
1402             else if ('<' == ch || '>' == ch)
1403             {
1404                 cleanLength++; // no escaping in this case, as specified in 15.2
1405             }
1406             else if (
1407                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1408             {
1409                 cleanLength++; // no escaping in this case, as specified in 15.2
1410             }
1411             else
1412             {
1413                 if (cleanLength > 0)
1414                 {
1415                     writer.write(chars,cleanStart,cleanLength);
1416                     cleanLength = 0;
1417                 }
1418                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1419 
1420                 if (i != pos)
1421                 {
1422                     i = pos - 1;
1423                 }
1424                 else
1425                 {
1426                     if (Encodings.isHighUTF16Surrogate(ch))
1427                     {
1428 
1429                             writeUTF16Surrogate(ch, chars, i, end);
1430                             i++; // two input characters processed
1431                                  // this increments by one and the for()
1432                                  // loop itself increments by another one.
1433                     }
1434 
1435                     // The next is kind of a hack to keep from escaping in the case
1436                     // of Shift_JIS and the like.
1437 
1438                     /*
1439                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1440                     && (ch != 160))
1441                     {
1442                     writer.write(ch);  // no escaping in this case
1443                     }
1444                     else
1445                     */
1446                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1447                     if (null != outputStringForChar)
1448                     {
1449                         writer.write(outputStringForChar);
1450                     }
1451                     else if (escapingNotNeeded(ch))
1452                     {
1453                         writer.write(ch); // no escaping in this case
1454                     }
1455                     else
1456                     {
1457                         writer.write("&#");
1458                         writer.write(Integer.toString(ch));
1459                         writer.write(';');
1460                     }
1461                 }
1462                 cleanStart = i + 1;
1463             }
1464         } // end of for()
1465 
1466         // are there any clean characters at the end of the array
1467         // that we haven't processed yet?
1468         if (cleanLength > 1)
1469         {
1470             // if the whole string can be written out as-is do so
1471             // otherwise write out the clean chars at the end of the
1472             // array
1473             if (cleanStart == 0)
1474                 writer.write(string);
1475             else
1476                 writer.write(chars, cleanStart, cleanLength);
1477         }
1478         else if (cleanLength == 1)
1479         {
1480             // a little optimization for 1 clean character
1481             // (we could have let the previous if(...) handle them all)
1482             writer.write(ch);
1483         }
1484     }
1485 
1486 
1487 
1488     /**
1489      * Receive notification of character data.
1490      *
1491      * <p>The Parser will call this method to report each chunk of
1492      * character data.  SAX parsers may return all contiguous character
1493      * data in a single chunk, or they may split it into several
1494      * chunks; however, all of the characters in any single event
1495      * must come from the same external entity, so that the Locator
1496      * provides useful information.</p>
1497      *
1498      * <p>The application must not attempt to read from the array
1499      * outside of the specified range.</p>
1500      *
1501      * <p>Note that some parsers will report whitespace using the
1502      * ignorableWhitespace() method rather than this one (validating
1503      * parsers must do so).</p>
1504      *
1505      * @param chars The characters from the XML document.
1506      * @param start The start position in the array.
1507      * @param length The number of characters to read from the array.
1508      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1509      *            wrapping another exception.
1510      * @see #ignorableWhitespace
1511      * @see org.xml.sax.Locator
1512      *
1513      * @throws org.xml.sax.SAXException
1514      */
1515     public final void characters(char chars[], int start, int length)
1516         throws org.xml.sax.SAXException
1517     {
1518 
1519         if (m_elemContext.m_isRaw)
1520         {
1521             try
1522             {
1523                 if (m_elemContext.m_startTagOpen)
1524                 {
1525                     closeStartTag();
1526                     m_elemContext.m_startTagOpen = false;
1527                 }
1528                 m_ispreserve = true;
1529 
1530 //              With m_ispreserve just set true it looks like shouldIndent()
1531 //              will always return false, so drop any possible indentation.
1532 //              if (shouldIndent())
1533 //                  indent();
1534 
1535                 // writer.write("<![CDATA[");
1536                 // writer.write(chars, start, length);
1537                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1538                 m_isprevtext = true;
1539                 // writer.write("]]>");
1540 
1541                 // time to generate characters event
1542                 if (m_tracer != null)
1543                     super.fireCharEvent(chars, start, length);
1544 
1545                 return;
1546             }
1547             catch (IOException ioe)
1548             {
1549                 throw new org.xml.sax.SAXException(
1550                     Utils.messages.createMessage(
1551                         MsgKey.ER_OIERROR,
1552                         null),
1553                     ioe);
1554                 //"IO error", ioe);
1555             }
1556         }
1557         else
1558         {
1559             super.characters(chars, start, length);
1560         }
1561     }
1562 
1563     /**
1564      *  Receive notification of cdata.
1565      *
1566      *  <p>The Parser will call this method to report each chunk of
1567      *  character data.  SAX parsers may return all contiguous character
1568      *  data in a single chunk, or they may split it into several
1569      *  chunks; however, all of the characters in any single event
1570      *  must come from the same external entity, so that the Locator
1571      *  provides useful information.</p>
1572      *
1573      *  <p>The application must not attempt to read from the array
1574      *  outside of the specified range.</p>
1575      *
1576      *  <p>Note that some parsers will report whitespace using the
1577      *  ignorableWhitespace() method rather than this one (validating
1578      *  parsers must do so).</p>
1579      *
1580      *  @param ch The characters from the XML document.
1581      *  @param start The start position in the array.
1582      *  @param length The number of characters to read from the array.
1583      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1584      *             wrapping another exception.
1585      *  @see #ignorableWhitespace
1586      *  @see org.xml.sax.Locator
1587      *
1588      * @throws org.xml.sax.SAXException
1589      */
1590     public final void cdata(char ch[], int start, int length)
1591         throws org.xml.sax.SAXException
1592     {
1593         if ((null != m_elemContext.m_elementName)
1594             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1595                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1596         {
1597             try
1598             {
1599                 if (m_elemContext.m_startTagOpen)
1600                 {
1601                     closeStartTag();
1602                     m_elemContext.m_startTagOpen = false;
1603                 }
1604 
1605                 m_ispreserve = true;
1606 
1607                 if (shouldIndent())
1608                     indent();
1609 
1610                 // writer.write(ch, start, length);
1611                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1612             }
1613             catch (IOException ioe)
1614             {
1615                 throw new org.xml.sax.SAXException(
1616                     Utils.messages.createMessage(
1617                         MsgKey.ER_OIERROR,
1618                         null),
1619                     ioe);
1620                 //"IO error", ioe);
1621             }
1622         }
1623         else
1624         {
1625             super.cdata(ch, start, length);
1626         }
1627     }
1628 
1629     /**
1630      *  Receive notification of a processing instruction.
1631      *
1632      *  @param target The processing instruction target.
1633      *  @param data The processing instruction data, or null if
1634      *         none was supplied.
1635      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1636      *             wrapping another exception.
1637      *
1638      * @throws org.xml.sax.SAXException
1639      */
1640     public void processingInstruction(String target, String data)
1641         throws org.xml.sax.SAXException
1642     {
1643         m_childNodeNum++;
1644         flushCharactersBuffer();
1645         // Process any pending starDocument and startElement first.
1646         flushPending();
1647 
1648         // Use a fairly nasty hack to tell if the next node is supposed to be
1649         // unescaped text.
1650         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1651         {
1652             startNonEscaping();
1653         }
1654         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1655         {
1656             endNonEscaping();
1657         }
1658         else
1659         {
1660             try
1661             {
1662             if (m_elemContext.m_startTagOpen)
1663             {
1664                 closeStartTag();
1665                 m_elemContext.m_startTagOpen = false;
1666             }
1667             else if (m_needToCallStartDocument)
1668                 startDocumentInternal();
1669 
1670             if (shouldIndent())
1671                 indent();
1672 
1673             final java.io.Writer writer = m_writer;
1674             //writer.write("<?" + target);
1675             writer.write("<?");
1676             writer.write(target);
1677 
1678             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1679                 writer.write(' ');
1680 
1681             //writer.write(data + ">"); // different from XML
1682             writer.write(data); // different from XML
1683             writer.write('>'); // different from XML
1684 
1685             // Always output a newline char if not inside of an
1686             // element. The whitespace is not significant in that
1687             // case.
1688             if (m_elemContext.m_currentElemDepth <= 0)
1689                 outputLineSep();
1690 
1691             m_startNewLine = true;
1692             }
1693             catch(IOException e)
1694             {
1695                 throw new SAXException(e);
1696             }
1697         }
1698 
1699         // now generate the PI event
1700         if (m_tracer != null)
1701             super.fireEscapingEvent(target, data);
1702      }
1703 
1704     /**
1705      * Receive notivication of a entityReference.
1706      *
1707      * @param name non-null reference to entity name string.
1708      *
1709      * @throws org.xml.sax.SAXException
1710      */
1711     public final void entityReference(String name)
1712         throws org.xml.sax.SAXException
1713     {
1714         try
1715         {
1716 
1717         final java.io.Writer writer = m_writer;
1718         writer.write('&');
1719         writer.write(name);
1720         writer.write(';');
1721 
1722         } catch(IOException e)
1723         {
1724             throw new SAXException(e);
1725         }
1726     }
1727     /**
1728      * @see ExtendedContentHandler#endElement(String)
1729      */
1730     public final void endElement(String elemName) throws SAXException
1731     {
1732         endElement(null, null, elemName);
1733     }
1734 
1735     /**
1736      * Process the attributes, which means to write out the currently
1737      * collected attributes to the writer. The attributes are not
1738      * cleared by this method
1739      *
1740      * @param writer the writer to write processed attributes to.
1741      * @param nAttrs the number of attributes in m_attributes
1742      * to be processed
1743      *
1744      * @throws org.xml.sax.SAXException
1745      */
1746     public void processAttributes(java.io.Writer writer, int nAttrs)
1747         throws IOException,SAXException
1748     {
1749             /*
1750              * process the collected attributes
1751              */
1752             for (int i = 0; i < nAttrs; i++)
1753             {
1754                 processAttribute(
1755                     writer,
1756                     m_attributes.getQName(i),
1757                     m_attributes.getValue(i),
1758                     m_elemContext.m_elementDesc);
1759             }
1760     }
1761 
1762     /**
1763      * For the enclosing elements starting tag write out out any attributes
1764      * followed by ">"
1765      *
1766      *@throws org.xml.sax.SAXException
1767      */
1768     protected void closeStartTag() throws SAXException
1769     {
1770             try
1771             {
1772 
1773             // finish processing attributes, time to fire off the start element event
1774             if (m_tracer != null)
1775                 super.fireStartElem(m_elemContext.m_elementName);
1776 
1777             int nAttrs = m_attributes.getLength();
1778             if (nAttrs>0)
1779             {
1780                 processAttributes(m_writer, nAttrs);
1781                 // clear attributes object for re-use with next element
1782                 m_attributes.clear();
1783             }
1784 
1785             m_writer.write('>');
1786 
1787             /* whether Xalan or XSLTC, we have the prefix mappings now, so
1788              * lets determine if the current element is specified in the cdata-
1789              * section-elements list.
1790              */
1791             if (m_StringOfCDATASections != null)
1792                 m_elemContext.m_isCdataSection = isCdataSection();
1793             if (m_doIndent)
1794             {
1795                 m_isprevtext = false;
1796                 m_preserves.push(m_ispreserve);
1797             }
1798 
1799             }
1800             catch(IOException e)
1801             {
1802                 throw new SAXException(e);
1803             }
1804     }
1805 
1806         /**
1807          * This method is used when a prefix/uri namespace mapping
1808          * is indicated after the element was started with a
1809          * startElement() and before and endElement().
1810          * startPrefixMapping(prefix,uri) would be used before the
1811          * startElement() call.
1812          * @param uri the URI of the namespace
1813          * @param prefix the prefix associated with the given URI.
1814          *
1815          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1816          */
1817         public void namespaceAfterStartElement(String prefix, String uri)
1818             throws SAXException
1819         {
1820             // hack for XSLTC with finding URI for default namespace
1821             if (m_elemContext.m_elementURI == null)
1822             {
1823                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1824                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1825                 {
1826                     // the elements URI is not known yet, and it
1827                     // doesn't have a prefix, and we are currently
1828                     // setting the uri for prefix "", so we have
1829                     // the uri for the element... lets remember it
1830                     m_elemContext.m_elementURI = uri;
1831                 }
1832             }
1833             startPrefixMapping(prefix,uri,false);
1834         }
1835 
1836     public void startDTD(String name, String publicId, String systemId)
1837         throws SAXException
1838     {
1839         m_inDTD = true;
1840         super.startDTD(name, publicId, systemId);
1841     }
1842 
1843     /**
1844      * Report the end of DTD declarations.
1845      * @throws org.xml.sax.SAXException The application may raise an exception.
1846      * @see #startDTD
1847      */
1848     public void endDTD() throws org.xml.sax.SAXException
1849     {
1850         m_inDTD = false;
1851         /* for ToHTMLStream the DOCTYPE is entirely output in the
1852          * startDocumentInternal() method, so don't do anything here
1853          */
1854     }
1855     /**
1856      * This method does nothing.
1857      */
1858     public void attributeDecl(
1859         String eName,
1860         String aName,
1861         String type,
1862         String valueDefault,
1863         String value)
1864         throws SAXException
1865     {
1866         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1867     }
1868 
1869     /**
1870      * This method does nothing.
1871      */
1872     public void elementDecl(String name, String model) throws SAXException
1873     {
1874         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1875     }
1876     /**
1877      * This method does nothing.
1878      */
1879     public void internalEntityDecl(String name, String value)
1880         throws SAXException
1881     {
1882         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1883     }
1884     /**
1885      * This method does nothing.
1886      */
1887     public void externalEntityDecl(
1888         String name,
1889         String publicId,
1890         String systemId)
1891         throws SAXException
1892     {
1893         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1894     }
1895 
1896     /**
1897      * This method is used to add an attribute to the currently open element.
1898      * The caller has guaranted that this attribute is unique, which means that it
1899      * not been seen before and will not be seen again.
1900      *
1901      * @param name the qualified name of the attribute
1902      * @param value the value of the attribute which can contain only
1903      * ASCII printable characters characters in the range 32 to 127 inclusive.
1904      * @param flags the bit values of this integer give optimization information.
1905      */
1906     public void addUniqueAttribute(String name, String value, int flags)
1907         throws SAXException
1908     {
1909         try
1910         {
1911             final java.io.Writer writer = m_writer;
1912             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1913             {
1914                 // "flags" has indicated that the characters
1915                 // '>'  '<'   '&'  and '"' are not in the value and
1916                 // m_htmlcharInfo has recorded that there are no other
1917                 // entities in the range 0 to 127 so we write out the
1918                 // value directly
1919                 writer.write(' ');
1920                 writer.write(name);
1921                 writer.write("=\"");
1922                 writer.write(value);
1923                 writer.write('"');
1924             }
1925             else if (
1926                 (flags & HTML_ATTREMPTY) > 0
1927                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1928             {
1929                 writer.write(' ');
1930                 writer.write(name);
1931             }
1932             else
1933             {
1934                 writer.write(' ');
1935                 writer.write(name);
1936                 writer.write("=\"");
1937                 if ((flags & HTML_ATTRURL) > 0)
1938                 {
1939                     writeAttrURI(writer, value, m_specialEscapeURLs);
1940                 }
1941                 else
1942                 {
1943                     writeAttrString(writer, value, this.getEncoding());
1944                 }
1945                 writer.write('"');
1946             }
1947         } catch (IOException e) {
1948             throw new SAXException(e);
1949         }
1950     }
1951 
1952     public void comment(char ch[], int start, int length)
1953             throws SAXException
1954     {
1955         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1956         if (m_inDTD)
1957             return;
1958         super.comment(ch, start, length);
1959     }
1960 
1961     public boolean reset()
1962     {
1963         boolean ret = super.reset();
1964         if (!ret)
1965             return false;
1966         initToHTMLStream();
1967         return true;
1968     }
1969 
1970     private void initToHTMLStream()
1971     {
1972         m_isprevblock = false;
1973         m_inDTD = false;
1974         m_omitMetaTag = false;
1975         m_specialEscapeURLs = true;
1976     }
1977 
1978     static class Trie
1979     {
1980         /**
1981          * A digital search trie for 7-bit ASCII text
1982          * The API is a subset of java.util.Hashtable
1983          * The key must be a 7-bit ASCII string
1984          * The value may be any Java Object
1985          * One can get an object stored in a trie from its key,
1986          * but the search is either case sensitive or case
1987          * insensitive to the characters in the key, and this
1988          * choice of sensitivity or insensitivity is made when
1989          * the Trie is created, before any objects are put in it.
1990          *
1991          * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
1992          * It exists to cut the serializers dependancy on that package.
1993          *
1994          * @xsl.usage internal
1995          */
1996 
1997         /** Size of the m_nextChar array.  */
1998         public static final int ALPHA_SIZE = 128;
1999 
2000         /** The root node of the tree.    */
2001         final Node m_Root;
2002 
2003         /** helper buffer to convert Strings to char arrays */
2004         private char[] m_charBuffer = new char[0];
2005 
2006         /** true if the search for an object is lower case only with the key */
2007         private final boolean m_lowerCaseOnly;
2008 
2009         /**
2010          * Construct the trie that has a case insensitive search.
2011          */
2012         public Trie()
2013         {
2014             m_Root = new Node();
2015             m_lowerCaseOnly = false;
2016         }
2017 
2018         /**
2019          * Construct the trie given the desired case sensitivity with the key.
2020          * @param lowerCaseOnly true if the search keys are to be loser case only,
2021          * not case insensitive.
2022          */
2023         public Trie(boolean lowerCaseOnly)
2024         {
2025             m_Root = new Node();
2026             m_lowerCaseOnly = lowerCaseOnly;
2027         }
2028 
2029         /**
2030          * Put an object into the trie for lookup.
2031          *
2032          * @param key must be a 7-bit ASCII string
2033          * @param value any java object.
2034          *
2035          * @return The old object that matched key, or null.
2036          */
2037         public Object put(String key, Object value)
2038         {
2039 
2040             final int len = key.length();
2041             if (len > m_charBuffer.length)
2042             {
2043                 // make the biggest buffer ever needed in get(String)
2044                 m_charBuffer = new char[len];
2045             }
2046 
2047             Node node = m_Root;
2048 
2049             for (int i = 0; i < len; i++)
2050             {
2051                 Node nextNode =
2052                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2053 
2054                 if (nextNode != null)
2055                 {
2056                     node = nextNode;
2057                 }
2058                 else
2059                 {
2060                     for (; i < len; i++)
2061                     {
2062                         Node newNode = new Node();
2063                         if (m_lowerCaseOnly)
2064                         {
2065                             // put this value into the tree only with a lower case key
2066                             node.m_nextChar[Character.toLowerCase(
2067                                 key.charAt(i))] =
2068                                 newNode;
2069                         }
2070                         else
2071                         {
2072                             // put this value into the tree with a case insensitive key
2073                             node.m_nextChar[Character.toUpperCase(
2074                                 key.charAt(i))] =
2075                                 newNode;
2076                             node.m_nextChar[Character.toLowerCase(
2077                                 key.charAt(i))] =
2078                                 newNode;
2079                         }
2080                         node = newNode;
2081                     }
2082                     break;
2083                 }
2084             }
2085 
2086             Object ret = node.m_Value;
2087 
2088             node.m_Value = value;
2089 
2090             return ret;
2091         }
2092 
2093         /**
2094          * Get an object that matches the key.
2095          *
2096          * @param key must be a 7-bit ASCII string
2097          *
2098          * @return The object that matches the key, or null.
2099          */
2100         public Object get(final String key)
2101         {
2102 
2103             final int len = key.length();
2104 
2105             /* If the name is too long, we won't find it, this also keeps us
2106              * from overflowing m_charBuffer
2107              */
2108             if (m_charBuffer.length < len)
2109                 return null;
2110 
2111             Node node = m_Root;
2112             switch (len) // optimize the look up based on the number of chars
2113             {
2114                 // case 0 looks silly, but the generated bytecode runs
2115                 // faster for lookup of elements of length 2 with this in
2116                 // and a fair bit faster.  Don't know why.
2117                 case 0 :
2118                     {
2119                         return null;
2120                     }
2121 
2122                 case 1 :
2123                     {
2124                         final char ch = key.charAt(0);
2125                         if (ch < ALPHA_SIZE)
2126                         {
2127                             node = node.m_nextChar[ch];
2128                             if (node != null)
2129                                 return node.m_Value;
2130                         }
2131                         return null;
2132                     }
2133                     //                comment out case 2 because the default is faster
2134                     //                case 2 :
2135                     //                    {
2136                     //                        final char ch0 = key.charAt(0);
2137                     //                        final char ch1 = key.charAt(1);
2138                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2139                     //                        {
2140                     //                            node = node.m_nextChar[ch0];
2141                     //                            if (node != null)
2142                     //                            {
2143                     //
2144                     //                                if (ch1 < ALPHA_SIZE)
2145                     //                                {
2146                     //                                    node = node.m_nextChar[ch1];
2147                     //                                    if (node != null)
2148                     //                                        return node.m_Value;
2149                     //                                }
2150                     //                            }
2151                     //                        }
2152                     //                        return null;
2153                     //                   }
2154                 default :
2155                     {
2156                         for (int i = 0; i < len; i++)
2157                         {
2158                             // A thread-safe way to loop over the characters
2159                             final char ch = key.charAt(i);
2160                             if (ALPHA_SIZE <= ch)
2161                             {
2162                                 // the key is not 7-bit ASCII so we won't find it here
2163                                 return null;
2164                             }
2165 
2166                             node = node.m_nextChar[ch];
2167                             if (node == null)
2168                                 return null;
2169                         }
2170 
2171                         return node.m_Value;
2172                     }
2173             }
2174         }
2175 
2176         /**
2177          * The node representation for the trie.
2178          * @xsl.usage internal
2179          */
2180         private class Node
2181         {
2182 
2183             /**
2184              * Constructor, creates a Node[ALPHA_SIZE].
2185              */
2186             Node()
2187             {
2188                 m_nextChar = new Node[ALPHA_SIZE];
2189                 m_Value = null;
2190             }
2191 
2192             /** The next nodes.   */
2193             final Node m_nextChar[];
2194 
2195             /** The value.   */
2196             Object m_Value;
2197         }
2198         /**
2199          * Construct the trie from another Trie.
2200          * Both the existing Trie and this new one share the same table for
2201          * lookup, and it is assumed that the table is fully populated and
2202          * not changing anymore.
2203          *
2204          * @param existingTrie the Trie that this one is a copy of.
2205          */
2206         public Trie(Trie existingTrie)
2207         {
2208             // copy some fields from the existing Trie into this one.
2209             m_Root = existingTrie.m_Root;
2210             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2211 
2212             // get a buffer just big enough to hold the longest key in the table.
2213             int max = existingTrie.getLongestKeyLength();
2214             m_charBuffer = new char[max];
2215         }
2216 
2217         /**
2218          * Get an object that matches the key.
2219          * This method is faster than get(), but is not thread-safe.
2220          *
2221          * @param key must be a 7-bit ASCII string
2222          *
2223          * @return The object that matches the key, or null.
2224          */
2225         public Object get2(final String key)
2226         {
2227 
2228             final int len = key.length();
2229 
2230             /* If the name is too long, we won't find it, this also keeps us
2231              * from overflowing m_charBuffer
2232              */
2233             if (m_charBuffer.length < len)
2234                 return null;
2235 
2236             Node node = m_Root;
2237             switch (len) // optimize the look up based on the number of chars
2238             {
2239                 // case 0 looks silly, but the generated bytecode runs
2240                 // faster for lookup of elements of length 2 with this in
2241                 // and a fair bit faster.  Don't know why.
2242                 case 0 :
2243                     {
2244                         return null;
2245                     }
2246 
2247                 case 1 :
2248                     {
2249                         final char ch = key.charAt(0);
2250                         if (ch < ALPHA_SIZE)
2251                         {
2252                             node = node.m_nextChar[ch];
2253                             if (node != null)
2254                                 return node.m_Value;
2255                         }
2256                         return null;
2257                     }
2258                 default :
2259                     {
2260                         /* Copy string into array. This is not thread-safe because
2261                          * it modifies the contents of m_charBuffer. If multiple
2262                          * threads were to use this Trie they all would be
2263                          * using this same array (not good). So this
2264                          * method is not thread-safe, but it is faster because
2265                          * converting to a char[] and looping over elements of
2266                          * the array is faster than a String's charAt(i).
2267                          */
2268                         key.getChars(0, len, m_charBuffer, 0);
2269 
2270                         for (int i = 0; i < len; i++)
2271                         {
2272                             final char ch = m_charBuffer[i];
2273                             if (ALPHA_SIZE <= ch)
2274                             {
2275                                 // the key is not 7-bit ASCII so we won't find it here
2276                                 return null;
2277                             }
2278 
2279                             node = node.m_nextChar[ch];
2280                             if (node == null)
2281                                 return null;
2282                         }
2283 
2284                         return node.m_Value;
2285                     }
2286             }
2287         }
2288 
2289         /**
2290          * Get the length of the longest key used in the table.
2291          */
2292         public int getLongestKeyLength()
2293         {
2294             return m_charBuffer.length;
2295         }
2296     }
2297 }