1 /* 2 * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Copyright 2001-2004 The Apache Software Foundation. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 /* 20 * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $ 21 */ 22 package com.sun.org.apache.xml.internal.serializer; 23 24 import java.io.IOException; 25 import java.io.OutputStream; 26 import java.io.UnsupportedEncodingException; 27 import java.util.Properties; 28 29 import javax.xml.transform.Result; 30 31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 32 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 33 import org.xml.sax.Attributes; 34 import org.xml.sax.SAXException; 35 36 /** 37 * This serializer takes a series of SAX or 38 * SAX-like events and writes its output 39 * to the given stream. 40 * 41 * This class is not a public API, it is public 42 * because it is used from another package. 43 * 44 * @xsl.usage internal 45 * @LastModified: Sept 2018 46 */ 47 public final class ToHTMLStream extends ToStream 48 { 49 50 /** This flag is set while receiving events from the DTD */ 51 protected boolean m_inDTD = false; 52 53 /** True if the current element is a block element. (seems like 54 * this needs to be a stack. -sb). 
*/
    private boolean m_inBlockElem = false;

    /**
     * Character information for HTML output: it tells which characters need
     * special treatment and provides the character to entity name lookup.
     */
    private static final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
    static final Trie m_elementFlags = new Trie();

    static
    {
        initTagReference(m_elementFlags);
    }

    /**
     * Registers an ElemDesc for every known HTML element in the given trie and
     * then marks, per element, which attributes carry URLs
     * (ElemDesc.ATTRURL) and which may be written in minimized form
     * (ElemDesc.ATTREMPTY).
     *
     * @param m_elementFlags the trie to populate, keyed by element name.
     */
    static void initTagReference(Trie m_elementFlags)
    {
        final Trie tags = m_elementFlags;

        // HTML 4.0 loose DTD
        tags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
        tags.put("FRAME", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("FRAMESET", new ElemDesc(ElemDesc.BLOCK));
        tags.put("NOFRAMES", new ElemDesc(ElemDesc.BLOCK));
        tags.put("ISINDEX", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
        tags.put("CENTER", new ElemDesc(ElemDesc.BLOCK));
        tags.put("DIR", new ElemDesc(ElemDesc.BLOCK));
        tags.put("MENU", new ElemDesc(ElemDesc.BLOCK));

        // HTML 4.0 strict DTD
        tags.put("TT", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("I", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("B", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("BIG", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("SMALL", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("EM", new ElemDesc(ElemDesc.PHRASE));
        tags.put("STRONG", new ElemDesc(ElemDesc.PHRASE));
        tags.put("DFN", new ElemDesc(ElemDesc.PHRASE));
        tags.put("CODE", new ElemDesc(ElemDesc.PHRASE));
        tags.put("SAMP", new ElemDesc(ElemDesc.PHRASE));
        tags.put("KBD", new ElemDesc(ElemDesc.PHRASE));
        tags.put("VAR", new ElemDesc(ElemDesc.PHRASE));
        tags.put("CITE", new ElemDesc(ElemDesc.PHRASE));
        tags.put("ABBR", new ElemDesc(ElemDesc.PHRASE));
        tags.put("ACRONYM", new ElemDesc(ElemDesc.PHRASE));
        tags.put("SUP", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        tags.put("SUB", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        tags.put("SPAN", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        tags.put("BDO", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        tags.put(
            "BR",
            new ElemDesc(
                ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
        tags.put(
            "ADDRESS",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put(
            "DIV",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("A", new ElemDesc(ElemDesc.SPECIAL));
        tags.put("MAP", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
        tags.put("AREA", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("LINK", new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put(
            "IMG",
            new ElemDesc(
                ElemDesc.SPECIAL
                    | ElemDesc.ASPECIAL
                    | ElemDesc.EMPTY
                    | ElemDesc.WHITESPACESENSITIVE));
        tags.put(
            "OBJECT",
            new ElemDesc(
                ElemDesc.SPECIAL
                    | ElemDesc.ASPECIAL
                    | ElemDesc.HEADMISC
                    | ElemDesc.WHITESPACESENSITIVE));
        tags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
        tags.put(
            "HR",
            new ElemDesc(
                ElemDesc.BLOCK
                    | ElemDesc.BLOCKFORM
                    | ElemDesc.BLOCKFORMFIELDSET
                    | ElemDesc.EMPTY));
        tags.put(
            "P",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("H1", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("H2", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("H3", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("H4", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("H5", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("H6", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
        tags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
        tags.put("Q", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
        tags.put(
            "BLOCKQUOTE",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("INS", new ElemDesc(0));
        tags.put("DEL", new ElemDesc(0));
        tags.put(
            "DL",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("DT", new ElemDesc(ElemDesc.BLOCK));
        tags.put("DD", new ElemDesc(ElemDesc.BLOCK));
        tags.put("OL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
        tags.put("UL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
        tags.put("LI", new ElemDesc(ElemDesc.BLOCK));
        tags.put("FORM", new ElemDesc(ElemDesc.BLOCK));
        tags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
        tags.put(
            "INPUT",
            new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
        tags.put("SELECT", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        tags.put("OPTGROUP", new ElemDesc(0));
        tags.put("OPTION", new ElemDesc(0));
        tags.put("TEXTAREA", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        tags.put("FIELDSET", new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
        tags.put("LEGEND", new ElemDesc(0));
        tags.put("BUTTON", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
        tags.put(
            "TABLE",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("CAPTION", new ElemDesc(ElemDesc.BLOCK));
        tags.put("THEAD", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TFOOT", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TBODY", new ElemDesc(ElemDesc.BLOCK));
        tags.put("COLGROUP", new ElemDesc(ElemDesc.BLOCK));
        tags.put("COL", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("TR", new ElemDesc(ElemDesc.BLOCK));
        tags.put("TH", new ElemDesc(0));
        tags.put("TD", new ElemDesc(0));
        tags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
        tags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
        tags.put("BASE", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("META", new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
        tags.put("STYLE", new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
        tags.put(
            "SCRIPT",
            new ElemDesc(
                ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.RAW));
        tags.put(
            "NOSCRIPT",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put("HTML", new ElemDesc(ElemDesc.BLOCK));

        // From "John Ky" <hand@syd.speednet.com.au
        // Transitional Document Type Definition (loosedtd.html#basefont)
        tags.put("FONT", new ElemDesc(ElemDesc.FONTSTYLE));

        // HTML 4 presentation chapter (graphics.html#edef-STRIKE)
        tags.put("S", new ElemDesc(ElemDesc.FONTSTYLE));
        tags.put("STRIKE", new ElemDesc(ElemDesc.FONTSTYLE));

        // HTML 4 presentation chapter (graphics.html#edef-U)
        tags.put("U", new ElemDesc(ElemDesc.FONTSTYLE));

        // From "John Ky" <hand@syd.speednet.com.au
        tags.put("NOBR", new ElemDesc(ElemDesc.FONTSTYLE));

        // HTML 4.0, section 16.5
        tags.put(
            "IFRAME",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));

        // Netscape 4 extensions
        tags.put(
            "LAYER",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
        tags.put(
            "ILAYER",
            new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));

        // Attribute information, one group per element registered above.
        ElemDesc desc;

        desc = (ElemDesc) tags.get("A");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NAME", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("AREA");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("BASE");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("BUTTON");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("BLOCKQUOTE");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("DEL");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("DIR");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("DIV");
        desc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) tags.get("DL");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("FORM");
        desc.setAttr("ACTION", ElemDesc.ATTRURL);

        // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
        desc = (ElemDesc) tags.get("FRAME");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("HEAD");
        desc.setAttr("PROFILE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("HR");
        desc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);

        // HTML 4.0, section 16.5
        desc = (ElemDesc) tags.get("IFRAME");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) tags.get("ILAYER");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("IMG");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("INPUT");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("INS");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) tags.get("LAYER");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("LINK");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("MENU");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("OBJECT");
        desc.setAttr("CLASSID", ElemDesc.ATTRURL);
        desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
        desc.setAttr("DATA", ElemDesc.ATTRURL);
        desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("OL");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("OPTGROUP");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("OPTION");
        desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("Q");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) tags.get("SCRIPT");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("FOR", ElemDesc.ATTRURL);
        desc.setAttr("DEFER", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("SELECT");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("TABLE");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) tags.get("TD");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("TEXTAREA");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("TH");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        // The nowrap attribute of a tr element is both
        // a Netscape and Internet-Explorer extension
        desc = (ElemDesc) tags.get("TR");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) tags.get("UL");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    }

    /**
     * Dummy element description returned for elements not found in the trie.
     */
    private static final ElemDesc m_dummy = new ElemDesc(ElemDesc.BLOCK);

    /** True if URLs should be specially escaped with the %xx form. */
    private boolean m_specialEscapeURLs = true;

    /** True if the META tag should be omitted. */
    private boolean m_omitMetaTag = false;

    /**
     * Tells if the formatter should use special URL escaping.
     *
     * @param bool True if URLs should be specially escaped with the %xx form.
     */
    public void setSpecialEscapeURLs(boolean bool)
    {
        this.m_specialEscapeURLs = bool;
    }

    /**
     * Tells if the formatter should omit the META tag.
     *
     * @param bool True if the META tag should be omitted.
544 */ 545 public void setOmitMetaTag(boolean bool) 546 { 547 m_omitMetaTag = bool; 548 } 549 550 /** 551 * Specifies an output format for this serializer. It the 552 * serializer has already been associated with an output format, 553 * it will switch to the new format. This method should not be 554 * called while the serializer is in the process of serializing 555 * a document. 556 * 557 * This method can be called multiple times before starting 558 * the serialization of a particular result-tree. In principle 559 * all serialization parameters can be changed, with the exception 560 * of method="html" (it must be method="html" otherwise we 561 * shouldn't even have a ToHTMLStream object here!) 562 * 563 * @param format The output format or serialzation parameters 564 * to use. 565 */ 566 public void setOutputFormat(Properties format) 567 { 568 569 m_specialEscapeURLs = 570 OutputPropertyUtils.getBooleanProperty( 571 OutputPropertiesFactory.S_USE_URL_ESCAPING, 572 format); 573 574 m_omitMetaTag = 575 OutputPropertyUtils.getBooleanProperty( 576 OutputPropertiesFactory.S_OMIT_META_TAG, 577 format); 578 579 super.setOutputFormat(format); 580 } 581 582 /** 583 * Tells if the formatter should use special URL escaping. 584 * 585 * @return True if URLs should be specially escaped with the %xx form. 586 */ 587 private final boolean getSpecialEscapeURLs() 588 { 589 return m_specialEscapeURLs; 590 } 591 592 /** 593 * Tells if the formatter should omit the META tag. 594 * 595 * @return True if the META tag should be omitted. 596 */ 597 private final boolean getOmitMetaTag() 598 { 599 return m_omitMetaTag; 600 } 601 602 /** 603 * Get a description of the given element. 604 * 605 * @param name non-null name of element, case insensitive. 606 * 607 * @return non-null reference to ElemDesc, which may be m_dummy if no 608 * element description matches the given name. 
609 */ 610 public static final ElemDesc getElemDesc(String name) 611 { 612 /* this method used to return m_dummy when name was null 613 * but now it doesn't check and and requires non-null name. 614 */ 615 Object obj = m_elementFlags.get(name); 616 if (null != obj) 617 return (ElemDesc)obj; 618 return m_dummy; 619 } 620 621 /** 622 * A Trie that is just a copy of the "static" one. 623 * We need this one to be able to use the faster, but not thread-safe 624 * method Trie.get2(name) 625 */ 626 private Trie m_htmlInfo = new Trie(m_elementFlags); 627 /** 628 * Calls to this method could be replaced with calls to 629 * getElemDesc(name), but this one should be faster. 630 */ 631 private ElemDesc getElemDesc2(String name) 632 { 633 Object obj = m_htmlInfo.get2(name); 634 if (null != obj) 635 return (ElemDesc)obj; 636 return m_dummy; 637 } 638 639 /** 640 * Default constructor. 641 */ 642 public ToHTMLStream() 643 { 644 645 super(); 646 m_charInfo = m_htmlcharInfo; 647 // initialize namespaces 648 m_prefixMap = new NamespaceMappings(); 649 650 } 651 652 /** The name of the current element. */ 653 // private String m_currentElementName = null; 654 655 /** 656 * Receive notification of the beginning of a document. 657 * 658 * @throws org.xml.sax.SAXException Any SAX exception, possibly 659 * wrapping another exception. 
660 * 661 * @throws org.xml.sax.SAXException 662 */ 663 protected void startDocumentInternal() throws org.xml.sax.SAXException 664 { 665 super.startDocumentInternal(); 666 667 m_needToCallStartDocument = false; 668 m_needToOutputDocTypeDecl = true; 669 m_startNewLine = false; 670 setOmitXMLDeclaration(true); 671 672 if (true == m_needToOutputDocTypeDecl) 673 { 674 String doctypeSystem = getDoctypeSystem(); 675 String doctypePublic = getDoctypePublic(); 676 if ((null != doctypeSystem) || (null != doctypePublic)) 677 { 678 final java.io.Writer writer = m_writer; 679 try 680 { 681 writer.write("<!DOCTYPE html"); 682 683 if (null != doctypePublic) 684 { 685 writer.write(" PUBLIC \""); 686 writer.write(doctypePublic); 687 writer.write('"'); 688 } 689 690 if (null != doctypeSystem) 691 { 692 if (null == doctypePublic) 693 writer.write(" SYSTEM \""); 694 else 695 writer.write(" \""); 696 697 writer.write(doctypeSystem); 698 writer.write('"'); 699 } 700 701 writer.write('>'); 702 outputLineSep(); 703 } 704 catch(IOException e) 705 { 706 throw new SAXException(e); 707 } 708 } 709 } 710 711 m_needToOutputDocTypeDecl = false; 712 } 713 714 /** 715 * Receive notification of the end of a document. 716 * 717 * @throws org.xml.sax.SAXException Any SAX exception, possibly 718 * wrapping another exception. 719 * 720 * @throws org.xml.sax.SAXException 721 */ 722 public final void endDocument() throws org.xml.sax.SAXException 723 { 724 725 flushPending(); 726 if (m_doIndent && !m_isprevtext) 727 { 728 try 729 { 730 outputLineSep(); 731 } 732 catch(IOException e) 733 { 734 throw new SAXException(e); 735 } 736 } 737 738 flushWriter(); 739 if (m_tracer != null) 740 super.fireEndDoc(); 741 } 742 743 /** 744 * Receive notification of the beginning of an element. 745 * 746 * 747 * @param namespaceURI 748 * @param localName 749 * @param name The element type name. 750 * @param atts The attributes attached to the element, if any. 
751 * @throws org.xml.sax.SAXException Any SAX exception, possibly 752 * wrapping another exception. 753 * @see #endElement 754 * @see org.xml.sax.AttributeList 755 */ 756 public void startElement( 757 String namespaceURI, 758 String localName, 759 String name, 760 Attributes atts) 761 throws org.xml.sax.SAXException 762 { 763 764 ElemContext elemContext = m_elemContext; 765 766 // clean up any pending things first 767 if (elemContext.m_startTagOpen) 768 { 769 closeStartTag(); 770 elemContext.m_startTagOpen = false; 771 } 772 else if (m_cdataTagOpen) 773 { 774 closeCDATA(); 775 m_cdataTagOpen = false; 776 } 777 else if (m_needToCallStartDocument) 778 { 779 startDocumentInternal(); 780 m_needToCallStartDocument = false; 781 } 782 783 784 // if this element has a namespace then treat it like XML 785 if (null != namespaceURI && namespaceURI.length() > 0) 786 { 787 super.startElement(namespaceURI, localName, name, atts); 788 789 return; 790 } 791 792 try 793 { 794 // getElemDesc2(name) is faster than getElemDesc(name) 795 ElemDesc elemDesc = getElemDesc2(name); 796 int elemFlags = elemDesc.getFlags(); 797 798 // deal with indentation issues first 799 if (m_doIndent) 800 { 801 802 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 803 if (m_ispreserve) 804 m_ispreserve = false; 805 else if ( 806 (null != elemContext.m_elementName) 807 && (!m_inBlockElem 808 || isBlockElement) /* && !isWhiteSpaceSensitive */ 809 ) 810 { 811 m_startNewLine = true; 812 813 indent(); 814 815 } 816 m_inBlockElem = !isBlockElement; 817 } 818 819 // save any attributes for later processing 820 if (atts != null) 821 addAttributes(atts); 822 823 m_isprevtext = false; 824 final java.io.Writer writer = m_writer; 825 writer.write('<'); 826 writer.write(name); 827 828 829 830 if (m_tracer != null) 831 firePseudoAttributes(); 832 833 if ((elemFlags & ElemDesc.EMPTY) != 0) 834 { 835 // an optimization for elements which are expected 836 // to be empty. 
837 m_elemContext = elemContext.push(); 838 /* XSLTC sometimes calls namespaceAfterStartElement() 839 * so we need to remember the name 840 */ 841 m_elemContext.m_elementName = name; 842 m_elemContext.m_elementDesc = elemDesc; 843 return; 844 } 845 else 846 { 847 elemContext = elemContext.push(namespaceURI,localName,name); 848 m_elemContext = elemContext; 849 elemContext.m_elementDesc = elemDesc; 850 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 851 } 852 853 854 if ((elemFlags & ElemDesc.HEADELEM) != 0) 855 { 856 // This is the <HEAD> element, do some special processing 857 closeStartTag(); 858 elemContext.m_startTagOpen = false; 859 if (!m_omitMetaTag) 860 { 861 if (m_doIndent) 862 indent(); 863 writer.write( 864 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 865 String encoding = getEncoding(); 866 String encode = Encodings.getMimeEncoding(encoding); 867 writer.write(encode); 868 writer.write("\">"); 869 } 870 } 871 } 872 catch (IOException e) 873 { 874 throw new SAXException(e); 875 } 876 } 877 878 /** 879 * Receive notification of the end of an element. 880 * 881 * 882 * @param namespaceURI 883 * @param localName 884 * @param name The element type name 885 * @throws org.xml.sax.SAXException Any SAX exception, possibly 886 * wrapping another exception. 
887 */ 888 public final void endElement( 889 final String namespaceURI, 890 final String localName, 891 final String name) 892 throws org.xml.sax.SAXException 893 { 894 // deal with any pending issues 895 if (m_cdataTagOpen) 896 closeCDATA(); 897 898 // if the element has a namespace, treat it like XML, not HTML 899 if (null != namespaceURI && namespaceURI.length() > 0) 900 { 901 super.endElement(namespaceURI, localName, name); 902 903 return; 904 } 905 906 try 907 { 908 909 ElemContext elemContext = m_elemContext; 910 final ElemDesc elemDesc = elemContext.m_elementDesc; 911 final int elemFlags = elemDesc.getFlags(); 912 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 913 914 // deal with any indentation issues 915 if (m_doIndent) 916 { 917 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 918 boolean shouldIndent = false; 919 920 if (m_ispreserve) 921 { 922 m_ispreserve = false; 923 } 924 else if (m_doIndent && (!m_inBlockElem || isBlockElement)) 925 { 926 m_startNewLine = true; 927 shouldIndent = true; 928 } 929 if (!elemContext.m_startTagOpen && shouldIndent) 930 indent(elemContext.m_currentElemDepth - 1); 931 m_inBlockElem = !isBlockElement; 932 } 933 934 final java.io.Writer writer = m_writer; 935 if (!elemContext.m_startTagOpen) 936 { 937 writer.write("</"); 938 writer.write(name); 939 writer.write('>'); 940 } 941 else 942 { 943 // the start-tag open when this method was called, 944 // so we need to process it now. 945 946 if (m_tracer != null) 947 super.fireStartElem(name); 948 949 // the starting tag was still open when we received this endElement() call 950 // so we need to process any gathered attributes NOW, before they go away. 
951 int nAttrs = m_attributes.getLength(); 952 if (nAttrs > 0) 953 { 954 processAttributes(m_writer, nAttrs); 955 // clear attributes object for re-use with next element 956 m_attributes.clear(); 957 } 958 if (!elemEmpty) 959 { 960 // As per Dave/Paul recommendation 12/06/2000 961 // if (shouldIndent) 962 // writer.write('>'); 963 // indent(m_currentIndent); 964 965 writer.write("></"); 966 writer.write(name); 967 writer.write('>'); 968 } 969 else 970 { 971 writer.write('>'); 972 } 973 } 974 975 // clean up because the element has ended 976 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) 977 m_ispreserve = true; 978 m_isprevtext = false; 979 980 // fire off the end element event 981 if (m_tracer != null) 982 super.fireEndElem(name); 983 984 // OPTIMIZE-EMPTY 985 if (elemEmpty) 986 { 987 // a quick exit if the HTML element had no children. 988 // This block of code can be removed if the corresponding block of code 989 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 990 m_elemContext = elemContext.m_prev; 991 return; 992 } 993 994 // some more clean because the element has ended. 995 if (!elemContext.m_startTagOpen) 996 { 997 if (m_doIndent && !m_preserves.isEmpty()) 998 m_preserves.pop(); 999 } 1000 m_elemContext = elemContext.m_prev; 1001 // m_isRawStack.pop(); 1002 } 1003 catch (IOException e) 1004 { 1005 throw new SAXException(e); 1006 } 1007 } 1008 1009 /** 1010 * Process an attribute. 1011 * @param writer The writer to write the processed output to. 1012 * @param name The name of the attribute. 1013 * @param value The value of the attribute. 1014 * @param elemDesc The description of the HTML element 1015 * that has this attribute. 
1016 * 1017 * @throws org.xml.sax.SAXException 1018 */ 1019 protected void processAttribute( 1020 java.io.Writer writer, 1021 String name, 1022 String value, 1023 ElemDesc elemDesc) 1024 throws IOException, SAXException 1025 { 1026 writer.write(' '); 1027 1028 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1029 && elemDesc != null 1030 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1031 { 1032 writer.write(name); 1033 } 1034 else 1035 { 1036 // %REVIEW% %OPT% 1037 // Two calls to single-char write may NOT 1038 // be more efficient than one to string-write... 1039 writer.write(name); 1040 writer.write("=\""); 1041 if ( elemDesc != null 1042 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1043 writeAttrURI(writer, value, m_specialEscapeURLs); 1044 else 1045 writeAttrString(writer, value, this.getEncoding()); 1046 writer.write('"'); 1047 1048 } 1049 } 1050 1051 /** 1052 * Tell if a character is an ASCII digit. 1053 */ 1054 private boolean isASCIIDigit(char c) 1055 { 1056 return (c >= '0' && c <= '9'); 1057 } 1058 1059 /** 1060 * Make an integer into an HH hex value. 1061 * Does no checking on the size of the input, since this 1062 * is only meant to be used locally by writeAttrURI. 1063 * 1064 * @param i must be a value less than 255. 1065 * 1066 * @return should be a two character string. 1067 */ 1068 private static String makeHHString(int i) 1069 { 1070 String s = Integer.toHexString(i).toUpperCase(); 1071 if (s.length() == 1) 1072 { 1073 s = "0" + s; 1074 } 1075 return s; 1076 } 1077 1078 /** 1079 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1080 * @param str must be 2 characters long 1081 * 1082 * @return true or false 1083 */ 1084 private boolean isHHSign(String str) 1085 { 1086 boolean sign = true; 1087 try 1088 { 1089 char r = (char) Integer.parseInt(str, 16); 1090 } 1091 catch (NumberFormatException e) 1092 { 1093 sign = false; 1094 } 1095 return sign; 1096 } 1097 1098 /** 1099 * Write the specified <var>string</var> after substituting non ASCII characters, 1100 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1101 * 1102 * @param string String to convert to XML format. 1103 * @param doURLEscaping True if we should try to encode as 1104 * per http://www.ietf.org/rfc/rfc2396.txt. 1105 * 1106 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1107 */ 1108 public void writeAttrURI( 1109 final java.io.Writer writer, String string, boolean doURLEscaping) 1110 throws IOException 1111 { 1112 // http://www.ietf.org/rfc/rfc2396.txt says: 1113 // A URI is always in an "escaped" form, since escaping or unescaping a 1114 // completed URI might change its semantics. Normally, the only time 1115 // escape encodings can safely be made is when the URI is being created 1116 // from its component parts; each component may have its own set of 1117 // characters that are reserved, so only the mechanism responsible for 1118 // generating or interpreting that component can determine whether or 1119 // not escaping a character will change its semantics. Likewise, a URI 1120 // must be separated into its components before the escaped characters 1121 // within those components can be safely decoded. 1122 // 1123 // ...So we do our best to do limited escaping of the URL, without 1124 // causing damage. If the URL is already properly escaped, in theory, this 1125 // function should not change the string value. 
1126 1127 final int end = string.length(); 1128 if (end > m_attrBuff.length) 1129 { 1130 m_attrBuff = new char[end*2 + 1]; 1131 } 1132 string.getChars(0,end, m_attrBuff, 0); 1133 final char[] chars = m_attrBuff; 1134 1135 int cleanStart = 0; 1136 int cleanLength = 0; 1137 1138 1139 char ch = 0; 1140 for (int i = 0; i < end; i++) 1141 { 1142 ch = chars[i]; 1143 1144 if ((ch < 32) || (ch > 126)) 1145 { 1146 if (cleanLength > 0) 1147 { 1148 writer.write(chars, cleanStart, cleanLength); 1149 cleanLength = 0; 1150 } 1151 if (doURLEscaping) 1152 { 1153 // Encode UTF16 to UTF8. 1154 // Reference is Unicode, A Primer, by Tony Graham. 1155 // Page 92. 1156 1157 // Note that Kay doesn't escape 0x20... 1158 // if(ch == 0x20) // Not sure about this... -sb 1159 // { 1160 // writer.write(ch); 1161 // } 1162 // else 1163 if (ch <= 0x7F) 1164 { 1165 writer.write('%'); 1166 writer.write(makeHHString(ch)); 1167 } 1168 else if (ch <= 0x7FF) 1169 { 1170 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1171 // and set two high bits. 1172 int high = (ch >> 6) | 0xC0; 1173 int low = (ch & 0x3F) | 0x80; 1174 // First 6 bits, + high bit 1175 writer.write('%'); 1176 writer.write(makeHHString(high)); 1177 writer.write('%'); 1178 writer.write(makeHHString(low)); 1179 } 1180 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1181 { 1182 // I'm sure this can be done in 3 instructions, but I choose 1183 // to try and do it exactly like it is done in the book, at least 1184 // until we are sure this is totally clean. I don't think performance 1185 // is a big issue with this particular function, though I could be 1186 // wrong. Also, the stuff below clearly does more masking than 1187 // it needs to do. 1188 1189 // Clear high 6 bits. 
1190 int highSurrogate = ((int) ch) & 0x03FF; 1191 1192 // Middle 4 bits (wwww) + 1 1193 // "Note that the value of wwww from the high surrogate bit pattern 1194 // is incremented to make the uuuuu bit pattern in the scalar value 1195 // so the surrogate pair don't address the BMP." 1196 int wwww = ((highSurrogate & 0x03C0) >> 6); 1197 int uuuuu = wwww + 1; 1198 1199 // next 4 bits 1200 int zzzz = (highSurrogate & 0x003C) >> 2; 1201 1202 // low 2 bits 1203 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1204 1205 // Get low surrogate character. 1206 ch = chars[++i]; 1207 1208 // Clear high 6 bits. 1209 int lowSurrogate = ((int) ch) & 0x03FF; 1210 1211 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1212 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1213 1214 // bottom 6 bits. 1215 int xxxxxx = (lowSurrogate & 0x003F); 1216 1217 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1218 int byte2 = 1219 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1220 int byte3 = 0x80 | yyyyyy; 1221 int byte4 = 0x80 | xxxxxx; 1222 1223 writer.write('%'); 1224 writer.write(makeHHString(byte1)); 1225 writer.write('%'); 1226 writer.write(makeHHString(byte2)); 1227 writer.write('%'); 1228 writer.write(makeHHString(byte3)); 1229 writer.write('%'); 1230 writer.write(makeHHString(byte4)); 1231 } 1232 else 1233 { 1234 int high = (ch >> 12) | 0xE0; // top 4 bits 1235 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1236 // middle 6 bits 1237 int low = (ch & 0x3F) | 0x80; 1238 // First 6 bits, + high bit 1239 writer.write('%'); 1240 writer.write(makeHHString(high)); 1241 writer.write('%'); 1242 writer.write(makeHHString(middle)); 1243 writer.write('%'); 1244 writer.write(makeHHString(low)); 1245 } 1246 1247 } 1248 else if (escapingNotNeeded(ch)) 1249 { 1250 writer.write(ch); 1251 } 1252 else 1253 { 1254 writer.write("&#"); 1255 writer.write(Integer.toString(ch)); 1256 writer.write(';'); 1257 } 1258 // In this character range we have first written out any previously 
accumulated 1259 // "clean" characters, then processed the current more complicated character, 1260 // which may have incremented "i". 1261 // We now we reset the next possible clean character. 1262 cleanStart = i + 1; 1263 } 1264 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1265 // not allowing quotes in the URI proper syntax, nor in the fragment 1266 // identifier, we believe that it's OK to double escape quotes. 1267 else if (ch == '"') 1268 { 1269 // If the character is a '%' number number, try to avoid double-escaping. 1270 // There is a question if this is legal behavior. 1271 1272 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1273 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1274 1275 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1276 1277 // We are no longer escaping '%' 1278 1279 if (cleanLength > 0) 1280 { 1281 writer.write(chars, cleanStart, cleanLength); 1282 cleanLength = 0; 1283 } 1284 1285 1286 // Mike Kay encodes this as ", so he may know something I don't? 1287 if (doURLEscaping) 1288 writer.write("%22"); 1289 else 1290 writer.write("""); // we have to escape this, I guess. 1291 1292 // We have written out any clean characters, then the escaped '%' and now we 1293 // We now we reset the next possible clean character. 1294 cleanStart = i + 1; 1295 } 1296 else if (ch == '&') 1297 { 1298 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1299 // instead of "&" to avoid confusion with the beginning of a character 1300 // reference (entity reference open delimiter). 
1301 if (cleanLength > 0) 1302 { 1303 writer.write(chars, cleanStart, cleanLength); 1304 cleanLength = 0; 1305 } 1306 writer.write("&"); 1307 cleanStart = i + 1; 1308 } 1309 else 1310 { 1311 // no processing for this character, just count how 1312 // many characters in a row that we have that need no processing 1313 cleanLength++; 1314 } 1315 } 1316 1317 // are there any clean characters at the end of the array 1318 // that we haven't processed yet? 1319 if (cleanLength > 1) 1320 { 1321 // if the whole string can be written out as-is do so 1322 // otherwise write out the clean chars at the end of the 1323 // array 1324 if (cleanStart == 0) 1325 writer.write(string); 1326 else 1327 writer.write(chars, cleanStart, cleanLength); 1328 } 1329 else if (cleanLength == 1) 1330 { 1331 // a little optimization for 1 clean character 1332 // (we could have let the previous if(...) handle them all) 1333 writer.write(ch); 1334 } 1335 } 1336 1337 /** 1338 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1339 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1340 * 1341 * @param string String to convert to XML format. 1342 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1343 * 1344 * @throws org.xml.sax.SAXException 1345 */ 1346 public void writeAttrString( 1347 final java.io.Writer writer, String string, String encoding) 1348 throws IOException, SAXException 1349 { 1350 final int end = string.length(); 1351 if (end > m_attrBuff.length) 1352 { 1353 m_attrBuff = new char[end * 2 + 1]; 1354 } 1355 string.getChars(0, end, m_attrBuff, 0); 1356 final char[] chars = m_attrBuff; 1357 1358 1359 1360 int cleanStart = 0; 1361 int cleanLength = 0; 1362 1363 char ch = 0; 1364 for (int i = 0; i < end; i++) 1365 { 1366 ch = chars[i]; 1367 1368 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1369 // System.out.println("ch: "+(int)ch); 1370 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1371 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1372 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) 1373 { 1374 cleanLength++; 1375 } 1376 else if ('<' == ch || '>' == ch) 1377 { 1378 cleanLength++; // no escaping in this case, as specified in 15.2 1379 } 1380 else if ( 1381 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1382 { 1383 cleanLength++; // no escaping in this case, as specified in 15.2 1384 } 1385 else 1386 { 1387 if (cleanLength > 0) 1388 { 1389 writer.write(chars,cleanStart,cleanLength); 1390 cleanLength = 0; 1391 } 1392 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1393 1394 if (i != pos) 1395 { 1396 i = pos - 1; 1397 } 1398 else 1399 { 1400 if (Encodings.isHighUTF16Surrogate(ch) || 1401 Encodings.isLowUTF16Surrogate(ch)) 1402 { 1403 if (writeUTF16Surrogate(ch, chars, i, end) >= 0) { 1404 // move the index if the low surrogate is consumed 1405 // as writeUTF16Surrogate has written the pair 1406 if (Encodings.isHighUTF16Surrogate(ch)) { 1407 i++; 1408 } 1409 } 1410 } 1411 1412 // The next is kind of a hack to keep from escaping in the case 1413 // of Shift_JIS and the like. 
1414 1415 /* 1416 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1417 && (ch != 160)) 1418 { 1419 writer.write(ch); // no escaping in this case 1420 } 1421 else 1422 */ 1423 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1424 if (null != outputStringForChar) 1425 { 1426 writer.write(outputStringForChar); 1427 } 1428 else if (escapingNotNeeded(ch)) 1429 { 1430 writer.write(ch); // no escaping in this case 1431 } 1432 else 1433 { 1434 writer.write("&#"); 1435 writer.write(Integer.toString(ch)); 1436 writer.write(';'); 1437 } 1438 } 1439 cleanStart = i + 1; 1440 } 1441 } // end of for() 1442 1443 // are there any clean characters at the end of the array 1444 // that we haven't processed yet? 1445 if (cleanLength > 1) 1446 { 1447 // if the whole string can be written out as-is do so 1448 // otherwise write out the clean chars at the end of the 1449 // array 1450 if (cleanStart == 0) 1451 writer.write(string); 1452 else 1453 writer.write(chars, cleanStart, cleanLength); 1454 } 1455 else if (cleanLength == 1) 1456 { 1457 // a little optimization for 1 clean character 1458 // (we could have let the previous if(...) handle them all) 1459 writer.write(ch); 1460 } 1461 } 1462 1463 1464 1465 /** 1466 * Receive notification of character data. 1467 * 1468 * <p>The Parser will call this method to report each chunk of 1469 * character data. 
SAX parsers may return all contiguous character 1470 * data in a single chunk, or they may split it into several 1471 * chunks; however, all of the characters in any single event 1472 * must come from the same external entity, so that the Locator 1473 * provides useful information.</p> 1474 * 1475 * <p>The application must not attempt to read from the array 1476 * outside of the specified range.</p> 1477 * 1478 * <p>Note that some parsers will report whitespace using the 1479 * ignorableWhitespace() method rather than this one (validating 1480 * parsers must do so).</p> 1481 * 1482 * @param chars The characters from the XML document. 1483 * @param start The start position in the array. 1484 * @param length The number of characters to read from the array. 1485 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1486 * wrapping another exception. 1487 * @see #ignorableWhitespace 1488 * @see org.xml.sax.Locator 1489 * 1490 * @throws org.xml.sax.SAXException 1491 */ 1492 public final void characters(char chars[], int start, int length) 1493 throws org.xml.sax.SAXException 1494 { 1495 1496 if (m_elemContext.m_isRaw) 1497 { 1498 try 1499 { 1500 if (m_elemContext.m_startTagOpen) 1501 { 1502 closeStartTag(); 1503 m_elemContext.m_startTagOpen = false; 1504 } 1505 m_ispreserve = true; 1506 1507 // With m_ispreserve just set true it looks like shouldIndent() 1508 // will always return false, so drop any possible indentation. 
1509 // if (shouldIndent()) 1510 // indent(); 1511 1512 // writer.write("<![CDATA["); 1513 // writer.write(chars, start, length); 1514 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1515 1516 // writer.write("]]>"); 1517 1518 // time to generate characters event 1519 if (m_tracer != null) 1520 super.fireCharEvent(chars, start, length); 1521 1522 return; 1523 } 1524 catch (IOException ioe) 1525 { 1526 throw new org.xml.sax.SAXException( 1527 Utils.messages.createMessage( 1528 MsgKey.ER_OIERROR, 1529 null), 1530 ioe); 1531 //"IO error", ioe); 1532 } 1533 } 1534 else 1535 { 1536 super.characters(chars, start, length); 1537 } 1538 } 1539 1540 /** 1541 * Receive notification of cdata. 1542 * 1543 * <p>The Parser will call this method to report each chunk of 1544 * character data. SAX parsers may return all contiguous character 1545 * data in a single chunk, or they may split it into several 1546 * chunks; however, all of the characters in any single event 1547 * must come from the same external entity, so that the Locator 1548 * provides useful information.</p> 1549 * 1550 * <p>The application must not attempt to read from the array 1551 * outside of the specified range.</p> 1552 * 1553 * <p>Note that some parsers will report whitespace using the 1554 * ignorableWhitespace() method rather than this one (validating 1555 * parsers must do so).</p> 1556 * 1557 * @param ch The characters from the XML document. 1558 * @param start The start position in the array. 1559 * @param length The number of characters to read from the array. 1560 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1561 * wrapping another exception. 
1562 * @see #ignorableWhitespace 1563 * @see org.xml.sax.Locator 1564 * 1565 * @throws org.xml.sax.SAXException 1566 */ 1567 public final void cdata(char ch[], int start, int length) 1568 throws org.xml.sax.SAXException 1569 { 1570 1571 if ((null != m_elemContext.m_elementName) 1572 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1573 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1574 { 1575 try 1576 { 1577 if (m_elemContext.m_startTagOpen) 1578 { 1579 closeStartTag(); 1580 m_elemContext.m_startTagOpen = false; 1581 } 1582 1583 m_ispreserve = true; 1584 1585 if (shouldIndent()) 1586 indent(); 1587 1588 // writer.write(ch, start, length); 1589 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1590 } 1591 catch (IOException ioe) 1592 { 1593 throw new org.xml.sax.SAXException( 1594 Utils.messages.createMessage( 1595 MsgKey.ER_OIERROR, 1596 null), 1597 ioe); 1598 //"IO error", ioe); 1599 } 1600 } 1601 else 1602 { 1603 super.cdata(ch, start, length); 1604 } 1605 } 1606 1607 /** 1608 * Receive notification of a processing instruction. 1609 * 1610 * @param target The processing instruction target. 1611 * @param data The processing instruction data, or null if 1612 * none was supplied. 1613 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1614 * wrapping another exception. 1615 * 1616 * @throws org.xml.sax.SAXException 1617 */ 1618 public void processingInstruction(String target, String data) 1619 throws org.xml.sax.SAXException 1620 { 1621 1622 // Process any pending starDocument and startElement first. 1623 flushPending(); 1624 1625 // Use a fairly nasty hack to tell if the next node is supposed to be 1626 // unescaped text. 
1627 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1628 { 1629 startNonEscaping(); 1630 } 1631 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1632 { 1633 endNonEscaping(); 1634 } 1635 else 1636 { 1637 try 1638 { 1639 if (m_elemContext.m_startTagOpen) 1640 { 1641 closeStartTag(); 1642 m_elemContext.m_startTagOpen = false; 1643 } 1644 else if (m_needToCallStartDocument) 1645 startDocumentInternal(); 1646 1647 if (shouldIndent()) 1648 indent(); 1649 1650 final java.io.Writer writer = m_writer; 1651 //writer.write("<?" + target); 1652 writer.write("<?"); 1653 writer.write(target); 1654 1655 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1656 writer.write(' '); 1657 1658 //writer.write(data + ">"); // different from XML 1659 writer.write(data); // different from XML 1660 writer.write('>'); // different from XML 1661 1662 // Always output a newline char if not inside of an 1663 // element. The whitespace is not significant in that 1664 // case. 1665 if (m_elemContext.m_currentElemDepth <= 0) 1666 outputLineSep(); 1667 1668 m_startNewLine = true; 1669 } 1670 catch(IOException e) 1671 { 1672 throw new SAXException(e); 1673 } 1674 } 1675 1676 // now generate the PI event 1677 if (m_tracer != null) 1678 super.fireEscapingEvent(target, data); 1679 } 1680 1681 /** 1682 * Receive notivication of a entityReference. 1683 * 1684 * @param name non-null reference to entity name string. 
1685 * 1686 * @throws org.xml.sax.SAXException 1687 */ 1688 public final void entityReference(String name) 1689 throws org.xml.sax.SAXException 1690 { 1691 try 1692 { 1693 1694 final java.io.Writer writer = m_writer; 1695 writer.write('&'); 1696 writer.write(name); 1697 writer.write(';'); 1698 1699 } catch(IOException e) 1700 { 1701 throw new SAXException(e); 1702 } 1703 } 1704 /** 1705 * @see ExtendedContentHandler#endElement(String) 1706 */ 1707 public final void endElement(String elemName) throws SAXException 1708 { 1709 endElement(null, null, elemName); 1710 } 1711 1712 /** 1713 * Process the attributes, which means to write out the currently 1714 * collected attributes to the writer. The attributes are not 1715 * cleared by this method 1716 * 1717 * @param writer the writer to write processed attributes to. 1718 * @param nAttrs the number of attributes in m_attributes 1719 * to be processed 1720 * 1721 * @throws org.xml.sax.SAXException 1722 */ 1723 public void processAttributes(java.io.Writer writer, int nAttrs) 1724 throws IOException,SAXException 1725 { 1726 /* 1727 * process the collected attributes 1728 */ 1729 for (int i = 0; i < nAttrs; i++) 1730 { 1731 processAttribute( 1732 writer, 1733 m_attributes.getQName(i), 1734 m_attributes.getValue(i), 1735 m_elemContext.m_elementDesc); 1736 } 1737 } 1738 1739 /** 1740 * For the enclosing elements starting tag write out out any attributes 1741 * followed by ">" 1742 * 1743 *@throws org.xml.sax.SAXException 1744 */ 1745 protected void closeStartTag() throws SAXException 1746 { 1747 try 1748 { 1749 1750 // finish processing attributes, time to fire off the start element event 1751 if (m_tracer != null) 1752 super.fireStartElem(m_elemContext.m_elementName); 1753 1754 int nAttrs = m_attributes.getLength(); 1755 if (nAttrs>0) 1756 { 1757 processAttributes(m_writer, nAttrs); 1758 // clear attributes object for re-use with next element 1759 m_attributes.clear(); 1760 } 1761 1762 m_writer.write('>'); 1763 1764 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1765 * lets determine if the current element is specified in the cdata- 1766 * section-elements list. 1767 */ 1768 if (m_cdataSectionElements != null) 1769 m_elemContext.m_isCdataSection = isCdataSection(); 1770 if (m_doIndent) 1771 { 1772 m_isprevtext = false; 1773 m_preserves.push(m_ispreserve); 1774 } 1775 1776 } 1777 catch(IOException e) 1778 { 1779 throw new SAXException(e); 1780 } 1781 } 1782 /** 1783 * Initialize the serializer with the specified output stream and output 1784 * format. Must be called before calling any of the serialize methods. 1785 * 1786 * @param output The output stream to use 1787 * @param format The output format 1788 * @throws UnsupportedEncodingException The encoding specified in the 1789 * output format is not supported 1790 */ 1791 protected synchronized void init(OutputStream output, Properties format) 1792 throws UnsupportedEncodingException 1793 { 1794 if (null == format) 1795 { 1796 format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); 1797 } 1798 super.init(output,format, false); 1799 } 1800 1801 /** 1802 * Specifies an output stream to which the document should be 1803 * serialized. This method should not be called while the 1804 * serializer is in the process of serializing a document. 1805 * <p> 1806 * The encoding specified in the output properties is used, or 1807 * if no encoding was specified, the default for the selected 1808 * output method. 1809 * 1810 * @param output The output stream 1811 */ 1812 public void setOutputStream(OutputStream output) 1813 { 1814 1815 try 1816 { 1817 Properties format; 1818 if (null == m_format) 1819 format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); 1820 else 1821 format = m_format; 1822 init(output, format, true); 1823 } 1824 catch (UnsupportedEncodingException uee) 1825 { 1826 1827 // Should have been warned in init, I guess... 
1828 } 1829 } 1830 /** 1831 * This method is used when a prefix/uri namespace mapping 1832 * is indicated after the element was started with a 1833 * startElement() and before and endElement(). 1834 * startPrefixMapping(prefix,uri) would be used before the 1835 * startElement() call. 1836 * @param uri the URI of the namespace 1837 * @param prefix the prefix associated with the given URI. 1838 * 1839 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1840 */ 1841 public void namespaceAfterStartElement(String prefix, String uri) 1842 throws SAXException 1843 { 1844 // hack for XSLTC with finding URI for default namespace 1845 if (m_elemContext.m_elementURI == null) 1846 { 1847 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1848 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1849 { 1850 // the elements URI is not known yet, and it 1851 // doesn't have a prefix, and we are currently 1852 // setting the uri for prefix "", so we have 1853 // the uri for the element... lets remember it 1854 m_elemContext.m_elementURI = uri; 1855 } 1856 } 1857 startPrefixMapping(prefix,uri,false); 1858 } 1859 1860 public void startDTD(String name, String publicId, String systemId) 1861 throws SAXException 1862 { 1863 m_inDTD = true; 1864 super.startDTD(name, publicId, systemId); 1865 } 1866 1867 /** 1868 * Report the end of DTD declarations. 1869 * @throws org.xml.sax.SAXException The application may raise an exception. 1870 * @see #startDTD 1871 */ 1872 public void endDTD() throws org.xml.sax.SAXException 1873 { 1874 m_inDTD = false; 1875 /* for ToHTMLStream the DOCTYPE is entirely output in the 1876 * startDocumentInternal() method, so don't do anything here 1877 */ 1878 } 1879 /** 1880 * This method does nothing. 
1881 */ 1882 public void attributeDecl( 1883 String eName, 1884 String aName, 1885 String type, 1886 String valueDefault, 1887 String value) 1888 throws SAXException 1889 { 1890 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1891 } 1892 1893 /** 1894 * This method does nothing. 1895 */ 1896 public void elementDecl(String name, String model) throws SAXException 1897 { 1898 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1899 } 1900 /** 1901 * This method does nothing. 1902 */ 1903 public void internalEntityDecl(String name, String value) 1904 throws SAXException 1905 { 1906 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1907 } 1908 /** 1909 * This method does nothing. 1910 */ 1911 public void externalEntityDecl( 1912 String name, 1913 String publicId, 1914 String systemId) 1915 throws SAXException 1916 { 1917 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1918 } 1919 1920 /** 1921 * This method is used to add an attribute to the currently open element. 1922 * The caller has guaranted that this attribute is unique, which means that it 1923 * not been seen before and will not be seen again. 1924 * 1925 * @param name the qualified name of the attribute 1926 * @param value the value of the attribute which can contain only 1927 * ASCII printable characters characters in the range 32 to 127 inclusive. 1928 * @param flags the bit values of this integer give optimization information. 
1929 */ 1930 public void addUniqueAttribute(String name, String value, int flags) 1931 throws SAXException 1932 { 1933 try 1934 { 1935 final java.io.Writer writer = m_writer; 1936 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1937 { 1938 // "flags" has indicated that the characters 1939 // '>' '<' '&' and '"' are not in the value and 1940 // m_htmlcharInfo has recorded that there are no other 1941 // entities in the range 0 to 127 so we write out the 1942 // value directly 1943 writer.write(' '); 1944 writer.write(name); 1945 writer.write("=\""); 1946 writer.write(value); 1947 writer.write('"'); 1948 } 1949 else if ( 1950 (flags & HTML_ATTREMPTY) > 0 1951 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1952 { 1953 writer.write(' '); 1954 writer.write(name); 1955 } 1956 else 1957 { 1958 writer.write(' '); 1959 writer.write(name); 1960 writer.write("=\""); 1961 if ((flags & HTML_ATTRURL) > 0) 1962 { 1963 writeAttrURI(writer, value, m_specialEscapeURLs); 1964 } 1965 else 1966 { 1967 writeAttrString(writer, value, this.getEncoding()); 1968 } 1969 writer.write('"'); 1970 } 1971 } catch (IOException e) { 1972 throw new SAXException(e); 1973 } 1974 } 1975 1976 public void comment(char ch[], int start, int length) 1977 throws SAXException 1978 { 1979 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1980 if (m_inDTD) 1981 return; 1982 super.comment(ch, start, length); 1983 } 1984 1985 public boolean reset() 1986 { 1987 boolean ret = super.reset(); 1988 if (!ret) 1989 return false; 1990 initToHTMLStream(); 1991 return true; 1992 } 1993 1994 private void initToHTMLStream() 1995 { 1996 // m_elementDesc = null; 1997 m_inBlockElem = false; 1998 m_inDTD = false; 1999 // m_isRawStack.clear(); 2000 m_omitMetaTag = false; 2001 m_specialEscapeURLs = true; 2002 } 2003 2004 static class Trie 2005 { 2006 /** 2007 * A digital search trie for 7-bit ASCII text 2008 * The API is a subset of java.util.Hashtable 2009 * The key must be a 
7-bit ASCII string 2010 * The value may be any Java Object 2011 * One can get an object stored in a trie from its key, 2012 * but the search is either case sensitive or case 2013 * insensitive to the characters in the key, and this 2014 * choice of sensitivity or insensitivity is made when 2015 * the Trie is created, before any objects are put in it. 2016 * 2017 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 2018 * It exists to cut the serializers dependancy on that package. 2019 * 2020 * @xsl.usage internal 2021 */ 2022 2023 /** Size of the m_nextChar array. */ 2024 public static final int ALPHA_SIZE = 128; 2025 2026 /** The root node of the tree. */ 2027 final Node m_Root; 2028 2029 /** helper buffer to convert Strings to char arrays */ 2030 private char[] m_charBuffer = new char[0]; 2031 2032 /** true if the search for an object is lower case only with the key */ 2033 private final boolean m_lowerCaseOnly; 2034 2035 /** 2036 * Construct the trie that has a case insensitive search. 2037 */ 2038 public Trie() 2039 { 2040 m_Root = new Node(); 2041 m_lowerCaseOnly = false; 2042 } 2043 2044 /** 2045 * Construct the trie given the desired case sensitivity with the key. 2046 * @param lowerCaseOnly true if the search keys are to be loser case only, 2047 * not case insensitive. 2048 */ 2049 public Trie(boolean lowerCaseOnly) 2050 { 2051 m_Root = new Node(); 2052 m_lowerCaseOnly = lowerCaseOnly; 2053 } 2054 2055 /** 2056 * Put an object into the trie for lookup. 2057 * 2058 * @param key must be a 7-bit ASCII string 2059 * @param value any java object. 2060 * 2061 * @return The old object that matched key, or null. 
2062 */ 2063 public Object put(String key, Object value) 2064 { 2065 2066 final int len = key.length(); 2067 if (len > m_charBuffer.length) 2068 { 2069 // make the biggest buffer ever needed in get(String) 2070 m_charBuffer = new char[len]; 2071 } 2072 2073 Node node = m_Root; 2074 2075 for (int i = 0; i < len; i++) 2076 { 2077 Node nextNode = 2078 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2079 2080 if (nextNode != null) 2081 { 2082 node = nextNode; 2083 } 2084 else 2085 { 2086 for (; i < len; i++) 2087 { 2088 Node newNode = new Node(); 2089 if (m_lowerCaseOnly) 2090 { 2091 // put this value into the tree only with a lower case key 2092 node.m_nextChar[Character.toLowerCase( 2093 key.charAt(i))] = 2094 newNode; 2095 } 2096 else 2097 { 2098 // put this value into the tree with a case insensitive key 2099 node.m_nextChar[Character.toUpperCase( 2100 key.charAt(i))] = 2101 newNode; 2102 node.m_nextChar[Character.toLowerCase( 2103 key.charAt(i))] = 2104 newNode; 2105 } 2106 node = newNode; 2107 } 2108 break; 2109 } 2110 } 2111 2112 Object ret = node.m_Value; 2113 2114 node.m_Value = value; 2115 2116 return ret; 2117 } 2118 2119 /** 2120 * Get an object that matches the key. 2121 * 2122 * @param key must be a 7-bit ASCII string 2123 * 2124 * @return The object that matches the key, or null. 2125 */ 2126 public Object get(final String key) 2127 { 2128 2129 final int len = key.length(); 2130 2131 /* If the name is too long, we won't find it, this also keeps us 2132 * from overflowing m_charBuffer 2133 */ 2134 if (m_charBuffer.length < len) 2135 return null; 2136 2137 Node node = m_Root; 2138 switch (len) // optimize the look up based on the number of chars 2139 { 2140 // case 0 looks silly, but the generated bytecode runs 2141 // faster for lookup of elements of length 2 with this in 2142 // and a fair bit faster. Don't know why. 
2143 case 0 : 2144 { 2145 return null; 2146 } 2147 2148 case 1 : 2149 { 2150 final char ch = key.charAt(0); 2151 if (ch < ALPHA_SIZE) 2152 { 2153 node = node.m_nextChar[ch]; 2154 if (node != null) 2155 return node.m_Value; 2156 } 2157 return null; 2158 } 2159 // comment out case 2 because the default is faster 2160 // case 2 : 2161 // { 2162 // final char ch0 = key.charAt(0); 2163 // final char ch1 = key.charAt(1); 2164 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2165 // { 2166 // node = node.m_nextChar[ch0]; 2167 // if (node != null) 2168 // { 2169 // 2170 // if (ch1 < ALPHA_SIZE) 2171 // { 2172 // node = node.m_nextChar[ch1]; 2173 // if (node != null) 2174 // return node.m_Value; 2175 // } 2176 // } 2177 // } 2178 // return null; 2179 // } 2180 default : 2181 { 2182 for (int i = 0; i < len; i++) 2183 { 2184 // A thread-safe way to loop over the characters 2185 final char ch = key.charAt(i); 2186 if (ALPHA_SIZE <= ch) 2187 { 2188 // the key is not 7-bit ASCII so we won't find it here 2189 return null; 2190 } 2191 2192 node = node.m_nextChar[ch]; 2193 if (node == null) 2194 return null; 2195 } 2196 2197 return node.m_Value; 2198 } 2199 } 2200 } 2201 2202 /** 2203 * The node representation for the trie. 2204 * @xsl.usage internal 2205 */ 2206 private class Node 2207 { 2208 2209 /** 2210 * Constructor, creates a Node[ALPHA_SIZE]. 2211 */ 2212 Node() 2213 { 2214 m_nextChar = new Node[ALPHA_SIZE]; 2215 m_Value = null; 2216 } 2217 2218 /** The next nodes. */ 2219 final Node m_nextChar[]; 2220 2221 /** The value. */ 2222 Object m_Value; 2223 } 2224 /** 2225 * Construct the trie from another Trie. 2226 * Both the existing Trie and this new one share the same table for 2227 * lookup, and it is assumed that the table is fully populated and 2228 * not changing anymore. 2229 * 2230 * @param existingTrie the Trie that this one is a copy of. 2231 */ 2232 public Trie(Trie existingTrie) 2233 { 2234 // copy some fields from the existing Trie into this one. 
2235 m_Root = existingTrie.m_Root; 2236 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2237 2238 // get a buffer just big enough to hold the longest key in the table. 2239 int max = existingTrie.getLongestKeyLength(); 2240 m_charBuffer = new char[max]; 2241 } 2242 2243 /** 2244 * Get an object that matches the key. 2245 * This method is faster than get(), but is not thread-safe. 2246 * 2247 * @param key must be a 7-bit ASCII string 2248 * 2249 * @return The object that matches the key, or null. 2250 */ 2251 public Object get2(final String key) 2252 { 2253 2254 final int len = key.length(); 2255 2256 /* If the name is too long, we won't find it, this also keeps us 2257 * from overflowing m_charBuffer 2258 */ 2259 if (m_charBuffer.length < len) 2260 return null; 2261 2262 Node node = m_Root; 2263 switch (len) // optimize the look up based on the number of chars 2264 { 2265 // case 0 looks silly, but the generated bytecode runs 2266 // faster for lookup of elements of length 2 with this in 2267 // and a fair bit faster. Don't know why. 2268 case 0 : 2269 { 2270 return null; 2271 } 2272 2273 case 1 : 2274 { 2275 final char ch = key.charAt(0); 2276 if (ch < ALPHA_SIZE) 2277 { 2278 node = node.m_nextChar[ch]; 2279 if (node != null) 2280 return node.m_Value; 2281 } 2282 return null; 2283 } 2284 default : 2285 { 2286 /* Copy string into array. This is not thread-safe because 2287 * it modifies the contents of m_charBuffer. If multiple 2288 * threads were to use this Trie they all would be 2289 * using this same array (not good). So this 2290 * method is not thread-safe, but it is faster because 2291 * converting to a char[] and looping over elements of 2292 * the array is faster than a String's charAt(i). 
2293 */ 2294 key.getChars(0, len, m_charBuffer, 0); 2295 2296 for (int i = 0; i < len; i++) 2297 { 2298 final char ch = m_charBuffer[i]; 2299 if (ALPHA_SIZE <= ch) 2300 { 2301 // the key is not 7-bit ASCII so we won't find it here 2302 return null; 2303 } 2304 2305 node = node.m_nextChar[ch]; 2306 if (node == null) 2307 return null; 2308 } 2309 2310 return node.m_Value; 2311 } 2312 } 2313 } 2314 2315 /** 2316 * Get the length of the longest key used in the table. 2317 */ 2318 public int getLongestKeyLength() 2319 { 2320 return m_charBuffer.length; 2321 } 2322 } 2323 }