1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 2001-2004 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 /* 21 * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $ 22 */ 23 package com.sun.org.apache.xml.internal.serializer; 24 25 import java.io.IOException; 26 import java.io.OutputStream; 27 import java.io.UnsupportedEncodingException; 28 import java.util.Properties; 29 30 import javax.xml.transform.Result; 31 32 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 33 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 34 import org.xml.sax.Attributes; 35 import org.xml.sax.SAXException; 36 37 /** 38 * This serializer takes a series of SAX or 39 * SAX-like events and writes its output 40 * to the given stream. 41 * 42 * This class is not a public API, it is public 43 * because it is used from another package. 44 * 45 * @xsl.usage internal 46 */ 47 public final class ToHTMLStream extends ToStream 48 { 49 50 /** This flag is set while receiving events from the DTD */ 51 protected boolean m_inDTD = false; 52 53 /** True if the current element is a block element. (seems like 54 * this needs to be a stack. -sb). */ 55 private boolean m_inBlockElem = false; 56 57 /** 58 * Map that tells which XML characters should have special treatment, and it 59 * provides character to entity name lookup. 
60 */ 61 private static final CharInfo m_htmlcharInfo = 62 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); 63 CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); 64 65 /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ 66 static final Trie m_elementFlags = new Trie(); 67 68 static { 69 initTagReference(m_elementFlags); 70 } 71 static void initTagReference(Trie m_elementFlags) { 72 73 // HTML 4.0 loose DTD 74 m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY)); 75 m_elementFlags.put( 76 "FRAME", 77 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 78 m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK)); 79 m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK)); 80 m_elementFlags.put( 81 "ISINDEX", 82 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 83 m_elementFlags.put( 84 "APPLET", 85 new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE)); 86 m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK)); 87 m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK)); 88 m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK)); 89 90 // HTML 4.0 strict DTD 91 m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 92 m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 93 m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 94 m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 95 m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 96 m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE)); 97 m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE)); 98 m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE)); 99 m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE)); 100 m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE)); 101 m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE)); 102 m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE)); 103 m_elementFlags.put("CITE", 
new ElemDesc(0 | ElemDesc.PHRASE)); 104 m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE)); 105 m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE)); 106 m_elementFlags.put( 107 "SUP", 108 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 109 m_elementFlags.put( 110 "SUB", 111 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 112 m_elementFlags.put( 113 "SPAN", 114 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 115 m_elementFlags.put( 116 "BDO", 117 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 118 m_elementFlags.put( 119 "BR", 120 new ElemDesc( 121 0 122 | ElemDesc.SPECIAL 123 | ElemDesc.ASPECIAL 124 | ElemDesc.EMPTY 125 | ElemDesc.BLOCK)); 126 m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK)); 127 m_elementFlags.put( 128 "ADDRESS", 129 new ElemDesc( 130 0 131 | ElemDesc.BLOCK 132 | ElemDesc.BLOCKFORM 133 | ElemDesc.BLOCKFORMFIELDSET)); 134 m_elementFlags.put( 135 "DIV", 136 new ElemDesc( 137 0 138 | ElemDesc.BLOCK 139 | ElemDesc.BLOCKFORM 140 | ElemDesc.BLOCKFORMFIELDSET)); 141 m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL)); 142 m_elementFlags.put( 143 "MAP", 144 new ElemDesc( 145 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK)); 146 m_elementFlags.put( 147 "AREA", 148 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 149 m_elementFlags.put( 150 "LINK", 151 new ElemDesc( 152 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 153 m_elementFlags.put( 154 "IMG", 155 new ElemDesc( 156 0 157 | ElemDesc.SPECIAL 158 | ElemDesc.ASPECIAL 159 | ElemDesc.EMPTY 160 | ElemDesc.WHITESPACESENSITIVE)); 161 m_elementFlags.put( 162 "OBJECT", 163 new ElemDesc( 164 0 165 | ElemDesc.SPECIAL 166 | ElemDesc.ASPECIAL 167 | ElemDesc.HEADMISC 168 | ElemDesc.WHITESPACESENSITIVE)); 169 m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY)); 170 m_elementFlags.put( 171 "HR", 172 new ElemDesc( 173 0 174 | ElemDesc.BLOCK 175 | ElemDesc.BLOCKFORM 176 | ElemDesc.BLOCKFORMFIELDSET 177 | 
ElemDesc.EMPTY)); 178 m_elementFlags.put( 179 "P", 180 new ElemDesc( 181 0 182 | ElemDesc.BLOCK 183 | ElemDesc.BLOCKFORM 184 | ElemDesc.BLOCKFORMFIELDSET)); 185 m_elementFlags.put( 186 "H1", 187 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 188 m_elementFlags.put( 189 "H2", 190 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 191 m_elementFlags.put( 192 "H3", 193 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 194 m_elementFlags.put( 195 "H4", 196 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 197 m_elementFlags.put( 198 "H5", 199 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 200 m_elementFlags.put( 201 "H6", 202 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 203 m_elementFlags.put( 204 "PRE", 205 new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK)); 206 m_elementFlags.put( 207 "Q", 208 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 209 m_elementFlags.put( 210 "BLOCKQUOTE", 211 new ElemDesc( 212 0 213 | ElemDesc.BLOCK 214 | ElemDesc.BLOCKFORM 215 | ElemDesc.BLOCKFORMFIELDSET)); 216 m_elementFlags.put("INS", new ElemDesc(0)); 217 m_elementFlags.put("DEL", new ElemDesc(0)); 218 m_elementFlags.put( 219 "DL", 220 new ElemDesc( 221 0 222 | ElemDesc.BLOCK 223 | ElemDesc.BLOCKFORM 224 | ElemDesc.BLOCKFORMFIELDSET)); 225 m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK)); 226 m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK)); 227 m_elementFlags.put( 228 "OL", 229 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 230 m_elementFlags.put( 231 "UL", 232 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 233 m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK)); 234 m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK)); 235 m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL)); 236 m_elementFlags.put( 237 "INPUT", 238 new ElemDesc( 239 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY)); 240 m_elementFlags.put( 241 "SELECT", 242 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 
243 m_elementFlags.put("OPTGROUP", new ElemDesc(0)); 244 m_elementFlags.put("OPTION", new ElemDesc(0)); 245 m_elementFlags.put( 246 "TEXTAREA", 247 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 248 m_elementFlags.put( 249 "FIELDSET", 250 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM)); 251 m_elementFlags.put("LEGEND", new ElemDesc(0)); 252 m_elementFlags.put( 253 "BUTTON", 254 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 255 m_elementFlags.put( 256 "TABLE", 257 new ElemDesc( 258 0 259 | ElemDesc.BLOCK 260 | ElemDesc.BLOCKFORM 261 | ElemDesc.BLOCKFORMFIELDSET)); 262 m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK)); 263 m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK)); 264 m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK)); 265 m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK)); 266 m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK)); 267 m_elementFlags.put( 268 "COL", 269 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 270 m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK)); 271 m_elementFlags.put("TH", new ElemDesc(0)); 272 m_elementFlags.put("TD", new ElemDesc(0)); 273 m_elementFlags.put( 274 "HEAD", 275 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM)); 276 m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK)); 277 m_elementFlags.put( 278 "BASE", 279 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 280 m_elementFlags.put( 281 "META", 282 new ElemDesc( 283 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 284 m_elementFlags.put( 285 "STYLE", 286 new ElemDesc( 287 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK)); 288 m_elementFlags.put( 289 "SCRIPT", 290 new ElemDesc( 291 0 292 | ElemDesc.SPECIAL 293 | ElemDesc.ASPECIAL 294 | ElemDesc.HEADMISC 295 | ElemDesc.RAW)); 296 m_elementFlags.put( 297 "NOSCRIPT", 298 new ElemDesc( 299 0 300 | ElemDesc.BLOCK 301 | ElemDesc.BLOCKFORM 302 | ElemDesc.BLOCKFORMFIELDSET)); 303 
m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK)); 304 305 // From "John Ky" <hand@syd.speednet.com.au 306 // Transitional Document Type Definition () 307 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont 308 m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 309 310 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE 311 m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 312 m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 313 314 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U 315 m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 316 317 // From "John Ky" <hand@syd.speednet.com.au 318 m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 319 320 // HTML 4.0, section 16.5 321 m_elementFlags.put( 322 "IFRAME", 323 new ElemDesc( 324 0 325 | ElemDesc.BLOCK 326 | ElemDesc.BLOCKFORM 327 | ElemDesc.BLOCKFORMFIELDSET)); 328 329 // Netscape 4 extension 330 m_elementFlags.put( 331 "LAYER", 332 new ElemDesc( 333 0 334 | ElemDesc.BLOCK 335 | ElemDesc.BLOCKFORM 336 | ElemDesc.BLOCKFORMFIELDSET)); 337 // Netscape 4 extension 338 m_elementFlags.put( 339 "ILAYER", 340 new ElemDesc( 341 0 342 | ElemDesc.BLOCK 343 | ElemDesc.BLOCKFORM 344 | ElemDesc.BLOCKFORMFIELDSET)); 345 346 347 // NOW FOR ATTRIBUTE INFORMATION . . . 
348 ElemDesc elemDesc; 349 350 351 // ---------------------------------------------- 352 elemDesc = (ElemDesc) m_elementFlags.get("A"); 353 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 354 elemDesc.setAttr("NAME", ElemDesc.ATTRURL); 355 356 // ---------------------------------------------- 357 elemDesc = (ElemDesc) m_elementFlags.get("AREA"); 358 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 359 elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY); 360 361 // ---------------------------------------------- 362 elemDesc = (ElemDesc) m_elementFlags.get("BASE"); 363 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 364 365 // ---------------------------------------------- 366 elemDesc = (ElemDesc) m_elementFlags.get("BUTTON"); 367 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 368 369 // ---------------------------------------------- 370 elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE"); 371 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 372 373 // ---------------------------------------------- 374 elemDesc = (ElemDesc) m_elementFlags.get("DEL"); 375 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 376 377 // ---------------------------------------------- 378 elemDesc = (ElemDesc) m_elementFlags.get("DIR"); 379 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 380 381 // ---------------------------------------------- 382 383 elemDesc = (ElemDesc) m_elementFlags.get("DIV"); 384 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension 385 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 386 387 // ---------------------------------------------- 388 elemDesc = (ElemDesc) m_elementFlags.get("DL"); 389 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 390 391 // ---------------------------------------------- 392 elemDesc = (ElemDesc) m_elementFlags.get("FORM"); 393 elemDesc.setAttr("ACTION", ElemDesc.ATTRURL); 394 395 // ---------------------------------------------- 396 // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM> 397 elemDesc = 
(ElemDesc) m_elementFlags.get("FRAME"); 398 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 399 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 400 elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY); 401 402 // ---------------------------------------------- 403 elemDesc = (ElemDesc) m_elementFlags.get("HEAD"); 404 elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL); 405 406 // ---------------------------------------------- 407 elemDesc = (ElemDesc) m_elementFlags.get("HR"); 408 elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY); 409 410 // ---------------------------------------------- 411 // HTML 4.0, section 16.5 412 elemDesc = (ElemDesc) m_elementFlags.get("IFRAME"); 413 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 414 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 415 416 // ---------------------------------------------- 417 // Netscape 4 extension 418 elemDesc = (ElemDesc) m_elementFlags.get("ILAYER"); 419 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 420 421 // ---------------------------------------------- 422 elemDesc = (ElemDesc) m_elementFlags.get("IMG"); 423 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 424 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 425 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 426 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 427 428 // ---------------------------------------------- 429 elemDesc = (ElemDesc) m_elementFlags.get("INPUT"); 430 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 431 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 432 elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY); 433 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 434 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 435 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 436 437 // ---------------------------------------------- 438 elemDesc = (ElemDesc) m_elementFlags.get("INS"); 439 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 440 441 // ---------------------------------------------- 442 // Netscape 4 extension 443 elemDesc = (ElemDesc) m_elementFlags.get("LAYER"); 444 
elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 445 446 // ---------------------------------------------- 447 elemDesc = (ElemDesc) m_elementFlags.get("LINK"); 448 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 449 450 // ---------------------------------------------- 451 elemDesc = (ElemDesc) m_elementFlags.get("MENU"); 452 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 453 454 // ---------------------------------------------- 455 elemDesc = (ElemDesc) m_elementFlags.get("OBJECT"); 456 elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL); 457 elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL); 458 elemDesc.setAttr("DATA", ElemDesc.ATTRURL); 459 elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL); 460 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 461 elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY); 462 463 // ---------------------------------------------- 464 elemDesc = (ElemDesc) m_elementFlags.get("OL"); 465 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 466 467 // ---------------------------------------------- 468 elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP"); 469 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 470 471 // ---------------------------------------------- 472 elemDesc = (ElemDesc) m_elementFlags.get("OPTION"); 473 elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY); 474 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 475 476 // ---------------------------------------------- 477 elemDesc = (ElemDesc) m_elementFlags.get("Q"); 478 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 479 480 // ---------------------------------------------- 481 elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT"); 482 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 483 elemDesc.setAttr("FOR", ElemDesc.ATTRURL); 484 elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY); 485 486 // ---------------------------------------------- 487 elemDesc = (ElemDesc) m_elementFlags.get("SELECT"); 488 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 489 elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY); 490 491 // 
---------------------------------------------- 492 elemDesc = (ElemDesc) m_elementFlags.get("TABLE"); 493 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 494 495 // ---------------------------------------------- 496 elemDesc = (ElemDesc) m_elementFlags.get("TD"); 497 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 498 499 // ---------------------------------------------- 500 elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA"); 501 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 502 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 503 504 // ---------------------------------------------- 505 elemDesc = (ElemDesc) m_elementFlags.get("TH"); 506 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 507 508 // ---------------------------------------------- 509 // The nowrap attribute of a tr element is both 510 // a Netscape and Internet-Explorer extension 511 elemDesc = (ElemDesc) m_elementFlags.get("TR"); 512 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 513 514 // ---------------------------------------------- 515 elemDesc = (ElemDesc) m_elementFlags.get("UL"); 516 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 517 } 518 519 /** 520 * Dummy element for elements not found. 521 */ 522 static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); 523 524 /** True if URLs should be specially escaped with the %xx form. */ 525 private boolean m_specialEscapeURLs = true; 526 527 /** True if the META tag should be omitted. */ 528 private boolean m_omitMetaTag = false; 529 530 /** 531 * Tells if the formatter should use special URL escaping. 532 * 533 * @param bool True if URLs should be specially escaped with the %xx form. 534 */ 535 public void setSpecialEscapeURLs(boolean bool) 536 { 537 m_specialEscapeURLs = bool; 538 } 539 540 /** 541 * Tells if the formatter should omit the META tag. 542 * 543 * @param bool True if the META tag should be omitted. 
544 */ 545 public void setOmitMetaTag(boolean bool) 546 { 547 m_omitMetaTag = bool; 548 } 549 550 /** 551 * Specifies an output format for this serializer. It the 552 * serializer has already been associated with an output format, 553 * it will switch to the new format. This method should not be 554 * called while the serializer is in the process of serializing 555 * a document. 556 * 557 * This method can be called multiple times before starting 558 * the serialization of a particular result-tree. In principle 559 * all serialization parameters can be changed, with the exception 560 * of method="html" (it must be method="html" otherwise we 561 * shouldn't even have a ToHTMLStream object here!) 562 * 563 * @param format The output format or serialzation parameters 564 * to use. 565 */ 566 public void setOutputFormat(Properties format) 567 { 568 569 m_specialEscapeURLs = 570 OutputPropertyUtils.getBooleanProperty( 571 OutputPropertiesFactory.S_USE_URL_ESCAPING, 572 format); 573 574 m_omitMetaTag = 575 OutputPropertyUtils.getBooleanProperty( 576 OutputPropertiesFactory.S_OMIT_META_TAG, 577 format); 578 579 super.setOutputFormat(format); 580 } 581 582 /** 583 * Tells if the formatter should use special URL escaping. 584 * 585 * @return True if URLs should be specially escaped with the %xx form. 586 */ 587 private final boolean getSpecialEscapeURLs() 588 { 589 return m_specialEscapeURLs; 590 } 591 592 /** 593 * Tells if the formatter should omit the META tag. 594 * 595 * @return True if the META tag should be omitted. 596 */ 597 private final boolean getOmitMetaTag() 598 { 599 return m_omitMetaTag; 600 } 601 602 /** 603 * Get a description of the given element. 604 * 605 * @param name non-null name of element, case insensitive. 606 * 607 * @return non-null reference to ElemDesc, which may be m_dummy if no 608 * element description matches the given name. 
609 */ 610 public static final ElemDesc getElemDesc(String name) 611 { 612 /* this method used to return m_dummy when name was null 613 * but now it doesn't check and and requires non-null name. 614 */ 615 Object obj = m_elementFlags.get(name); 616 if (null != obj) 617 return (ElemDesc)obj; 618 return m_dummy; 619 } 620 621 /** 622 * A Trie that is just a copy of the "static" one. 623 * We need this one to be able to use the faster, but not thread-safe 624 * method Trie.get2(name) 625 */ 626 private Trie m_htmlInfo = new Trie(m_elementFlags); 627 /** 628 * Calls to this method could be replaced with calls to 629 * getElemDesc(name), but this one should be faster. 630 */ 631 private ElemDesc getElemDesc2(String name) 632 { 633 Object obj = m_htmlInfo.get2(name); 634 if (null != obj) 635 return (ElemDesc)obj; 636 return m_dummy; 637 } 638 639 /** 640 * Default constructor. 641 */ 642 public ToHTMLStream() 643 { 644 645 super(); 646 m_charInfo = m_htmlcharInfo; 647 // initialize namespaces 648 m_prefixMap = new NamespaceMappings(); 649 650 } 651 652 /** The name of the current element. */ 653 // private String m_currentElementName = null; 654 655 /** 656 * Receive notification of the beginning of a document. 657 * 658 * @throws org.xml.sax.SAXException Any SAX exception, possibly 659 * wrapping another exception. 
660 * 661 * @throws org.xml.sax.SAXException 662 */ 663 protected void startDocumentInternal() throws org.xml.sax.SAXException 664 { 665 super.startDocumentInternal(); 666 667 m_needToCallStartDocument = false; 668 m_needToOutputDocTypeDecl = true; 669 m_startNewLine = false; 670 setOmitXMLDeclaration(true); 671 672 if (true == m_needToOutputDocTypeDecl) 673 { 674 String doctypeSystem = getDoctypeSystem(); 675 String doctypePublic = getDoctypePublic(); 676 if ((null != doctypeSystem) || (null != doctypePublic)) 677 { 678 final java.io.Writer writer = m_writer; 679 try 680 { 681 writer.write("<!DOCTYPE html"); 682 683 if (null != doctypePublic) 684 { 685 writer.write(" PUBLIC \""); 686 writer.write(doctypePublic); 687 writer.write('"'); 688 } 689 690 if (null != doctypeSystem) 691 { 692 if (null == doctypePublic) 693 writer.write(" SYSTEM \""); 694 else 695 writer.write(" \""); 696 697 writer.write(doctypeSystem); 698 writer.write('"'); 699 } 700 701 writer.write('>'); 702 outputLineSep(); 703 } 704 catch(IOException e) 705 { 706 throw new SAXException(e); 707 } 708 } 709 } 710 711 m_needToOutputDocTypeDecl = false; 712 } 713 714 /** 715 * Receive notification of the end of a document. 716 * 717 * @throws org.xml.sax.SAXException Any SAX exception, possibly 718 * wrapping another exception. 719 * 720 * @throws org.xml.sax.SAXException 721 */ 722 public final void endDocument() throws org.xml.sax.SAXException 723 { 724 725 flushPending(); 726 if (m_doIndent && !m_isprevtext) 727 { 728 try 729 { 730 outputLineSep(); 731 } 732 catch(IOException e) 733 { 734 throw new SAXException(e); 735 } 736 } 737 738 flushWriter(); 739 if (m_tracer != null) 740 super.fireEndDoc(); 741 } 742 743 /** 744 * Receive notification of the beginning of an element. 745 * 746 * 747 * @param namespaceURI 748 * @param localName 749 * @param name The element type name. 750 * @param atts The attributes attached to the element, if any. 
751 * @throws org.xml.sax.SAXException Any SAX exception, possibly 752 * wrapping another exception. 753 * @see #endElement 754 * @see org.xml.sax.AttributeList 755 */ 756 public void startElement( 757 String namespaceURI, 758 String localName, 759 String name, 760 Attributes atts) 761 throws org.xml.sax.SAXException 762 { 763 764 ElemContext elemContext = m_elemContext; 765 766 // clean up any pending things first 767 if (elemContext.m_startTagOpen) 768 { 769 closeStartTag(); 770 elemContext.m_startTagOpen = false; 771 } 772 else if (m_cdataTagOpen) 773 { 774 closeCDATA(); 775 m_cdataTagOpen = false; 776 } 777 else if (m_needToCallStartDocument) 778 { 779 startDocumentInternal(); 780 m_needToCallStartDocument = false; 781 } 782 783 784 // if this element has a namespace then treat it like XML 785 if (null != namespaceURI && namespaceURI.length() > 0) 786 { 787 super.startElement(namespaceURI, localName, name, atts); 788 789 return; 790 } 791 792 try 793 { 794 // getElemDesc2(name) is faster than getElemDesc(name) 795 ElemDesc elemDesc = getElemDesc2(name); 796 int elemFlags = elemDesc.getFlags(); 797 798 // deal with indentation issues first 799 if (m_doIndent) 800 { 801 802 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 803 if (m_ispreserve) 804 m_ispreserve = false; 805 else if ( 806 (null != elemContext.m_elementName) 807 && (!m_inBlockElem 808 || isBlockElement) /* && !isWhiteSpaceSensitive */ 809 ) 810 { 811 m_startNewLine = true; 812 813 indent(); 814 815 } 816 m_inBlockElem = !isBlockElement; 817 } 818 819 // save any attributes for later processing 820 if (atts != null) 821 addAttributes(atts); 822 823 m_isprevtext = false; 824 final java.io.Writer writer = m_writer; 825 writer.write('<'); 826 writer.write(name); 827 828 829 830 if (m_tracer != null) 831 firePseudoAttributes(); 832 833 if ((elemFlags & ElemDesc.EMPTY) != 0) 834 { 835 // an optimization for elements which are expected 836 // to be empty. 
837 m_elemContext = elemContext.push(); 838 /* XSLTC sometimes calls namespaceAfterStartElement() 839 * so we need to remember the name 840 */ 841 m_elemContext.m_elementName = name; 842 m_elemContext.m_elementDesc = elemDesc; 843 return; 844 } 845 else 846 { 847 elemContext = elemContext.push(namespaceURI,localName,name); 848 m_elemContext = elemContext; 849 elemContext.m_elementDesc = elemDesc; 850 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 851 } 852 853 854 if ((elemFlags & ElemDesc.HEADELEM) != 0) 855 { 856 // This is the <HEAD> element, do some special processing 857 closeStartTag(); 858 elemContext.m_startTagOpen = false; 859 if (!m_omitMetaTag) 860 { 861 if (m_doIndent) 862 indent(); 863 writer.write( 864 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 865 String encoding = getEncoding(); 866 String encode = Encodings.getMimeEncoding(encoding); 867 writer.write(encode); 868 writer.write("\">"); 869 } 870 } 871 } 872 catch (IOException e) 873 { 874 throw new SAXException(e); 875 } 876 } 877 878 /** 879 * Receive notification of the end of an element. 880 * 881 * 882 * @param namespaceURI 883 * @param localName 884 * @param name The element type name 885 * @throws org.xml.sax.SAXException Any SAX exception, possibly 886 * wrapping another exception. 
887 */ 888 public final void endElement( 889 final String namespaceURI, 890 final String localName, 891 final String name) 892 throws org.xml.sax.SAXException 893 { 894 // deal with any pending issues 895 if (m_cdataTagOpen) 896 closeCDATA(); 897 898 // if the element has a namespace, treat it like XML, not HTML 899 if (null != namespaceURI && namespaceURI.length() > 0) 900 { 901 super.endElement(namespaceURI, localName, name); 902 903 return; 904 } 905 906 try 907 { 908 909 ElemContext elemContext = m_elemContext; 910 final ElemDesc elemDesc = elemContext.m_elementDesc; 911 final int elemFlags = elemDesc.getFlags(); 912 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 913 914 // deal with any indentation issues 915 if (m_doIndent) 916 { 917 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 918 boolean shouldIndent = false; 919 920 if (m_ispreserve) 921 { 922 m_ispreserve = false; 923 } 924 else if (m_doIndent && (!m_inBlockElem || isBlockElement)) 925 { 926 m_startNewLine = true; 927 shouldIndent = true; 928 } 929 if (!elemContext.m_startTagOpen && shouldIndent) 930 indent(elemContext.m_currentElemDepth - 1); 931 m_inBlockElem = !isBlockElement; 932 } 933 934 final java.io.Writer writer = m_writer; 935 if (!elemContext.m_startTagOpen) 936 { 937 writer.write("</"); 938 writer.write(name); 939 writer.write('>'); 940 } 941 else 942 { 943 // the start-tag open when this method was called, 944 // so we need to process it now. 945 946 if (m_tracer != null) 947 super.fireStartElem(name); 948 949 // the starting tag was still open when we received this endElement() call 950 // so we need to process any gathered attributes NOW, before they go away. 
951 int nAttrs = m_attributes.getLength(); 952 if (nAttrs > 0) 953 { 954 processAttributes(m_writer, nAttrs); 955 // clear attributes object for re-use with next element 956 m_attributes.clear(); 957 } 958 if (!elemEmpty) 959 { 960 // As per Dave/Paul recommendation 12/06/2000 961 // if (shouldIndent) 962 // writer.write('>'); 963 // indent(m_currentIndent); 964 965 writer.write("></"); 966 writer.write(name); 967 writer.write('>'); 968 } 969 else 970 { 971 writer.write('>'); 972 } 973 } 974 975 // clean up because the element has ended 976 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) 977 m_ispreserve = true; 978 m_isprevtext = false; 979 980 // fire off the end element event 981 if (m_tracer != null) 982 super.fireEndElem(name); 983 984 // OPTIMIZE-EMPTY 985 if (elemEmpty) 986 { 987 // a quick exit if the HTML element had no children. 988 // This block of code can be removed if the corresponding block of code 989 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 990 m_elemContext = elemContext.m_prev; 991 return; 992 } 993 994 // some more clean because the element has ended. 995 if (!elemContext.m_startTagOpen) 996 { 997 if (m_doIndent && !m_preserves.isEmpty()) 998 m_preserves.pop(); 999 } 1000 m_elemContext = elemContext.m_prev; 1001 // m_isRawStack.pop(); 1002 } 1003 catch (IOException e) 1004 { 1005 throw new SAXException(e); 1006 } 1007 } 1008 1009 /** 1010 * Process an attribute. 1011 * @param writer The writer to write the processed output to. 1012 * @param name The name of the attribute. 1013 * @param value The value of the attribute. 1014 * @param elemDesc The description of the HTML element 1015 * that has this attribute. 
1016 * 1017 * @throws org.xml.sax.SAXException 1018 */ 1019 protected void processAttribute( 1020 java.io.Writer writer, 1021 String name, 1022 String value, 1023 ElemDesc elemDesc) 1024 throws IOException 1025 { 1026 writer.write(' '); 1027 1028 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1029 && elemDesc != null 1030 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1031 { 1032 writer.write(name); 1033 } 1034 else 1035 { 1036 // %REVIEW% %OPT% 1037 // Two calls to single-char write may NOT 1038 // be more efficient than one to string-write... 1039 writer.write(name); 1040 writer.write("=\""); 1041 if ( elemDesc != null 1042 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1043 writeAttrURI(writer, value, m_specialEscapeURLs); 1044 else 1045 writeAttrString(writer, value, this.getEncoding()); 1046 writer.write('"'); 1047 1048 } 1049 } 1050 1051 /** 1052 * Tell if a character is an ASCII digit. 1053 */ 1054 private boolean isASCIIDigit(char c) 1055 { 1056 return (c >= '0' && c <= '9'); 1057 } 1058 1059 /** 1060 * Make an integer into an HH hex value. 1061 * Does no checking on the size of the input, since this 1062 * is only meant to be used locally by writeAttrURI. 1063 * 1064 * @param i must be a value less than 255. 1065 * 1066 * @return should be a two character string. 1067 */ 1068 private static String makeHHString(int i) 1069 { 1070 String s = Integer.toHexString(i).toUpperCase(); 1071 if (s.length() == 1) 1072 { 1073 s = "0" + s; 1074 } 1075 return s; 1076 } 1077 1078 /** 1079 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1080 * @param str must be 2 characters long 1081 * 1082 * @return true or false 1083 */ 1084 private boolean isHHSign(String str) 1085 { 1086 boolean sign = true; 1087 try 1088 { 1089 char r = (char) Integer.parseInt(str, 16); 1090 } 1091 catch (NumberFormatException e) 1092 { 1093 sign = false; 1094 } 1095 return sign; 1096 } 1097 1098 /** 1099 * Write the specified <var>string</var> after substituting non ASCII characters, 1100 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1101 * 1102 * @param string String to convert to XML format. 1103 * @param doURLEscaping True if we should try to encode as 1104 * per http://www.ietf.org/rfc/rfc2396.txt. 1105 * 1106 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1107 */ 1108 public void writeAttrURI( 1109 final java.io.Writer writer, String string, boolean doURLEscaping) 1110 throws IOException 1111 { 1112 // http://www.ietf.org/rfc/rfc2396.txt says: 1113 // A URI is always in an "escaped" form, since escaping or unescaping a 1114 // completed URI might change its semantics. Normally, the only time 1115 // escape encodings can safely be made is when the URI is being created 1116 // from its component parts; each component may have its own set of 1117 // characters that are reserved, so only the mechanism responsible for 1118 // generating or interpreting that component can determine whether or 1119 // not escaping a character will change its semantics. Likewise, a URI 1120 // must be separated into its components before the escaped characters 1121 // within those components can be safely decoded. 1122 // 1123 // ...So we do our best to do limited escaping of the URL, without 1124 // causing damage. If the URL is already properly escaped, in theory, this 1125 // function should not change the string value. 
1126 1127 final int end = string.length(); 1128 if (end > m_attrBuff.length) 1129 { 1130 m_attrBuff = new char[end*2 + 1]; 1131 } 1132 string.getChars(0,end, m_attrBuff, 0); 1133 final char[] chars = m_attrBuff; 1134 1135 int cleanStart = 0; 1136 int cleanLength = 0; 1137 1138 1139 char ch = 0; 1140 for (int i = 0; i < end; i++) 1141 { 1142 ch = chars[i]; 1143 1144 if ((ch < 32) || (ch > 126)) 1145 { 1146 if (cleanLength > 0) 1147 { 1148 writer.write(chars, cleanStart, cleanLength); 1149 cleanLength = 0; 1150 } 1151 if (doURLEscaping) 1152 { 1153 // Encode UTF16 to UTF8. 1154 // Reference is Unicode, A Primer, by Tony Graham. 1155 // Page 92. 1156 1157 // Note that Kay doesn't escape 0x20... 1158 // if(ch == 0x20) // Not sure about this... -sb 1159 // { 1160 // writer.write(ch); 1161 // } 1162 // else 1163 if (ch <= 0x7F) 1164 { 1165 writer.write('%'); 1166 writer.write(makeHHString(ch)); 1167 } 1168 else if (ch <= 0x7FF) 1169 { 1170 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1171 // and set two high bits. 1172 int high = (ch >> 6) | 0xC0; 1173 int low = (ch & 0x3F) | 0x80; 1174 // First 6 bits, + high bit 1175 writer.write('%'); 1176 writer.write(makeHHString(high)); 1177 writer.write('%'); 1178 writer.write(makeHHString(low)); 1179 } 1180 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1181 { 1182 // I'm sure this can be done in 3 instructions, but I choose 1183 // to try and do it exactly like it is done in the book, at least 1184 // until we are sure this is totally clean. I don't think performance 1185 // is a big issue with this particular function, though I could be 1186 // wrong. Also, the stuff below clearly does more masking than 1187 // it needs to do. 1188 1189 // Clear high 6 bits. 
1190 int highSurrogate = ((int) ch) & 0x03FF; 1191 1192 // Middle 4 bits (wwww) + 1 1193 // "Note that the value of wwww from the high surrogate bit pattern 1194 // is incremented to make the uuuuu bit pattern in the scalar value 1195 // so the surrogate pair don't address the BMP." 1196 int wwww = ((highSurrogate & 0x03C0) >> 6); 1197 int uuuuu = wwww + 1; 1198 1199 // next 4 bits 1200 int zzzz = (highSurrogate & 0x003C) >> 2; 1201 1202 // low 2 bits 1203 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1204 1205 // Get low surrogate character. 1206 ch = chars[++i]; 1207 1208 // Clear high 6 bits. 1209 int lowSurrogate = ((int) ch) & 0x03FF; 1210 1211 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1212 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1213 1214 // bottom 6 bits. 1215 int xxxxxx = (lowSurrogate & 0x003F); 1216 1217 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1218 int byte2 = 1219 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1220 int byte3 = 0x80 | yyyyyy; 1221 int byte4 = 0x80 | xxxxxx; 1222 1223 writer.write('%'); 1224 writer.write(makeHHString(byte1)); 1225 writer.write('%'); 1226 writer.write(makeHHString(byte2)); 1227 writer.write('%'); 1228 writer.write(makeHHString(byte3)); 1229 writer.write('%'); 1230 writer.write(makeHHString(byte4)); 1231 } 1232 else 1233 { 1234 int high = (ch >> 12) | 0xE0; // top 4 bits 1235 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1236 // middle 6 bits 1237 int low = (ch & 0x3F) | 0x80; 1238 // First 6 bits, + high bit 1239 writer.write('%'); 1240 writer.write(makeHHString(high)); 1241 writer.write('%'); 1242 writer.write(makeHHString(middle)); 1243 writer.write('%'); 1244 writer.write(makeHHString(low)); 1245 } 1246 1247 } 1248 else if (escapingNotNeeded(ch)) 1249 { 1250 writer.write(ch); 1251 } 1252 else 1253 { 1254 writer.write("&#"); 1255 writer.write(Integer.toString(ch)); 1256 writer.write(';'); 1257 } 1258 // In this character range we have first written out any previously 
accumulated 1259 // "clean" characters, then processed the current more complicated character, 1260 // which may have incremented "i". 1261 // We now we reset the next possible clean character. 1262 cleanStart = i + 1; 1263 } 1264 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1265 // not allowing quotes in the URI proper syntax, nor in the fragment 1266 // identifier, we believe that it's OK to double escape quotes. 1267 else if (ch == '"') 1268 { 1269 // If the character is a '%' number number, try to avoid double-escaping. 1270 // There is a question if this is legal behavior. 1271 1272 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1273 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1274 1275 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1276 1277 // We are no longer escaping '%' 1278 1279 if (cleanLength > 0) 1280 { 1281 writer.write(chars, cleanStart, cleanLength); 1282 cleanLength = 0; 1283 } 1284 1285 1286 // Mike Kay encodes this as ", so he may know something I don't? 1287 if (doURLEscaping) 1288 writer.write("%22"); 1289 else 1290 writer.write("""); // we have to escape this, I guess. 1291 1292 // We have written out any clean characters, then the escaped '%' and now we 1293 // We now we reset the next possible clean character. 1294 cleanStart = i + 1; 1295 } 1296 else if (ch == '&') 1297 { 1298 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1299 // instead of "&" to avoid confusion with the beginning of a character 1300 // reference (entity reference open delimiter). 
1301 if (cleanLength > 0) 1302 { 1303 writer.write(chars, cleanStart, cleanLength); 1304 cleanLength = 0; 1305 } 1306 writer.write("&"); 1307 cleanStart = i + 1; 1308 } 1309 else 1310 { 1311 // no processing for this character, just count how 1312 // many characters in a row that we have that need no processing 1313 cleanLength++; 1314 } 1315 } 1316 1317 // are there any clean characters at the end of the array 1318 // that we haven't processed yet? 1319 if (cleanLength > 1) 1320 { 1321 // if the whole string can be written out as-is do so 1322 // otherwise write out the clean chars at the end of the 1323 // array 1324 if (cleanStart == 0) 1325 writer.write(string); 1326 else 1327 writer.write(chars, cleanStart, cleanLength); 1328 } 1329 else if (cleanLength == 1) 1330 { 1331 // a little optimization for 1 clean character 1332 // (we could have let the previous if(...) handle them all) 1333 writer.write(ch); 1334 } 1335 } 1336 1337 /** 1338 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1339 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1340 * 1341 * @param string String to convert to XML format. 1342 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1343 * 1344 * @throws org.xml.sax.SAXException 1345 */ 1346 public void writeAttrString( 1347 final java.io.Writer writer, String string, String encoding) 1348 throws IOException 1349 { 1350 final int end = string.length(); 1351 if (end > m_attrBuff.length) 1352 { 1353 m_attrBuff = new char[end * 2 + 1]; 1354 } 1355 string.getChars(0, end, m_attrBuff, 0); 1356 final char[] chars = m_attrBuff; 1357 1358 1359 1360 int cleanStart = 0; 1361 int cleanLength = 0; 1362 1363 char ch = 0; 1364 for (int i = 0; i < end; i++) 1365 { 1366 ch = chars[i]; 1367 1368 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1369 // System.out.println("ch: "+(int)ch); 1370 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1371 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1372 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) 1373 { 1374 cleanLength++; 1375 } 1376 else if ('<' == ch || '>' == ch) 1377 { 1378 cleanLength++; // no escaping in this case, as specified in 15.2 1379 } 1380 else if ( 1381 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1382 { 1383 cleanLength++; // no escaping in this case, as specified in 15.2 1384 } 1385 else 1386 { 1387 if (cleanLength > 0) 1388 { 1389 writer.write(chars,cleanStart,cleanLength); 1390 cleanLength = 0; 1391 } 1392 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1393 1394 if (i != pos) 1395 { 1396 i = pos - 1; 1397 } 1398 else 1399 { 1400 if (Encodings.isHighUTF16Surrogate(ch)) 1401 { 1402 1403 writeUTF16Surrogate(ch, chars, i, end); 1404 i++; // two input characters processed 1405 // this increments by one and the for() 1406 // loop itself increments by another one. 1407 } 1408 1409 // The next is kind of a hack to keep from escaping in the case 1410 // of Shift_JIS and the like. 
1411 1412 /* 1413 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1414 && (ch != 160)) 1415 { 1416 writer.write(ch); // no escaping in this case 1417 } 1418 else 1419 */ 1420 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1421 if (null != outputStringForChar) 1422 { 1423 writer.write(outputStringForChar); 1424 } 1425 else if (escapingNotNeeded(ch)) 1426 { 1427 writer.write(ch); // no escaping in this case 1428 } 1429 else 1430 { 1431 writer.write("&#"); 1432 writer.write(Integer.toString(ch)); 1433 writer.write(';'); 1434 } 1435 } 1436 cleanStart = i + 1; 1437 } 1438 } // end of for() 1439 1440 // are there any clean characters at the end of the array 1441 // that we haven't processed yet? 1442 if (cleanLength > 1) 1443 { 1444 // if the whole string can be written out as-is do so 1445 // otherwise write out the clean chars at the end of the 1446 // array 1447 if (cleanStart == 0) 1448 writer.write(string); 1449 else 1450 writer.write(chars, cleanStart, cleanLength); 1451 } 1452 else if (cleanLength == 1) 1453 { 1454 // a little optimization for 1 clean character 1455 // (we could have let the previous if(...) handle them all) 1456 writer.write(ch); 1457 } 1458 } 1459 1460 1461 1462 /** 1463 * Receive notification of character data. 1464 * 1465 * <p>The Parser will call this method to report each chunk of 1466 * character data. 
SAX parsers may return all contiguous character 1467 * data in a single chunk, or they may split it into several 1468 * chunks; however, all of the characters in any single event 1469 * must come from the same external entity, so that the Locator 1470 * provides useful information.</p> 1471 * 1472 * <p>The application must not attempt to read from the array 1473 * outside of the specified range.</p> 1474 * 1475 * <p>Note that some parsers will report whitespace using the 1476 * ignorableWhitespace() method rather than this one (validating 1477 * parsers must do so).</p> 1478 * 1479 * @param chars The characters from the XML document. 1480 * @param start The start position in the array. 1481 * @param length The number of characters to read from the array. 1482 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1483 * wrapping another exception. 1484 * @see #ignorableWhitespace 1485 * @see org.xml.sax.Locator 1486 * 1487 * @throws org.xml.sax.SAXException 1488 */ 1489 public final void characters(char chars[], int start, int length) 1490 throws org.xml.sax.SAXException 1491 { 1492 1493 if (m_elemContext.m_isRaw) 1494 { 1495 try 1496 { 1497 if (m_elemContext.m_startTagOpen) 1498 { 1499 closeStartTag(); 1500 m_elemContext.m_startTagOpen = false; 1501 } 1502 m_ispreserve = true; 1503 1504 // With m_ispreserve just set true it looks like shouldIndent() 1505 // will always return false, so drop any possible indentation. 
1506 // if (shouldIndent()) 1507 // indent(); 1508 1509 // writer.write("<![CDATA["); 1510 // writer.write(chars, start, length); 1511 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1512 1513 // writer.write("]]>"); 1514 1515 // time to generate characters event 1516 if (m_tracer != null) 1517 super.fireCharEvent(chars, start, length); 1518 1519 return; 1520 } 1521 catch (IOException ioe) 1522 { 1523 throw new org.xml.sax.SAXException( 1524 Utils.messages.createMessage( 1525 MsgKey.ER_OIERROR, 1526 null), 1527 ioe); 1528 //"IO error", ioe); 1529 } 1530 } 1531 else 1532 { 1533 super.characters(chars, start, length); 1534 } 1535 } 1536 1537 /** 1538 * Receive notification of cdata. 1539 * 1540 * <p>The Parser will call this method to report each chunk of 1541 * character data. SAX parsers may return all contiguous character 1542 * data in a single chunk, or they may split it into several 1543 * chunks; however, all of the characters in any single event 1544 * must come from the same external entity, so that the Locator 1545 * provides useful information.</p> 1546 * 1547 * <p>The application must not attempt to read from the array 1548 * outside of the specified range.</p> 1549 * 1550 * <p>Note that some parsers will report whitespace using the 1551 * ignorableWhitespace() method rather than this one (validating 1552 * parsers must do so).</p> 1553 * 1554 * @param ch The characters from the XML document. 1555 * @param start The start position in the array. 1556 * @param length The number of characters to read from the array. 1557 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1558 * wrapping another exception. 
1559 * @see #ignorableWhitespace 1560 * @see org.xml.sax.Locator 1561 * 1562 * @throws org.xml.sax.SAXException 1563 */ 1564 public final void cdata(char ch[], int start, int length) 1565 throws org.xml.sax.SAXException 1566 { 1567 1568 if ((null != m_elemContext.m_elementName) 1569 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1570 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1571 { 1572 try 1573 { 1574 if (m_elemContext.m_startTagOpen) 1575 { 1576 closeStartTag(); 1577 m_elemContext.m_startTagOpen = false; 1578 } 1579 1580 m_ispreserve = true; 1581 1582 if (shouldIndent()) 1583 indent(); 1584 1585 // writer.write(ch, start, length); 1586 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1587 } 1588 catch (IOException ioe) 1589 { 1590 throw new org.xml.sax.SAXException( 1591 Utils.messages.createMessage( 1592 MsgKey.ER_OIERROR, 1593 null), 1594 ioe); 1595 //"IO error", ioe); 1596 } 1597 } 1598 else 1599 { 1600 super.cdata(ch, start, length); 1601 } 1602 } 1603 1604 /** 1605 * Receive notification of a processing instruction. 1606 * 1607 * @param target The processing instruction target. 1608 * @param data The processing instruction data, or null if 1609 * none was supplied. 1610 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1611 * wrapping another exception. 1612 * 1613 * @throws org.xml.sax.SAXException 1614 */ 1615 public void processingInstruction(String target, String data) 1616 throws org.xml.sax.SAXException 1617 { 1618 1619 // Process any pending starDocument and startElement first. 1620 flushPending(); 1621 1622 // Use a fairly nasty hack to tell if the next node is supposed to be 1623 // unescaped text. 
1624 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1625 { 1626 startNonEscaping(); 1627 } 1628 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1629 { 1630 endNonEscaping(); 1631 } 1632 else 1633 { 1634 try 1635 { 1636 if (m_elemContext.m_startTagOpen) 1637 { 1638 closeStartTag(); 1639 m_elemContext.m_startTagOpen = false; 1640 } 1641 else if (m_needToCallStartDocument) 1642 startDocumentInternal(); 1643 1644 if (shouldIndent()) 1645 indent(); 1646 1647 final java.io.Writer writer = m_writer; 1648 //writer.write("<?" + target); 1649 writer.write("<?"); 1650 writer.write(target); 1651 1652 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1653 writer.write(' '); 1654 1655 //writer.write(data + ">"); // different from XML 1656 writer.write(data); // different from XML 1657 writer.write('>'); // different from XML 1658 1659 // Always output a newline char if not inside of an 1660 // element. The whitespace is not significant in that 1661 // case. 1662 if (m_elemContext.m_currentElemDepth <= 0) 1663 outputLineSep(); 1664 1665 m_startNewLine = true; 1666 } 1667 catch(IOException e) 1668 { 1669 throw new SAXException(e); 1670 } 1671 } 1672 1673 // now generate the PI event 1674 if (m_tracer != null) 1675 super.fireEscapingEvent(target, data); 1676 } 1677 1678 /** 1679 * Receive notivication of a entityReference. 1680 * 1681 * @param name non-null reference to entity name string. 
1682 * 1683 * @throws org.xml.sax.SAXException 1684 */ 1685 public final void entityReference(String name) 1686 throws org.xml.sax.SAXException 1687 { 1688 try 1689 { 1690 1691 final java.io.Writer writer = m_writer; 1692 writer.write('&'); 1693 writer.write(name); 1694 writer.write(';'); 1695 1696 } catch(IOException e) 1697 { 1698 throw new SAXException(e); 1699 } 1700 } 1701 /** 1702 * @see ExtendedContentHandler#endElement(String) 1703 */ 1704 public final void endElement(String elemName) throws SAXException 1705 { 1706 endElement(null, null, elemName); 1707 } 1708 1709 /** 1710 * Process the attributes, which means to write out the currently 1711 * collected attributes to the writer. The attributes are not 1712 * cleared by this method 1713 * 1714 * @param writer the writer to write processed attributes to. 1715 * @param nAttrs the number of attributes in m_attributes 1716 * to be processed 1717 * 1718 * @throws org.xml.sax.SAXException 1719 */ 1720 public void processAttributes(java.io.Writer writer, int nAttrs) 1721 throws IOException,SAXException 1722 { 1723 /* 1724 * process the collected attributes 1725 */ 1726 for (int i = 0; i < nAttrs; i++) 1727 { 1728 processAttribute( 1729 writer, 1730 m_attributes.getQName(i), 1731 m_attributes.getValue(i), 1732 m_elemContext.m_elementDesc); 1733 } 1734 } 1735 1736 /** 1737 * For the enclosing elements starting tag write out out any attributes 1738 * followed by ">" 1739 * 1740 *@throws org.xml.sax.SAXException 1741 */ 1742 protected void closeStartTag() throws SAXException 1743 { 1744 try 1745 { 1746 1747 // finish processing attributes, time to fire off the start element event 1748 if (m_tracer != null) 1749 super.fireStartElem(m_elemContext.m_elementName); 1750 1751 int nAttrs = m_attributes.getLength(); 1752 if (nAttrs>0) 1753 { 1754 processAttributes(m_writer, nAttrs); 1755 // clear attributes object for re-use with next element 1756 m_attributes.clear(); 1757 } 1758 1759 m_writer.write('>'); 1760 1761 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1762 * lets determine if the current element is specified in the cdata- 1763 * section-elements list. 1764 */ 1765 if (m_cdataSectionElements != null) 1766 m_elemContext.m_isCdataSection = isCdataSection(); 1767 if (m_doIndent) 1768 { 1769 m_isprevtext = false; 1770 m_preserves.push(m_ispreserve); 1771 } 1772 1773 } 1774 catch(IOException e) 1775 { 1776 throw new SAXException(e); 1777 } 1778 } 1779 /** 1780 * Initialize the serializer with the specified output stream and output 1781 * format. Must be called before calling any of the serialize methods. 1782 * 1783 * @param output The output stream to use 1784 * @param format The output format 1785 * @throws UnsupportedEncodingException The encoding specified in the 1786 * output format is not supported 1787 */ 1788 protected synchronized void init(OutputStream output, Properties format) 1789 throws UnsupportedEncodingException 1790 { 1791 if (null == format) 1792 { 1793 format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); 1794 } 1795 super.init(output,format, false); 1796 } 1797 1798 /** 1799 * Specifies an output stream to which the document should be 1800 * serialized. This method should not be called while the 1801 * serializer is in the process of serializing a document. 1802 * <p> 1803 * The encoding specified in the output properties is used, or 1804 * if no encoding was specified, the default for the selected 1805 * output method. 1806 * 1807 * @param output The output stream 1808 */ 1809 public void setOutputStream(OutputStream output) 1810 { 1811 1812 try 1813 { 1814 Properties format; 1815 if (null == m_format) 1816 format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML); 1817 else 1818 format = m_format; 1819 init(output, format, true); 1820 } 1821 catch (UnsupportedEncodingException uee) 1822 { 1823 1824 // Should have been warned in init, I guess... 
1825 } 1826 } 1827 /** 1828 * This method is used when a prefix/uri namespace mapping 1829 * is indicated after the element was started with a 1830 * startElement() and before and endElement(). 1831 * startPrefixMapping(prefix,uri) would be used before the 1832 * startElement() call. 1833 * @param uri the URI of the namespace 1834 * @param prefix the prefix associated with the given URI. 1835 * 1836 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1837 */ 1838 public void namespaceAfterStartElement(String prefix, String uri) 1839 throws SAXException 1840 { 1841 // hack for XSLTC with finding URI for default namespace 1842 if (m_elemContext.m_elementURI == null) 1843 { 1844 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1845 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1846 { 1847 // the elements URI is not known yet, and it 1848 // doesn't have a prefix, and we are currently 1849 // setting the uri for prefix "", so we have 1850 // the uri for the element... lets remember it 1851 m_elemContext.m_elementURI = uri; 1852 } 1853 } 1854 startPrefixMapping(prefix,uri,false); 1855 } 1856 1857 public void startDTD(String name, String publicId, String systemId) 1858 throws SAXException 1859 { 1860 m_inDTD = true; 1861 super.startDTD(name, publicId, systemId); 1862 } 1863 1864 /** 1865 * Report the end of DTD declarations. 1866 * @throws org.xml.sax.SAXException The application may raise an exception. 1867 * @see #startDTD 1868 */ 1869 public void endDTD() throws org.xml.sax.SAXException 1870 { 1871 m_inDTD = false; 1872 /* for ToHTMLStream the DOCTYPE is entirely output in the 1873 * startDocumentInternal() method, so don't do anything here 1874 */ 1875 } 1876 /** 1877 * This method does nothing. 
/**
 * Intentionally does nothing: this serializer does not write the
 * internal DTD subset, so an attribute declaration produces no output.
 * All arguments are ignored.
 *
 * @param eName        not used.
 * @param aName        not used.
 * @param type         not used.
 * @param valueDefault not used.
 * @param value        not used.
 *
 * @throws SAXException declared by the signature; never thrown by this
 *         implementation.
 */
public void attributeDecl(
    String eName,
    String aName,
    String type,
    String valueDefault,
    String value)
    throws SAXException
{
    // The internal DTD subset is not serialized by the ToHTMLStream serializer
}
1926 */ 1927 public void addUniqueAttribute(String name, String value, int flags) 1928 throws SAXException 1929 { 1930 try 1931 { 1932 final java.io.Writer writer = m_writer; 1933 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1934 { 1935 // "flags" has indicated that the characters 1936 // '>' '<' '&' and '"' are not in the value and 1937 // m_htmlcharInfo has recorded that there are no other 1938 // entities in the range 0 to 127 so we write out the 1939 // value directly 1940 writer.write(' '); 1941 writer.write(name); 1942 writer.write("=\""); 1943 writer.write(value); 1944 writer.write('"'); 1945 } 1946 else if ( 1947 (flags & HTML_ATTREMPTY) > 0 1948 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1949 { 1950 writer.write(' '); 1951 writer.write(name); 1952 } 1953 else 1954 { 1955 writer.write(' '); 1956 writer.write(name); 1957 writer.write("=\""); 1958 if ((flags & HTML_ATTRURL) > 0) 1959 { 1960 writeAttrURI(writer, value, m_specialEscapeURLs); 1961 } 1962 else 1963 { 1964 writeAttrString(writer, value, this.getEncoding()); 1965 } 1966 writer.write('"'); 1967 } 1968 } catch (IOException e) { 1969 throw new SAXException(e); 1970 } 1971 } 1972 1973 public void comment(char ch[], int start, int length) 1974 throws SAXException 1975 { 1976 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1977 if (m_inDTD) 1978 return; 1979 super.comment(ch, start, length); 1980 } 1981 1982 public boolean reset() 1983 { 1984 boolean ret = super.reset(); 1985 if (!ret) 1986 return false; 1987 initToHTMLStream(); 1988 return true; 1989 } 1990 1991 private void initToHTMLStream() 1992 { 1993 // m_elementDesc = null; 1994 m_inBlockElem = false; 1995 m_inDTD = false; 1996 // m_isRawStack.clear(); 1997 m_omitMetaTag = false; 1998 m_specialEscapeURLs = true; 1999 } 2000 2001 static class Trie 2002 { 2003 /** 2004 * A digital search trie for 7-bit ASCII text 2005 * The API is a subset of java.util.Hashtable 2006 * The key must be a 
7-bit ASCII string 2007 * The value may be any Java Object 2008 * One can get an object stored in a trie from its key, 2009 * but the search is either case sensitive or case 2010 * insensitive to the characters in the key, and this 2011 * choice of sensitivity or insensitivity is made when 2012 * the Trie is created, before any objects are put in it. 2013 * 2014 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 2015 * It exists to cut the serializers dependancy on that package. 2016 * 2017 * @xsl.usage internal 2018 */ 2019 2020 /** Size of the m_nextChar array. */ 2021 public static final int ALPHA_SIZE = 128; 2022 2023 /** The root node of the tree. */ 2024 final Node m_Root; 2025 2026 /** helper buffer to convert Strings to char arrays */ 2027 private char[] m_charBuffer = new char[0]; 2028 2029 /** true if the search for an object is lower case only with the key */ 2030 private final boolean m_lowerCaseOnly; 2031 2032 /** 2033 * Construct the trie that has a case insensitive search. 2034 */ 2035 public Trie() 2036 { 2037 m_Root = new Node(); 2038 m_lowerCaseOnly = false; 2039 } 2040 2041 /** 2042 * Construct the trie given the desired case sensitivity with the key. 2043 * @param lowerCaseOnly true if the search keys are to be loser case only, 2044 * not case insensitive. 2045 */ 2046 public Trie(boolean lowerCaseOnly) 2047 { 2048 m_Root = new Node(); 2049 m_lowerCaseOnly = lowerCaseOnly; 2050 } 2051 2052 /** 2053 * Put an object into the trie for lookup. 2054 * 2055 * @param key must be a 7-bit ASCII string 2056 * @param value any java object. 2057 * 2058 * @return The old object that matched key, or null. 
2059 */ 2060 public Object put(String key, Object value) 2061 { 2062 2063 final int len = key.length(); 2064 if (len > m_charBuffer.length) 2065 { 2066 // make the biggest buffer ever needed in get(String) 2067 m_charBuffer = new char[len]; 2068 } 2069 2070 Node node = m_Root; 2071 2072 for (int i = 0; i < len; i++) 2073 { 2074 Node nextNode = 2075 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2076 2077 if (nextNode != null) 2078 { 2079 node = nextNode; 2080 } 2081 else 2082 { 2083 for (; i < len; i++) 2084 { 2085 Node newNode = new Node(); 2086 if (m_lowerCaseOnly) 2087 { 2088 // put this value into the tree only with a lower case key 2089 node.m_nextChar[Character.toLowerCase( 2090 key.charAt(i))] = 2091 newNode; 2092 } 2093 else 2094 { 2095 // put this value into the tree with a case insensitive key 2096 node.m_nextChar[Character.toUpperCase( 2097 key.charAt(i))] = 2098 newNode; 2099 node.m_nextChar[Character.toLowerCase( 2100 key.charAt(i))] = 2101 newNode; 2102 } 2103 node = newNode; 2104 } 2105 break; 2106 } 2107 } 2108 2109 Object ret = node.m_Value; 2110 2111 node.m_Value = value; 2112 2113 return ret; 2114 } 2115 2116 /** 2117 * Get an object that matches the key. 2118 * 2119 * @param key must be a 7-bit ASCII string 2120 * 2121 * @return The object that matches the key, or null. 2122 */ 2123 public Object get(final String key) 2124 { 2125 2126 final int len = key.length(); 2127 2128 /* If the name is too long, we won't find it, this also keeps us 2129 * from overflowing m_charBuffer 2130 */ 2131 if (m_charBuffer.length < len) 2132 return null; 2133 2134 Node node = m_Root; 2135 switch (len) // optimize the look up based on the number of chars 2136 { 2137 // case 0 looks silly, but the generated bytecode runs 2138 // faster for lookup of elements of length 2 with this in 2139 // and a fair bit faster. Don't know why. 
2140 case 0 : 2141 { 2142 return null; 2143 } 2144 2145 case 1 : 2146 { 2147 final char ch = key.charAt(0); 2148 if (ch < ALPHA_SIZE) 2149 { 2150 node = node.m_nextChar[ch]; 2151 if (node != null) 2152 return node.m_Value; 2153 } 2154 return null; 2155 } 2156 // comment out case 2 because the default is faster 2157 // case 2 : 2158 // { 2159 // final char ch0 = key.charAt(0); 2160 // final char ch1 = key.charAt(1); 2161 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2162 // { 2163 // node = node.m_nextChar[ch0]; 2164 // if (node != null) 2165 // { 2166 // 2167 // if (ch1 < ALPHA_SIZE) 2168 // { 2169 // node = node.m_nextChar[ch1]; 2170 // if (node != null) 2171 // return node.m_Value; 2172 // } 2173 // } 2174 // } 2175 // return null; 2176 // } 2177 default : 2178 { 2179 for (int i = 0; i < len; i++) 2180 { 2181 // A thread-safe way to loop over the characters 2182 final char ch = key.charAt(i); 2183 if (ALPHA_SIZE <= ch) 2184 { 2185 // the key is not 7-bit ASCII so we won't find it here 2186 return null; 2187 } 2188 2189 node = node.m_nextChar[ch]; 2190 if (node == null) 2191 return null; 2192 } 2193 2194 return node.m_Value; 2195 } 2196 } 2197 } 2198 2199 /** 2200 * The node representation for the trie. 2201 * @xsl.usage internal 2202 */ 2203 private class Node 2204 { 2205 2206 /** 2207 * Constructor, creates a Node[ALPHA_SIZE]. 2208 */ 2209 Node() 2210 { 2211 m_nextChar = new Node[ALPHA_SIZE]; 2212 m_Value = null; 2213 } 2214 2215 /** The next nodes. */ 2216 final Node m_nextChar[]; 2217 2218 /** The value. */ 2219 Object m_Value; 2220 } 2221 /** 2222 * Construct the trie from another Trie. 2223 * Both the existing Trie and this new one share the same table for 2224 * lookup, and it is assumed that the table is fully populated and 2225 * not changing anymore. 2226 * 2227 * @param existingTrie the Trie that this one is a copy of. 2228 */ 2229 public Trie(Trie existingTrie) 2230 { 2231 // copy some fields from the existing Trie into this one. 
2232 m_Root = existingTrie.m_Root; 2233 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2234 2235 // get a buffer just big enough to hold the longest key in the table. 2236 int max = existingTrie.getLongestKeyLength(); 2237 m_charBuffer = new char[max]; 2238 } 2239 2240 /** 2241 * Get an object that matches the key. 2242 * This method is faster than get(), but is not thread-safe. 2243 * 2244 * @param key must be a 7-bit ASCII string 2245 * 2246 * @return The object that matches the key, or null. 2247 */ 2248 public Object get2(final String key) 2249 { 2250 2251 final int len = key.length(); 2252 2253 /* If the name is too long, we won't find it, this also keeps us 2254 * from overflowing m_charBuffer 2255 */ 2256 if (m_charBuffer.length < len) 2257 return null; 2258 2259 Node node = m_Root; 2260 switch (len) // optimize the look up based on the number of chars 2261 { 2262 // case 0 looks silly, but the generated bytecode runs 2263 // faster for lookup of elements of length 2 with this in 2264 // and a fair bit faster. Don't know why. 2265 case 0 : 2266 { 2267 return null; 2268 } 2269 2270 case 1 : 2271 { 2272 final char ch = key.charAt(0); 2273 if (ch < ALPHA_SIZE) 2274 { 2275 node = node.m_nextChar[ch]; 2276 if (node != null) 2277 return node.m_Value; 2278 } 2279 return null; 2280 } 2281 default : 2282 { 2283 /* Copy string into array. This is not thread-safe because 2284 * it modifies the contents of m_charBuffer. If multiple 2285 * threads were to use this Trie they all would be 2286 * using this same array (not good). So this 2287 * method is not thread-safe, but it is faster because 2288 * converting to a char[] and looping over elements of 2289 * the array is faster than a String's charAt(i). 
2290 */ 2291 key.getChars(0, len, m_charBuffer, 0); 2292 2293 for (int i = 0; i < len; i++) 2294 { 2295 final char ch = m_charBuffer[i]; 2296 if (ALPHA_SIZE <= ch) 2297 { 2298 // the key is not 7-bit ASCII so we won't find it here 2299 return null; 2300 } 2301 2302 node = node.m_nextChar[ch]; 2303 if (node == null) 2304 return null; 2305 } 2306 2307 return node.m_Value; 2308 } 2309 } 2310 } 2311 2312 /** 2313 * Get the length of the longest key used in the table. 2314 */ 2315 public int getLongestKeyLength() 2316 { 2317 return m_charBuffer.length; 2318 } 2319 } 2320 }