1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one 7 * or more contributor license agreements. See the NOTICE file 8 * distributed with this work for additional information 9 * regarding copyright ownership. The ASF licenses this file 10 * to you under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 /* 23 * $Id: ToHTMLStream.java,v 1.2.4.1 2005/09/15 08:15:26 suresh_emailid Exp $ 24 */ 25 package com.sun.org.apache.xml.internal.serializer; 26 27 import java.io.IOException; 28 import java.io.OutputStream; 29 import java.io.UnsupportedEncodingException; 30 import java.util.Properties; 31 32 import javax.xml.transform.Result; 33 34 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 35 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 36 import org.xml.sax.Attributes; 37 import org.xml.sax.SAXException; 38 39 /** 40 * This serializer takes a series of SAX or 41 * SAX-like events and writes its output 42 * to the given stream. 43 * 44 * This class is not a public API, it is public 45 * because it is used from another package. 46 * 47 * @xsl.usage internal 48 */ 49 public final class ToHTMLStream extends ToStream 50 { 51 52 /** This flag is set while receiving events from the DTD */ 53 protected boolean m_inDTD = false; 54 55 /** True if the current element is a block element. (seems like 56 * this needs to be a stack. -sb). 
*/
    private boolean m_inBlockElem = false;

    /**
     * Character table for HTML output: it tells which characters need special
     * treatment and provides the character to entity name lookup.
     */
    private static final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /** Trie giving fast, case insensitive lookup of ElemDesc objects by element name. */
    static final Trie m_elementFlags = new Trie();

    static
    {
        initTagReference(m_elementFlags);
    }

    /**
     * Fills the given trie with one ElemDesc per known HTML element, then marks
     * on those descriptors the attributes flagged as URLs (ElemDesc.ATTRURL)
     * and the attributes flagged as empty/boolean (ElemDesc.ATTREMPTY).
     *
     * Every element gets its own ElemDesc instance, because the attribute
     * section below mutates the descriptors individually.
     *
     * @param m_elementFlags the trie to populate.
     */
    static void initTagReference(Trie m_elementFlags)
    {
        // Flag combinations that are shared by several elements.
        final int emptyBlock = ElemDesc.EMPTY | ElemDesc.BLOCK;
        final int inlineSpecial = ElemDesc.SPECIAL | ElemDesc.ASPECIAL;
        final int blockForm =
            ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET;
        final int heading = ElemDesc.HEAD | ElemDesc.BLOCK;
        final int listBlock = ElemDesc.LIST | ElemDesc.BLOCK;
        final int labelledControl = ElemDesc.FORMCTRL | ElemDesc.INLINELABEL;
        final int headMiscEmptyBlock =
            ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK;

        // ---- HTML 4.0 loose DTD ----
        m_elementFlags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
        m_elementFlags.put("FRAME", new ElemDesc(emptyBlock));
        m_elementFlags.put("FRAMESET", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("NOFRAMES", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("ISINDEX", new ElemDesc(emptyBlock));
        m_elementFlags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
        m_elementFlags.put("CENTER", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("DIR", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("MENU", new ElemDesc(ElemDesc.BLOCK));

        // ---- HTML 4.0 strict DTD ----
        final String[] fontStyleTags = {"TT", "I", "B", "BIG", "SMALL"};
        for (int i = 0; i < fontStyleTags.length; i++)
        {
            m_elementFlags.put(fontStyleTags[i], new ElemDesc(ElemDesc.FONTSTYLE));
        }

        final String[] phraseTags =
            {"EM", "STRONG", "DFN", "CODE", "SAMP", "KBD", "VAR", "CITE", "ABBR", "ACRONYM"};
        for (int i = 0; i < phraseTags.length; i++)
        {
            m_elementFlags.put(phraseTags[i], new ElemDesc(ElemDesc.PHRASE));
        }

        final String[] inlineSpecialTags = {"SUP", "SUB", "SPAN", "BDO"};
        for (int i = 0; i < inlineSpecialTags.length; i++)
        {
            m_elementFlags.put(inlineSpecialTags[i], new ElemDesc(inlineSpecial));
        }

        m_elementFlags.put("BR", new ElemDesc(inlineSpecial | emptyBlock));
        m_elementFlags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("ADDRESS", new ElemDesc(blockForm));
        m_elementFlags.put("DIV", new ElemDesc(blockForm));
        m_elementFlags.put("A", new ElemDesc(ElemDesc.SPECIAL));
        m_elementFlags.put("MAP", new ElemDesc(inlineSpecial | ElemDesc.BLOCK));
        m_elementFlags.put("AREA", new ElemDesc(emptyBlock));
        m_elementFlags.put("LINK", new ElemDesc(headMiscEmptyBlock));
        m_elementFlags.put(
            "IMG",
            new ElemDesc(inlineSpecial | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE));
        m_elementFlags.put(
            "OBJECT",
            new ElemDesc(inlineSpecial | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE));
        m_elementFlags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
        m_elementFlags.put("HR", new ElemDesc(blockForm | ElemDesc.EMPTY));
        m_elementFlags.put("P", new ElemDesc(blockForm));

        // H1 .. H6
        for (int level = 1; level <= 6; level++)
        {
            m_elementFlags.put("H" + level, new ElemDesc(heading));
        }

        m_elementFlags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
        m_elementFlags.put("Q", new ElemDesc(inlineSpecial));
        m_elementFlags.put("BLOCKQUOTE", new ElemDesc(blockForm));
        m_elementFlags.put("INS", new ElemDesc(0));
        m_elementFlags.put("DEL", new ElemDesc(0));
        m_elementFlags.put("DL", new ElemDesc(blockForm));
        m_elementFlags.put("DT", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("DD", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("OL", new ElemDesc(listBlock));
        m_elementFlags.put("UL", new ElemDesc(listBlock));
        m_elementFlags.put("LI", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("FORM", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
        m_elementFlags.put("INPUT", new ElemDesc(labelledControl | ElemDesc.EMPTY));
        m_elementFlags.put("SELECT", new ElemDesc(labelledControl));
        m_elementFlags.put("OPTGROUP", new ElemDesc(0));
        m_elementFlags.put("OPTION", new ElemDesc(0));
        m_elementFlags.put("TEXTAREA", new ElemDesc(labelledControl));
        m_elementFlags.put("FIELDSET", new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
        m_elementFlags.put("LEGEND", new ElemDesc(0));
        m_elementFlags.put("BUTTON", new ElemDesc(labelledControl));
        m_elementFlags.put("TABLE", new ElemDesc(blockForm));

        final String[] tableBlockTags = {"CAPTION", "THEAD", "TFOOT", "TBODY", "COLGROUP"};
        for (int i = 0; i < tableBlockTags.length; i++)
        {
            m_elementFlags.put(tableBlockTags[i], new ElemDesc(ElemDesc.BLOCK));
        }

        m_elementFlags.put("COL", new ElemDesc(emptyBlock));
        m_elementFlags.put("TR", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("TH", new ElemDesc(0));
        m_elementFlags.put("TD", new ElemDesc(0));
        m_elementFlags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
        m_elementFlags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
        m_elementFlags.put("BASE", new ElemDesc(emptyBlock));
        m_elementFlags.put("META", new ElemDesc(headMiscEmptyBlock));
        m_elementFlags.put(
            "STYLE",
            new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
        m_elementFlags.put(
            "SCRIPT",
            new ElemDesc(inlineSpecial | ElemDesc.HEADMISC | ElemDesc.RAW));
        m_elementFlags.put("NOSCRIPT", new ElemDesc(blockForm));
        m_elementFlags.put("HTML", new ElemDesc(ElemDesc.BLOCK));

        // ---- Transitional DTD additions (FONT and NOBR contributed by John Ky) ----
        final String[] transitionalFontTags = {"FONT", "S", "STRIKE", "U", "NOBR"};
        for (int i = 0; i < transitionalFontTags.length; i++)
        {
            m_elementFlags.put(transitionalFontTags[i], new ElemDesc(ElemDesc.FONTSTYLE));
        }

        // IFRAME: HTML 4.0, section 16.5.  LAYER and ILAYER: Netscape 4 extensions.
        m_elementFlags.put("IFRAME", new ElemDesc(blockForm));
        m_elementFlags.put("LAYER", new ElemDesc(blockForm));
        m_elementFlags.put("ILAYER", new ElemDesc(blockForm));

        // ---- Attribute information ----
        // The descriptors registered above are fetched back by lower-case
        // name and annotated in place.
        ElemDesc desc;

        desc = (ElemDesc) m_elementFlags.get("a");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NAME", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("area");
        desc.setAttr("HREF", ElemDesc.ATTRURL);
        desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("base");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("button");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("blockquote");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("del");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("dir");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("div");
        desc.setAttr("SRC", ElemDesc.ATTRURL);      // Netscape 4 extension
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) m_elementFlags.get("dl");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("form");
        desc.setAttr("ACTION", ElemDesc.ATTRURL);

        // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
        desc = (ElemDesc) m_elementFlags.get("frame");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("head");
        desc.setAttr("PROFILE", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("hr");
        desc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);

        // HTML 4.0, section 16.5
        desc = (ElemDesc) m_elementFlags.get("iframe");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) m_elementFlags.get("ilayer");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("img");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("input");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("ins");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        // Netscape 4 extension
        desc = (ElemDesc) m_elementFlags.get("layer");
        desc.setAttr("SRC", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("link");
        desc.setAttr("HREF", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("menu");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("object");
        desc.setAttr("CLASSID", ElemDesc.ATTRURL);
        desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
        desc.setAttr("DATA", ElemDesc.ATTRURL);
        desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
        desc.setAttr("USEMAP", ElemDesc.ATTRURL);
        desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("ol");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("optgroup");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("option");
        desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("q");
        desc.setAttr("CITE", ElemDesc.ATTRURL);

        desc = (ElemDesc) m_elementFlags.get("script");
        desc.setAttr("SRC", ElemDesc.ATTRURL);
        desc.setAttr("FOR", ElemDesc.ATTRURL);
        desc.setAttr("DEFER", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("select");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("table");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

        desc = (ElemDesc) m_elementFlags.get("td");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("textarea");
        desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
        desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("th");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        // The nowrap attribute of a tr element is both
        // a Netscape and Internet-Explorer extension
        desc = (ElemDesc) m_elementFlags.get("tr");
        desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

        desc = (ElemDesc) m_elementFlags.get("ul");
        desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    }

    /**
     * Descriptor handed out for element names that are not in the table;
     * it carries only the BLOCK flag.
     */
    private static final ElemDesc m_dummy = new ElemDesc(ElemDesc.BLOCK);

    /** True if URLs should be specially escaped with the %xx form. */
    private boolean m_specialEscapeURLs = true;

    /** True if the META tag should be omitted. */
    private boolean m_omitMetaTag = false;

    /**
     * Tells if the formatter should use special URL escaping.
     *
     * @param bool True if URLs should be specially escaped with the %xx form.
     */
    public void setSpecialEscapeURLs(boolean bool)
    {
        this.m_specialEscapeURLs = bool;
    }

    /**
     * Tells if the formatter should omit the META tag.
     *
     * @param bool True if the META tag should be omitted.
546 */ 547 public void setOmitMetaTag(boolean bool) 548 { 549 m_omitMetaTag = bool; 550 } 551 552 /** 553 * Specifies an output format for this serializer. It the 554 * serializer has already been associated with an output format, 555 * it will switch to the new format. This method should not be 556 * called while the serializer is in the process of serializing 557 * a document. 558 * 559 * This method can be called multiple times before starting 560 * the serialization of a particular result-tree. In principle 561 * all serialization parameters can be changed, with the exception 562 * of method="html" (it must be method="html" otherwise we 563 * shouldn't even have a ToHTMLStream object here!) 564 * 565 * @param format The output format or serialzation parameters 566 * to use. 567 */ 568 public void setOutputFormat(Properties format) 569 { 570 571 m_specialEscapeURLs = 572 OutputPropertyUtils.getBooleanProperty( 573 OutputPropertiesFactory.S_USE_URL_ESCAPING, 574 format); 575 576 m_omitMetaTag = 577 OutputPropertyUtils.getBooleanProperty( 578 OutputPropertiesFactory.S_OMIT_META_TAG, 579 format); 580 581 super.setOutputFormat(format); 582 } 583 584 /** 585 * Tells if the formatter should use special URL escaping. 586 * 587 * @return True if URLs should be specially escaped with the %xx form. 588 */ 589 private final boolean getSpecialEscapeURLs() 590 { 591 return m_specialEscapeURLs; 592 } 593 594 /** 595 * Tells if the formatter should omit the META tag. 596 * 597 * @return True if the META tag should be omitted. 598 */ 599 private final boolean getOmitMetaTag() 600 { 601 return m_omitMetaTag; 602 } 603 604 /** 605 * Get a description of the given element. 606 * 607 * @param name non-null name of element, case insensitive. 608 * 609 * @return non-null reference to ElemDesc, which may be m_dummy if no 610 * element description matches the given name. 
611 */ 612 public static final ElemDesc getElemDesc(String name) 613 { 614 /* this method used to return m_dummy when name was null 615 * but now it doesn't check and and requires non-null name. 616 */ 617 Object obj = m_elementFlags.get(name); 618 if (null != obj) 619 return (ElemDesc)obj; 620 return m_dummy; 621 } 622 623 /** 624 * A Trie that is just a copy of the "static" one. 625 * We need this one to be able to use the faster, but not thread-safe 626 * method Trie.get2(name) 627 */ 628 private Trie m_htmlInfo = new Trie(m_elementFlags); 629 /** 630 * Calls to this method could be replaced with calls to 631 * getElemDesc(name), but this one should be faster. 632 */ 633 private ElemDesc getElemDesc2(String name) 634 { 635 Object obj = m_htmlInfo.get2(name); 636 if (null != obj) 637 return (ElemDesc)obj; 638 return m_dummy; 639 } 640 641 /** 642 * Default constructor. 643 */ 644 public ToHTMLStream() 645 { 646 647 super(); 648 m_charInfo = m_htmlcharInfo; 649 // initialize namespaces 650 m_prefixMap = new NamespaceMappings(); 651 652 } 653 654 /** The name of the current element. */ 655 // private String m_currentElementName = null; 656 657 /** 658 * Receive notification of the beginning of a document. 659 * 660 * @throws org.xml.sax.SAXException Any SAX exception, possibly 661 * wrapping another exception. 
662 * 663 * @throws org.xml.sax.SAXException 664 */ 665 protected void startDocumentInternal() throws org.xml.sax.SAXException 666 { 667 super.startDocumentInternal(); 668 669 m_needToCallStartDocument = false; 670 m_needToOutputDocTypeDecl = true; 671 m_startNewLine = false; 672 setOmitXMLDeclaration(true); 673 674 if (true == m_needToOutputDocTypeDecl) 675 { 676 String doctypeSystem = getDoctypeSystem(); 677 String doctypePublic = getDoctypePublic(); 678 if ((null != doctypeSystem) || (null != doctypePublic)) 679 { 680 final java.io.Writer writer = m_writer; 681 try 682 { 683 writer.write("<!DOCTYPE html"); 684 685 if (null != doctypePublic) 686 { 687 writer.write(" PUBLIC \""); 688 writer.write(doctypePublic); 689 writer.write('"'); 690 } 691 692 if (null != doctypeSystem) 693 { 694 if (null == doctypePublic) 695 writer.write(" SYSTEM \""); 696 else 697 writer.write(" \""); 698 699 writer.write(doctypeSystem); 700 writer.write('"'); 701 } 702 703 writer.write('>'); 704 outputLineSep(); 705 } 706 catch(IOException e) 707 { 708 throw new SAXException(e); 709 } 710 } 711 } 712 713 m_needToOutputDocTypeDecl = false; 714 } 715 716 /** 717 * Receive notification of the end of a document. 718 * 719 * @throws org.xml.sax.SAXException Any SAX exception, possibly 720 * wrapping another exception. 721 * 722 * @throws org.xml.sax.SAXException 723 */ 724 public final void endDocument() throws org.xml.sax.SAXException 725 { 726 727 flushPending(); 728 if (m_doIndent && !m_isprevtext) 729 { 730 try 731 { 732 outputLineSep(); 733 } 734 catch(IOException e) 735 { 736 throw new SAXException(e); 737 } 738 } 739 740 flushWriter(); 741 if (m_tracer != null) 742 super.fireEndDoc(); 743 } 744 745 /** 746 * Receive notification of the beginning of an element. 747 * 748 * 749 * @param namespaceURI 750 * @param localName 751 * @param name The element type name. 752 * @param atts The attributes attached to the element, if any. 
753 * @throws org.xml.sax.SAXException Any SAX exception, possibly 754 * wrapping another exception. 755 * @see #endElement 756 * @see org.xml.sax.AttributeList 757 */ 758 public void startElement( 759 String namespaceURI, 760 String localName, 761 String name, 762 Attributes atts) 763 throws org.xml.sax.SAXException 764 { 765 766 ElemContext elemContext = m_elemContext; 767 768 // clean up any pending things first 769 if (elemContext.m_startTagOpen) 770 { 771 closeStartTag(); 772 elemContext.m_startTagOpen = false; 773 } 774 else if (m_cdataTagOpen) 775 { 776 closeCDATA(); 777 m_cdataTagOpen = false; 778 } 779 else if (m_needToCallStartDocument) 780 { 781 startDocumentInternal(); 782 m_needToCallStartDocument = false; 783 } 784 785 786 // if this element has a namespace then treat it like XML 787 if (null != namespaceURI && namespaceURI.length() > 0) 788 { 789 super.startElement(namespaceURI, localName, name, atts); 790 791 return; 792 } 793 794 try 795 { 796 // getElemDesc2(name) is faster than getElemDesc(name) 797 ElemDesc elemDesc = getElemDesc2(name); 798 int elemFlags = elemDesc.getFlags(); 799 800 // deal with indentation issues first 801 if (m_doIndent) 802 { 803 804 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 805 if (m_ispreserve) 806 m_ispreserve = false; 807 else if ( 808 (null != elemContext.m_elementName) 809 && (!m_inBlockElem 810 || isBlockElement) /* && !isWhiteSpaceSensitive */ 811 ) 812 { 813 m_startNewLine = true; 814 815 indent(); 816 817 } 818 m_inBlockElem = !isBlockElement; 819 } 820 821 // save any attributes for later processing 822 if (atts != null) 823 addAttributes(atts); 824 825 m_isprevtext = false; 826 final java.io.Writer writer = m_writer; 827 writer.write('<'); 828 writer.write(name); 829 830 831 832 if (m_tracer != null) 833 firePseudoAttributes(); 834 835 if ((elemFlags & ElemDesc.EMPTY) != 0) 836 { 837 // an optimization for elements which are expected 838 // to be empty. 
839 m_elemContext = elemContext.push(); 840 /* XSLTC sometimes calls namespaceAfterStartElement() 841 * so we need to remember the name 842 */ 843 m_elemContext.m_elementName = name; 844 m_elemContext.m_elementDesc = elemDesc; 845 return; 846 } 847 else 848 { 849 elemContext = elemContext.push(namespaceURI,localName,name); 850 m_elemContext = elemContext; 851 elemContext.m_elementDesc = elemDesc; 852 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 853 } 854 855 856 if ((elemFlags & ElemDesc.HEADELEM) != 0) 857 { 858 // This is the <HEAD> element, do some special processing 859 closeStartTag(); 860 elemContext.m_startTagOpen = false; 861 if (!m_omitMetaTag) 862 { 863 if (m_doIndent) 864 indent(); 865 writer.write( 866 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 867 String encoding = getEncoding(); 868 String encode = Encodings.getMimeEncoding(encoding); 869 writer.write(encode); 870 writer.write("\">"); 871 } 872 } 873 } 874 catch (IOException e) 875 { 876 throw new SAXException(e); 877 } 878 } 879 880 /** 881 * Receive notification of the end of an element. 882 * 883 * 884 * @param namespaceURI 885 * @param localName 886 * @param name The element type name 887 * @throws org.xml.sax.SAXException Any SAX exception, possibly 888 * wrapping another exception. 
889 */ 890 public final void endElement( 891 final String namespaceURI, 892 final String localName, 893 final String name) 894 throws org.xml.sax.SAXException 895 { 896 // deal with any pending issues 897 if (m_cdataTagOpen) 898 closeCDATA(); 899 900 // if the element has a namespace, treat it like XML, not HTML 901 if (null != namespaceURI && namespaceURI.length() > 0) 902 { 903 super.endElement(namespaceURI, localName, name); 904 905 return; 906 } 907 908 try 909 { 910 911 ElemContext elemContext = m_elemContext; 912 final ElemDesc elemDesc = elemContext.m_elementDesc; 913 final int elemFlags = elemDesc.getFlags(); 914 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 915 916 // deal with any indentation issues 917 if (m_doIndent) 918 { 919 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 920 boolean shouldIndent = false; 921 922 if (m_ispreserve) 923 { 924 m_ispreserve = false; 925 } 926 else if (m_doIndent && (!m_inBlockElem || isBlockElement)) 927 { 928 m_startNewLine = true; 929 shouldIndent = true; 930 } 931 if (!elemContext.m_startTagOpen && shouldIndent) 932 indent(elemContext.m_currentElemDepth - 1); 933 m_inBlockElem = !isBlockElement; 934 } 935 936 final java.io.Writer writer = m_writer; 937 if (!elemContext.m_startTagOpen) 938 { 939 writer.write("</"); 940 writer.write(name); 941 writer.write('>'); 942 } 943 else 944 { 945 // the start-tag open when this method was called, 946 // so we need to process it now. 947 948 if (m_tracer != null) 949 super.fireStartElem(name); 950 951 // the starting tag was still open when we received this endElement() call 952 // so we need to process any gathered attributes NOW, before they go away. 
953 int nAttrs = m_attributes.getLength(); 954 if (nAttrs > 0) 955 { 956 processAttributes(m_writer, nAttrs); 957 // clear attributes object for re-use with next element 958 m_attributes.clear(); 959 } 960 if (!elemEmpty) 961 { 962 // As per Dave/Paul recommendation 12/06/2000 963 // if (shouldIndent) 964 // writer.write('>'); 965 // indent(m_currentIndent); 966 967 writer.write("></"); 968 writer.write(name); 969 writer.write('>'); 970 } 971 else 972 { 973 writer.write('>'); 974 } 975 } 976 977 // clean up because the element has ended 978 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) 979 m_ispreserve = true; 980 m_isprevtext = false; 981 982 // fire off the end element event 983 if (m_tracer != null) 984 super.fireEndElem(name); 985 986 // OPTIMIZE-EMPTY 987 if (elemEmpty) 988 { 989 // a quick exit if the HTML element had no children. 990 // This block of code can be removed if the corresponding block of code 991 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 992 m_elemContext = elemContext.m_prev; 993 return; 994 } 995 996 // some more clean because the element has ended. 997 if (!elemContext.m_startTagOpen) 998 { 999 if (m_doIndent && !m_preserves.isEmpty()) 1000 m_preserves.pop(); 1001 } 1002 m_elemContext = elemContext.m_prev; 1003 // m_isRawStack.pop(); 1004 } 1005 catch (IOException e) 1006 { 1007 throw new SAXException(e); 1008 } 1009 } 1010 1011 /** 1012 * Process an attribute. 1013 * @param writer The writer to write the processed output to. 1014 * @param name The name of the attribute. 1015 * @param value The value of the attribute. 1016 * @param elemDesc The description of the HTML element 1017 * that has this attribute. 
1018 * 1019 * @throws org.xml.sax.SAXException 1020 */ 1021 protected void processAttribute( 1022 java.io.Writer writer, 1023 String name, 1024 String value, 1025 ElemDesc elemDesc) 1026 throws IOException 1027 { 1028 writer.write(' '); 1029 1030 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1031 && elemDesc != null 1032 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1033 { 1034 writer.write(name); 1035 } 1036 else 1037 { 1038 // %REVIEW% %OPT% 1039 // Two calls to single-char write may NOT 1040 // be more efficient than one to string-write... 1041 writer.write(name); 1042 writer.write("=\""); 1043 if ( elemDesc != null 1044 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1045 writeAttrURI(writer, value, m_specialEscapeURLs); 1046 else 1047 writeAttrString(writer, value, this.getEncoding()); 1048 writer.write('"'); 1049 1050 } 1051 } 1052 1053 /** 1054 * Tell if a character is an ASCII digit. 1055 */ 1056 private boolean isASCIIDigit(char c) 1057 { 1058 return (c >= '0' && c <= '9'); 1059 } 1060 1061 /** 1062 * Make an integer into an HH hex value. 1063 * Does no checking on the size of the input, since this 1064 * is only meant to be used locally by writeAttrURI. 1065 * 1066 * @param i must be a value less than 255. 1067 * 1068 * @return should be a two character string. 1069 */ 1070 private static String makeHHString(int i) 1071 { 1072 String s = Integer.toHexString(i).toUpperCase(); 1073 if (s.length() == 1) 1074 { 1075 s = "0" + s; 1076 } 1077 return s; 1078 } 1079 1080 /** 1081 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1082 * @param str must be 2 characters long 1083 * 1084 * @return true or false 1085 */ 1086 private boolean isHHSign(String str) 1087 { 1088 boolean sign = true; 1089 try 1090 { 1091 char r = (char) Integer.parseInt(str, 16); 1092 } 1093 catch (NumberFormatException e) 1094 { 1095 sign = false; 1096 } 1097 return sign; 1098 } 1099 1100 /** 1101 * Write the specified <var>string</var> after substituting non ASCII characters, 1102 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1103 * 1104 * @param string String to convert to XML format. 1105 * @param doURLEscaping True if we should try to encode as 1106 * per http://www.ietf.org/rfc/rfc2396.txt. 1107 * 1108 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1109 */ 1110 public void writeAttrURI( 1111 final java.io.Writer writer, String string, boolean doURLEscaping) 1112 throws IOException 1113 { 1114 // http://www.ietf.org/rfc/rfc2396.txt says: 1115 // A URI is always in an "escaped" form, since escaping or unescaping a 1116 // completed URI might change its semantics. Normally, the only time 1117 // escape encodings can safely be made is when the URI is being created 1118 // from its component parts; each component may have its own set of 1119 // characters that are reserved, so only the mechanism responsible for 1120 // generating or interpreting that component can determine whether or 1121 // not escaping a character will change its semantics. Likewise, a URI 1122 // must be separated into its components before the escaped characters 1123 // within those components can be safely decoded. 1124 // 1125 // ...So we do our best to do limited escaping of the URL, without 1126 // causing damage. If the URL is already properly escaped, in theory, this 1127 // function should not change the string value. 
1128 1129 final int end = string.length(); 1130 if (end > m_attrBuff.length) 1131 { 1132 m_attrBuff = new char[end*2 + 1]; 1133 } 1134 string.getChars(0,end, m_attrBuff, 0); 1135 final char[] chars = m_attrBuff; 1136 1137 int cleanStart = 0; 1138 int cleanLength = 0; 1139 1140 1141 char ch = 0; 1142 for (int i = 0; i < end; i++) 1143 { 1144 ch = chars[i]; 1145 1146 if ((ch < 32) || (ch > 126)) 1147 { 1148 if (cleanLength > 0) 1149 { 1150 writer.write(chars, cleanStart, cleanLength); 1151 cleanLength = 0; 1152 } 1153 if (doURLEscaping) 1154 { 1155 // Encode UTF16 to UTF8. 1156 // Reference is Unicode, A Primer, by Tony Graham. 1157 // Page 92. 1158 1159 // Note that Kay doesn't escape 0x20... 1160 // if(ch == 0x20) // Not sure about this... -sb 1161 // { 1162 // writer.write(ch); 1163 // } 1164 // else 1165 if (ch <= 0x7F) 1166 { 1167 writer.write('%'); 1168 writer.write(makeHHString(ch)); 1169 } 1170 else if (ch <= 0x7FF) 1171 { 1172 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1173 // and set two high bits. 1174 int high = (ch >> 6) | 0xC0; 1175 int low = (ch & 0x3F) | 0x80; 1176 // First 6 bits, + high bit 1177 writer.write('%'); 1178 writer.write(makeHHString(high)); 1179 writer.write('%'); 1180 writer.write(makeHHString(low)); 1181 } 1182 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1183 { 1184 // I'm sure this can be done in 3 instructions, but I choose 1185 // to try and do it exactly like it is done in the book, at least 1186 // until we are sure this is totally clean. I don't think performance 1187 // is a big issue with this particular function, though I could be 1188 // wrong. Also, the stuff below clearly does more masking than 1189 // it needs to do. 1190 1191 // Clear high 6 bits. 
1192 int highSurrogate = ((int) ch) & 0x03FF; 1193 1194 // Middle 4 bits (wwww) + 1 1195 // "Note that the value of wwww from the high surrogate bit pattern 1196 // is incremented to make the uuuuu bit pattern in the scalar value 1197 // so the surrogate pair don't address the BMP." 1198 int wwww = ((highSurrogate & 0x03C0) >> 6); 1199 int uuuuu = wwww + 1; 1200 1201 // next 4 bits 1202 int zzzz = (highSurrogate & 0x003C) >> 2; 1203 1204 // low 2 bits 1205 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1206 1207 // Get low surrogate character. 1208 ch = chars[++i]; 1209 1210 // Clear high 6 bits. 1211 int lowSurrogate = ((int) ch) & 0x03FF; 1212 1213 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1214 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1215 1216 // bottom 6 bits. 1217 int xxxxxx = (lowSurrogate & 0x003F); 1218 1219 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1220 int byte2 = 1221 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1222 int byte3 = 0x80 | yyyyyy; 1223 int byte4 = 0x80 | xxxxxx; 1224 1225 writer.write('%'); 1226 writer.write(makeHHString(byte1)); 1227 writer.write('%'); 1228 writer.write(makeHHString(byte2)); 1229 writer.write('%'); 1230 writer.write(makeHHString(byte3)); 1231 writer.write('%'); 1232 writer.write(makeHHString(byte4)); 1233 } 1234 else 1235 { 1236 int high = (ch >> 12) | 0xE0; // top 4 bits 1237 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1238 // middle 6 bits 1239 int low = (ch & 0x3F) | 0x80; 1240 // First 6 bits, + high bit 1241 writer.write('%'); 1242 writer.write(makeHHString(high)); 1243 writer.write('%'); 1244 writer.write(makeHHString(middle)); 1245 writer.write('%'); 1246 writer.write(makeHHString(low)); 1247 } 1248 1249 } 1250 else if (escapingNotNeeded(ch)) 1251 { 1252 writer.write(ch); 1253 } 1254 else 1255 { 1256 writer.write("&#"); 1257 writer.write(Integer.toString(ch)); 1258 writer.write(';'); 1259 } 1260 // In this character range we have first written out any previously 
accumulated 1261 // "clean" characters, then processed the current more complicated character, 1262 // which may have incremented "i". 1263 // We now we reset the next possible clean character. 1264 cleanStart = i + 1; 1265 } 1266 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1267 // not allowing quotes in the URI proper syntax, nor in the fragment 1268 // identifier, we believe that it's OK to double escape quotes. 1269 else if (ch == '"') 1270 { 1271 // If the character is a '%' number number, try to avoid double-escaping. 1272 // There is a question if this is legal behavior. 1273 1274 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1275 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1276 1277 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1278 1279 // We are no longer escaping '%' 1280 1281 if (cleanLength > 0) 1282 { 1283 writer.write(chars, cleanStart, cleanLength); 1284 cleanLength = 0; 1285 } 1286 1287 1288 // Mike Kay encodes this as ", so he may know something I don't? 1289 if (doURLEscaping) 1290 writer.write("%22"); 1291 else 1292 writer.write("""); // we have to escape this, I guess. 1293 1294 // We have written out any clean characters, then the escaped '%' and now we 1295 // We now we reset the next possible clean character. 1296 cleanStart = i + 1; 1297 } 1298 else if (ch == '&') 1299 { 1300 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1301 // instead of "&" to avoid confusion with the beginning of a character 1302 // reference (entity reference open delimiter). 
1303 if (cleanLength > 0) 1304 { 1305 writer.write(chars, cleanStart, cleanLength); 1306 cleanLength = 0; 1307 } 1308 writer.write("&"); 1309 cleanStart = i + 1; 1310 } 1311 else 1312 { 1313 // no processing for this character, just count how 1314 // many characters in a row that we have that need no processing 1315 cleanLength++; 1316 } 1317 } 1318 1319 // are there any clean characters at the end of the array 1320 // that we haven't processed yet? 1321 if (cleanLength > 1) 1322 { 1323 // if the whole string can be written out as-is do so 1324 // otherwise write out the clean chars at the end of the 1325 // array 1326 if (cleanStart == 0) 1327 writer.write(string); 1328 else 1329 writer.write(chars, cleanStart, cleanLength); 1330 } 1331 else if (cleanLength == 1) 1332 { 1333 // a little optimization for 1 clean character 1334 // (we could have let the previous if(...) handle them all) 1335 writer.write(ch); 1336 } 1337 } 1338 1339 /** 1340 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1341 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1342 * 1343 * @param string String to convert to XML format. 1344 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1345 * 1346 * @throws org.xml.sax.SAXException 1347 */ 1348 public void writeAttrString( 1349 final java.io.Writer writer, String string, String encoding) 1350 throws IOException 1351 { 1352 final int end = string.length(); 1353 if (end > m_attrBuff.length) 1354 { 1355 m_attrBuff = new char[end * 2 + 1]; 1356 } 1357 string.getChars(0, end, m_attrBuff, 0); 1358 final char[] chars = m_attrBuff; 1359 1360 1361 1362 int cleanStart = 0; 1363 int cleanLength = 0; 1364 1365 char ch = 0; 1366 for (int i = 0; i < end; i++) 1367 { 1368 ch = chars[i]; 1369 1370 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1371 // System.out.println("ch: "+(int)ch); 1372 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1373 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1374 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) 1375 { 1376 cleanLength++; 1377 } 1378 else if ('<' == ch || '>' == ch) 1379 { 1380 cleanLength++; // no escaping in this case, as specified in 15.2 1381 } 1382 else if ( 1383 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1384 { 1385 cleanLength++; // no escaping in this case, as specified in 15.2 1386 } 1387 else 1388 { 1389 if (cleanLength > 0) 1390 { 1391 writer.write(chars,cleanStart,cleanLength); 1392 cleanLength = 0; 1393 } 1394 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1395 1396 if (i != pos) 1397 { 1398 i = pos - 1; 1399 } 1400 else 1401 { 1402 if (Encodings.isHighUTF16Surrogate(ch)) 1403 { 1404 1405 writeUTF16Surrogate(ch, chars, i, end); 1406 i++; // two input characters processed 1407 // this increments by one and the for() 1408 // loop itself increments by another one. 1409 } 1410 1411 // The next is kind of a hack to keep from escaping in the case 1412 // of Shift_JIS and the like. 
1413 1414 /* 1415 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1416 && (ch != 160)) 1417 { 1418 writer.write(ch); // no escaping in this case 1419 } 1420 else 1421 */ 1422 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1423 if (null != outputStringForChar) 1424 { 1425 writer.write(outputStringForChar); 1426 } 1427 else if (escapingNotNeeded(ch)) 1428 { 1429 writer.write(ch); // no escaping in this case 1430 } 1431 else 1432 { 1433 writer.write("&#"); 1434 writer.write(Integer.toString(ch)); 1435 writer.write(';'); 1436 } 1437 } 1438 cleanStart = i + 1; 1439 } 1440 } // end of for() 1441 1442 // are there any clean characters at the end of the array 1443 // that we haven't processed yet? 1444 if (cleanLength > 1) 1445 { 1446 // if the whole string can be written out as-is do so 1447 // otherwise write out the clean chars at the end of the 1448 // array 1449 if (cleanStart == 0) 1450 writer.write(string); 1451 else 1452 writer.write(chars, cleanStart, cleanLength); 1453 } 1454 else if (cleanLength == 1) 1455 { 1456 // a little optimization for 1 clean character 1457 // (we could have let the previous if(...) handle them all) 1458 writer.write(ch); 1459 } 1460 } 1461 1462 1463 1464 /** 1465 * Receive notification of character data. 1466 * 1467 * <p>The Parser will call this method to report each chunk of 1468 * character data. 
SAX parsers may return all contiguous character 1469 * data in a single chunk, or they may split it into several 1470 * chunks; however, all of the characters in any single event 1471 * must come from the same external entity, so that the Locator 1472 * provides useful information.</p> 1473 * 1474 * <p>The application must not attempt to read from the array 1475 * outside of the specified range.</p> 1476 * 1477 * <p>Note that some parsers will report whitespace using the 1478 * ignorableWhitespace() method rather than this one (validating 1479 * parsers must do so).</p> 1480 * 1481 * @param chars The characters from the XML document. 1482 * @param start The start position in the array. 1483 * @param length The number of characters to read from the array. 1484 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1485 * wrapping another exception. 1486 * @see #ignorableWhitespace 1487 * @see org.xml.sax.Locator 1488 * 1489 * @throws org.xml.sax.SAXException 1490 */ 1491 public final void characters(char chars[], int start, int length) 1492 throws org.xml.sax.SAXException 1493 { 1494 1495 if (m_elemContext.m_isRaw) 1496 { 1497 try 1498 { 1499 if (m_elemContext.m_startTagOpen) 1500 { 1501 closeStartTag(); 1502 m_elemContext.m_startTagOpen = false; 1503 } 1504 m_ispreserve = true; 1505 1506 // With m_ispreserve just set true it looks like shouldIndent() 1507 // will always return false, so drop any possible indentation. 
1508 // if (shouldIndent()) 1509 // indent(); 1510 1511 // writer.write("<![CDATA["); 1512 // writer.write(chars, start, length); 1513 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1514 1515 // writer.write("]]>"); 1516 1517 // time to generate characters event 1518 if (m_tracer != null) 1519 super.fireCharEvent(chars, start, length); 1520 1521 return; 1522 } 1523 catch (IOException ioe) 1524 { 1525 throw new org.xml.sax.SAXException( 1526 Utils.messages.createMessage( 1527 MsgKey.ER_OIERROR, 1528 null), 1529 ioe); 1530 //"IO error", ioe); 1531 } 1532 } 1533 else 1534 { 1535 super.characters(chars, start, length); 1536 } 1537 } 1538 1539 /** 1540 * Receive notification of cdata. 1541 * 1542 * <p>The Parser will call this method to report each chunk of 1543 * character data. SAX parsers may return all contiguous character 1544 * data in a single chunk, or they may split it into several 1545 * chunks; however, all of the characters in any single event 1546 * must come from the same external entity, so that the Locator 1547 * provides useful information.</p> 1548 * 1549 * <p>The application must not attempt to read from the array 1550 * outside of the specified range.</p> 1551 * 1552 * <p>Note that some parsers will report whitespace using the 1553 * ignorableWhitespace() method rather than this one (validating 1554 * parsers must do so).</p> 1555 * 1556 * @param ch The characters from the XML document. 1557 * @param start The start position in the array. 1558 * @param length The number of characters to read from the array. 1559 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1560 * wrapping another exception. 
1561 * @see #ignorableWhitespace 1562 * @see org.xml.sax.Locator 1563 * 1564 * @throws org.xml.sax.SAXException 1565 */ 1566 public final void cdata(char ch[], int start, int length) 1567 throws org.xml.sax.SAXException 1568 { 1569 1570 if ((null != m_elemContext.m_elementName) 1571 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1572 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1573 { 1574 try 1575 { 1576 if (m_elemContext.m_startTagOpen) 1577 { 1578 closeStartTag(); 1579 m_elemContext.m_startTagOpen = false; 1580 } 1581 1582 m_ispreserve = true; 1583 1584 if (shouldIndent()) 1585 indent(); 1586 1587 // writer.write(ch, start, length); 1588 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1589 } 1590 catch (IOException ioe) 1591 { 1592 throw new org.xml.sax.SAXException( 1593 Utils.messages.createMessage( 1594 MsgKey.ER_OIERROR, 1595 null), 1596 ioe); 1597 //"IO error", ioe); 1598 } 1599 } 1600 else 1601 { 1602 super.cdata(ch, start, length); 1603 } 1604 } 1605 1606 /** 1607 * Receive notification of a processing instruction. 1608 * 1609 * @param target The processing instruction target. 1610 * @param data The processing instruction data, or null if 1611 * none was supplied. 1612 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1613 * wrapping another exception. 1614 * 1615 * @throws org.xml.sax.SAXException 1616 */ 1617 public void processingInstruction(String target, String data) 1618 throws org.xml.sax.SAXException 1619 { 1620 1621 // Process any pending starDocument and startElement first. 1622 flushPending(); 1623 1624 // Use a fairly nasty hack to tell if the next node is supposed to be 1625 // unescaped text. 
1626 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1627 { 1628 startNonEscaping(); 1629 } 1630 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1631 { 1632 endNonEscaping(); 1633 } 1634 else 1635 { 1636 try 1637 { 1638 if (m_elemContext.m_startTagOpen) 1639 { 1640 closeStartTag(); 1641 m_elemContext.m_startTagOpen = false; 1642 } 1643 else if (m_needToCallStartDocument) 1644 startDocumentInternal(); 1645 1646 if (shouldIndent()) 1647 indent(); 1648 1649 final java.io.Writer writer = m_writer; 1650 //writer.write("<?" + target); 1651 writer.write("<?"); 1652 writer.write(target); 1653 1654 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1655 writer.write(' '); 1656 1657 //writer.write(data + ">"); // different from XML 1658 writer.write(data); // different from XML 1659 writer.write('>'); // different from XML 1660 1661 // Always output a newline char if not inside of an 1662 // element. The whitespace is not significant in that 1663 // case. 1664 if (m_elemContext.m_currentElemDepth <= 0) 1665 outputLineSep(); 1666 1667 m_startNewLine = true; 1668 } 1669 catch(IOException e) 1670 { 1671 throw new SAXException(e); 1672 } 1673 } 1674 1675 // now generate the PI event 1676 if (m_tracer != null) 1677 super.fireEscapingEvent(target, data); 1678 } 1679 1680 /** 1681 * Receive notivication of a entityReference. 1682 * 1683 * @param name non-null reference to entity name string. 
1684 * 1685 * @throws org.xml.sax.SAXException 1686 */ 1687 public final void entityReference(String name) 1688 throws org.xml.sax.SAXException 1689 { 1690 try 1691 { 1692 1693 final java.io.Writer writer = m_writer; 1694 writer.write('&'); 1695 writer.write(name); 1696 writer.write(';'); 1697 1698 } catch(IOException e) 1699 { 1700 throw new SAXException(e); 1701 } 1702 } 1703 /** 1704 * @see ExtendedContentHandler#endElement(String) 1705 */ 1706 public final void endElement(String elemName) throws SAXException 1707 { 1708 endElement(null, null, elemName); 1709 } 1710 1711 /** 1712 * Process the attributes, which means to write out the currently 1713 * collected attributes to the writer. The attributes are not 1714 * cleared by this method 1715 * 1716 * @param writer the writer to write processed attributes to. 1717 * @param nAttrs the number of attributes in m_attributes 1718 * to be processed 1719 * 1720 * @throws org.xml.sax.SAXException 1721 */ 1722 public void processAttributes(java.io.Writer writer, int nAttrs) 1723 throws IOException,SAXException 1724 { 1725 /* 1726 * process the collected attributes 1727 */ 1728 for (int i = 0; i < nAttrs; i++) 1729 { 1730 processAttribute( 1731 writer, 1732 m_attributes.getQName(i), 1733 m_attributes.getValue(i), 1734 m_elemContext.m_elementDesc); 1735 } 1736 } 1737 1738 /** 1739 * For the enclosing elements starting tag write out out any attributes 1740 * followed by ">" 1741 * 1742 *@throws org.xml.sax.SAXException 1743 */ 1744 protected void closeStartTag() throws SAXException 1745 { 1746 try 1747 { 1748 1749 // finish processing attributes, time to fire off the start element event 1750 if (m_tracer != null) 1751 super.fireStartElem(m_elemContext.m_elementName); 1752 1753 int nAttrs = m_attributes.getLength(); 1754 if (nAttrs>0) 1755 { 1756 processAttributes(m_writer, nAttrs); 1757 // clear attributes object for re-use with next element 1758 m_attributes.clear(); 1759 } 1760 1761 m_writer.write('>'); 1762 1763 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1764 * lets determine if the current element is specified in the cdata- 1765 * section-elements list. 1766 */ 1767 if (m_StringOfCDATASections != null) 1768 m_elemContext.m_isCdataSection = isCdataSection(); 1769 if (m_doIndent) 1770 { 1771 m_isprevtext = false; 1772 m_preserves.push(m_ispreserve); 1773 } 1774 1775 } 1776 catch(IOException e) 1777 { 1778 throw new SAXException(e); 1779 } 1780 } 1781 1782 /** 1783 * This method is used when a prefix/uri namespace mapping 1784 * is indicated after the element was started with a 1785 * startElement() and before and endElement(). 1786 * startPrefixMapping(prefix,uri) would be used before the 1787 * startElement() call. 1788 * @param uri the URI of the namespace 1789 * @param prefix the prefix associated with the given URI. 1790 * 1791 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1792 */ 1793 public void namespaceAfterStartElement(String prefix, String uri) 1794 throws SAXException 1795 { 1796 // hack for XSLTC with finding URI for default namespace 1797 if (m_elemContext.m_elementURI == null) 1798 { 1799 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1800 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1801 { 1802 // the elements URI is not known yet, and it 1803 // doesn't have a prefix, and we are currently 1804 // setting the uri for prefix "", so we have 1805 // the uri for the element... lets remember it 1806 m_elemContext.m_elementURI = uri; 1807 } 1808 } 1809 startPrefixMapping(prefix,uri,false); 1810 } 1811 1812 public void startDTD(String name, String publicId, String systemId) 1813 throws SAXException 1814 { 1815 m_inDTD = true; 1816 super.startDTD(name, publicId, systemId); 1817 } 1818 1819 /** 1820 * Report the end of DTD declarations. 1821 * @throws org.xml.sax.SAXException The application may raise an exception. 
1822 * @see #startDTD 1823 */ 1824 public void endDTD() throws org.xml.sax.SAXException 1825 { 1826 m_inDTD = false; 1827 /* for ToHTMLStream the DOCTYPE is entirely output in the 1828 * startDocumentInternal() method, so don't do anything here 1829 */ 1830 } 1831 /** 1832 * This method does nothing. 1833 */ 1834 public void attributeDecl( 1835 String eName, 1836 String aName, 1837 String type, 1838 String valueDefault, 1839 String value) 1840 throws SAXException 1841 { 1842 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1843 } 1844 1845 /** 1846 * This method does nothing. 1847 */ 1848 public void elementDecl(String name, String model) throws SAXException 1849 { 1850 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1851 } 1852 /** 1853 * This method does nothing. 1854 */ 1855 public void internalEntityDecl(String name, String value) 1856 throws SAXException 1857 { 1858 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1859 } 1860 /** 1861 * This method does nothing. 1862 */ 1863 public void externalEntityDecl( 1864 String name, 1865 String publicId, 1866 String systemId) 1867 throws SAXException 1868 { 1869 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1870 } 1871 1872 /** 1873 * This method is used to add an attribute to the currently open element. 1874 * The caller has guaranted that this attribute is unique, which means that it 1875 * not been seen before and will not be seen again. 1876 * 1877 * @param name the qualified name of the attribute 1878 * @param value the value of the attribute which can contain only 1879 * ASCII printable characters characters in the range 32 to 127 inclusive. 1880 * @param flags the bit values of this integer give optimization information. 
1881 */ 1882 public void addUniqueAttribute(String name, String value, int flags) 1883 throws SAXException 1884 { 1885 try 1886 { 1887 final java.io.Writer writer = m_writer; 1888 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1889 { 1890 // "flags" has indicated that the characters 1891 // '>' '<' '&' and '"' are not in the value and 1892 // m_htmlcharInfo has recorded that there are no other 1893 // entities in the range 0 to 127 so we write out the 1894 // value directly 1895 writer.write(' '); 1896 writer.write(name); 1897 writer.write("=\""); 1898 writer.write(value); 1899 writer.write('"'); 1900 } 1901 else if ( 1902 (flags & HTML_ATTREMPTY) > 0 1903 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1904 { 1905 writer.write(' '); 1906 writer.write(name); 1907 } 1908 else 1909 { 1910 writer.write(' '); 1911 writer.write(name); 1912 writer.write("=\""); 1913 if ((flags & HTML_ATTRURL) > 0) 1914 { 1915 writeAttrURI(writer, value, m_specialEscapeURLs); 1916 } 1917 else 1918 { 1919 writeAttrString(writer, value, this.getEncoding()); 1920 } 1921 writer.write('"'); 1922 } 1923 } catch (IOException e) { 1924 throw new SAXException(e); 1925 } 1926 } 1927 1928 public void comment(char ch[], int start, int length) 1929 throws SAXException 1930 { 1931 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1932 if (m_inDTD) 1933 return; 1934 super.comment(ch, start, length); 1935 } 1936 1937 public boolean reset() 1938 { 1939 boolean ret = super.reset(); 1940 if (!ret) 1941 return false; 1942 initToHTMLStream(); 1943 return true; 1944 } 1945 1946 private void initToHTMLStream() 1947 { 1948 // m_elementDesc = null; 1949 m_inBlockElem = false; 1950 m_inDTD = false; 1951 // m_isRawStack.clear(); 1952 m_omitMetaTag = false; 1953 m_specialEscapeURLs = true; 1954 } 1955 1956 static class Trie 1957 { 1958 /** 1959 * A digital search trie for 7-bit ASCII text 1960 * The API is a subset of java.util.Hashtable 1961 * The key must be a 
7-bit ASCII string 1962 * The value may be any Java Object 1963 * One can get an object stored in a trie from its key, 1964 * but the search is either case sensitive or case 1965 * insensitive to the characters in the key, and this 1966 * choice of sensitivity or insensitivity is made when 1967 * the Trie is created, before any objects are put in it. 1968 * 1969 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 1970 * It exists to cut the serializers dependancy on that package. 1971 * 1972 * @xsl.usage internal 1973 */ 1974 1975 /** Size of the m_nextChar array. */ 1976 public static final int ALPHA_SIZE = 128; 1977 1978 /** The root node of the tree. */ 1979 final Node m_Root; 1980 1981 /** helper buffer to convert Strings to char arrays */ 1982 private char[] m_charBuffer = new char[0]; 1983 1984 /** true if the search for an object is lower case only with the key */ 1985 private final boolean m_lowerCaseOnly; 1986 1987 /** 1988 * Construct the trie that has a case insensitive search. 1989 */ 1990 public Trie() 1991 { 1992 m_Root = new Node(); 1993 m_lowerCaseOnly = false; 1994 } 1995 1996 /** 1997 * Construct the trie given the desired case sensitivity with the key. 1998 * @param lowerCaseOnly true if the search keys are to be loser case only, 1999 * not case insensitive. 2000 */ 2001 public Trie(boolean lowerCaseOnly) 2002 { 2003 m_Root = new Node(); 2004 m_lowerCaseOnly = lowerCaseOnly; 2005 } 2006 2007 /** 2008 * Put an object into the trie for lookup. 2009 * 2010 * @param key must be a 7-bit ASCII string 2011 * @param value any java object. 2012 * 2013 * @return The old object that matched key, or null. 
2014 */ 2015 public Object put(String key, Object value) 2016 { 2017 2018 final int len = key.length(); 2019 if (len > m_charBuffer.length) 2020 { 2021 // make the biggest buffer ever needed in get(String) 2022 m_charBuffer = new char[len]; 2023 } 2024 2025 Node node = m_Root; 2026 2027 for (int i = 0; i < len; i++) 2028 { 2029 Node nextNode = 2030 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2031 2032 if (nextNode != null) 2033 { 2034 node = nextNode; 2035 } 2036 else 2037 { 2038 for (; i < len; i++) 2039 { 2040 Node newNode = new Node(); 2041 if (m_lowerCaseOnly) 2042 { 2043 // put this value into the tree only with a lower case key 2044 node.m_nextChar[Character.toLowerCase( 2045 key.charAt(i))] = 2046 newNode; 2047 } 2048 else 2049 { 2050 // put this value into the tree with a case insensitive key 2051 node.m_nextChar[Character.toUpperCase( 2052 key.charAt(i))] = 2053 newNode; 2054 node.m_nextChar[Character.toLowerCase( 2055 key.charAt(i))] = 2056 newNode; 2057 } 2058 node = newNode; 2059 } 2060 break; 2061 } 2062 } 2063 2064 Object ret = node.m_Value; 2065 2066 node.m_Value = value; 2067 2068 return ret; 2069 } 2070 2071 /** 2072 * Get an object that matches the key. 2073 * 2074 * @param key must be a 7-bit ASCII string 2075 * 2076 * @return The object that matches the key, or null. 2077 */ 2078 public Object get(final String key) 2079 { 2080 2081 final int len = key.length(); 2082 2083 /* If the name is too long, we won't find it, this also keeps us 2084 * from overflowing m_charBuffer 2085 */ 2086 if (m_charBuffer.length < len) 2087 return null; 2088 2089 Node node = m_Root; 2090 switch (len) // optimize the look up based on the number of chars 2091 { 2092 // case 0 looks silly, but the generated bytecode runs 2093 // faster for lookup of elements of length 2 with this in 2094 // and a fair bit faster. Don't know why. 
2095 case 0 : 2096 { 2097 return null; 2098 } 2099 2100 case 1 : 2101 { 2102 final char ch = key.charAt(0); 2103 if (ch < ALPHA_SIZE) 2104 { 2105 node = node.m_nextChar[ch]; 2106 if (node != null) 2107 return node.m_Value; 2108 } 2109 return null; 2110 } 2111 // comment out case 2 because the default is faster 2112 // case 2 : 2113 // { 2114 // final char ch0 = key.charAt(0); 2115 // final char ch1 = key.charAt(1); 2116 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2117 // { 2118 // node = node.m_nextChar[ch0]; 2119 // if (node != null) 2120 // { 2121 // 2122 // if (ch1 < ALPHA_SIZE) 2123 // { 2124 // node = node.m_nextChar[ch1]; 2125 // if (node != null) 2126 // return node.m_Value; 2127 // } 2128 // } 2129 // } 2130 // return null; 2131 // } 2132 default : 2133 { 2134 for (int i = 0; i < len; i++) 2135 { 2136 // A thread-safe way to loop over the characters 2137 final char ch = key.charAt(i); 2138 if (ALPHA_SIZE <= ch) 2139 { 2140 // the key is not 7-bit ASCII so we won't find it here 2141 return null; 2142 } 2143 2144 node = node.m_nextChar[ch]; 2145 if (node == null) 2146 return null; 2147 } 2148 2149 return node.m_Value; 2150 } 2151 } 2152 } 2153 2154 /** 2155 * The node representation for the trie. 2156 * @xsl.usage internal 2157 */ 2158 private class Node 2159 { 2160 2161 /** 2162 * Constructor, creates a Node[ALPHA_SIZE]. 2163 */ 2164 Node() 2165 { 2166 m_nextChar = new Node[ALPHA_SIZE]; 2167 m_Value = null; 2168 } 2169 2170 /** The next nodes. */ 2171 final Node m_nextChar[]; 2172 2173 /** The value. */ 2174 Object m_Value; 2175 } 2176 /** 2177 * Construct the trie from another Trie. 2178 * Both the existing Trie and this new one share the same table for 2179 * lookup, and it is assumed that the table is fully populated and 2180 * not changing anymore. 2181 * 2182 * @param existingTrie the Trie that this one is a copy of. 2183 */ 2184 public Trie(Trie existingTrie) 2185 { 2186 // copy some fields from the existing Trie into this one. 
2187 m_Root = existingTrie.m_Root; 2188 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2189 2190 // get a buffer just big enough to hold the longest key in the table. 2191 int max = existingTrie.getLongestKeyLength(); 2192 m_charBuffer = new char[max]; 2193 } 2194 2195 /** 2196 * Get an object that matches the key. 2197 * This method is faster than get(), but is not thread-safe. 2198 * 2199 * @param key must be a 7-bit ASCII string 2200 * 2201 * @return The object that matches the key, or null. 2202 */ 2203 public Object get2(final String key) 2204 { 2205 2206 final int len = key.length(); 2207 2208 /* If the name is too long, we won't find it, this also keeps us 2209 * from overflowing m_charBuffer 2210 */ 2211 if (m_charBuffer.length < len) 2212 return null; 2213 2214 Node node = m_Root; 2215 switch (len) // optimize the look up based on the number of chars 2216 { 2217 // case 0 looks silly, but the generated bytecode runs 2218 // faster for lookup of elements of length 2 with this in 2219 // and a fair bit faster. Don't know why. 2220 case 0 : 2221 { 2222 return null; 2223 } 2224 2225 case 1 : 2226 { 2227 final char ch = key.charAt(0); 2228 if (ch < ALPHA_SIZE) 2229 { 2230 node = node.m_nextChar[ch]; 2231 if (node != null) 2232 return node.m_Value; 2233 } 2234 return null; 2235 } 2236 default : 2237 { 2238 /* Copy string into array. This is not thread-safe because 2239 * it modifies the contents of m_charBuffer. If multiple 2240 * threads were to use this Trie they all would be 2241 * using this same array (not good). So this 2242 * method is not thread-safe, but it is faster because 2243 * converting to a char[] and looping over elements of 2244 * the array is faster than a String's charAt(i). 
2245 */ 2246 key.getChars(0, len, m_charBuffer, 0); 2247 2248 for (int i = 0; i < len; i++) 2249 { 2250 final char ch = m_charBuffer[i]; 2251 if (ALPHA_SIZE <= ch) 2252 { 2253 // the key is not 7-bit ASCII so we won't find it here 2254 return null; 2255 } 2256 2257 node = node.m_nextChar[ch]; 2258 if (node == null) 2259 return null; 2260 } 2261 2262 return node.m_Value; 2263 } 2264 } 2265 } 2266 2267 /** 2268 * Get the length of the longest key used in the table. 2269 */ 2270 public int getLongestKeyLength() 2271 { 2272 return m_charBuffer.length; 2273 } 2274 } 2275 }