1 /* 2 * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.serializer; 22 23 import java.io.IOException; 24 import java.util.Properties; 25 26 import javax.xml.transform.Result; 27 28 import org.xml.sax.Attributes; 29 import org.xml.sax.SAXException; 30 31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 32 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 33 34 /** 35 * This serializer takes a series of SAX or 36 * SAX-like events and writes its output 37 * to the given stream. 38 * 39 * This class is not a public API, it is public 40 * because it is used from another package. 41 * 42 * @xsl.usage internal 43 * @LastModified: Sept 2018 44 */ 45 public final class ToHTMLStream extends ToStream 46 { 47 48 /** This flag is set while receiving events from the DTD */ 49 protected boolean m_inDTD = false; 50 51 /** True if the previous element is a block element. */ 52 private boolean m_isprevblock = false; 53 54 /** 55 * Map that tells which XML characters should have special treatment, and it 56 * provides character to entity name lookup. 
57 */ 58 private static final CharInfo m_htmlcharInfo = 59 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); 60 CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); 61 62 /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ 63 static final Trie m_elementFlags = new Trie(); 64 65 static { 66 initTagReference(m_elementFlags); 67 } 68 static void initTagReference(Trie m_elementFlags) { 69 70 // HTML 4.0 loose DTD 71 m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY)); 72 m_elementFlags.put( 73 "FRAME", 74 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 75 m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK)); 76 m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK)); 77 m_elementFlags.put( 78 "ISINDEX", 79 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 80 m_elementFlags.put( 81 "APPLET", 82 new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE)); 83 m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK)); 84 m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK)); 85 m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK)); 86 87 // HTML 4.0 strict DTD 88 m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 89 m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 90 m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 91 m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 92 m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 93 m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE)); 94 m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE)); 95 m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE)); 96 m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE)); 97 m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE)); 98 m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE)); 99 m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE)); 100 m_elementFlags.put("CITE", 
new ElemDesc(0 | ElemDesc.PHRASE)); 101 m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE)); 102 m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE)); 103 m_elementFlags.put( 104 "SUP", 105 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 106 m_elementFlags.put( 107 "SUB", 108 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 109 m_elementFlags.put( 110 "SPAN", 111 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 112 m_elementFlags.put( 113 "BDO", 114 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 115 m_elementFlags.put( 116 "BR", 117 new ElemDesc( 118 0 119 | ElemDesc.SPECIAL 120 | ElemDesc.ASPECIAL 121 | ElemDesc.EMPTY 122 | ElemDesc.BLOCK)); 123 m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK)); 124 m_elementFlags.put( 125 "ADDRESS", 126 new ElemDesc( 127 0 128 | ElemDesc.BLOCK 129 | ElemDesc.BLOCKFORM 130 | ElemDesc.BLOCKFORMFIELDSET)); 131 m_elementFlags.put( 132 "DIV", 133 new ElemDesc( 134 0 135 | ElemDesc.BLOCK 136 | ElemDesc.BLOCKFORM 137 | ElemDesc.BLOCKFORMFIELDSET)); 138 m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL)); 139 m_elementFlags.put( 140 "MAP", 141 new ElemDesc( 142 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK)); 143 m_elementFlags.put( 144 "AREA", 145 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 146 m_elementFlags.put( 147 "LINK", 148 new ElemDesc( 149 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 150 m_elementFlags.put( 151 "IMG", 152 new ElemDesc( 153 0 154 | ElemDesc.SPECIAL 155 | ElemDesc.ASPECIAL 156 | ElemDesc.EMPTY 157 | ElemDesc.WHITESPACESENSITIVE)); 158 m_elementFlags.put( 159 "OBJECT", 160 new ElemDesc( 161 0 162 | ElemDesc.SPECIAL 163 | ElemDesc.ASPECIAL 164 | ElemDesc.HEADMISC 165 | ElemDesc.WHITESPACESENSITIVE)); 166 m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY)); 167 m_elementFlags.put( 168 "HR", 169 new ElemDesc( 170 0 171 | ElemDesc.BLOCK 172 | ElemDesc.BLOCKFORM 173 | ElemDesc.BLOCKFORMFIELDSET 174 | 
ElemDesc.EMPTY)); 175 m_elementFlags.put( 176 "P", 177 new ElemDesc( 178 0 179 | ElemDesc.BLOCK 180 | ElemDesc.BLOCKFORM 181 | ElemDesc.BLOCKFORMFIELDSET)); 182 m_elementFlags.put( 183 "H1", 184 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 185 m_elementFlags.put( 186 "H2", 187 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 188 m_elementFlags.put( 189 "H3", 190 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 191 m_elementFlags.put( 192 "H4", 193 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 194 m_elementFlags.put( 195 "H5", 196 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 197 m_elementFlags.put( 198 "H6", 199 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 200 m_elementFlags.put( 201 "PRE", 202 new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK)); 203 m_elementFlags.put( 204 "Q", 205 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 206 m_elementFlags.put( 207 "BLOCKQUOTE", 208 new ElemDesc( 209 0 210 | ElemDesc.BLOCK 211 | ElemDesc.BLOCKFORM 212 | ElemDesc.BLOCKFORMFIELDSET)); 213 m_elementFlags.put("INS", new ElemDesc(0)); 214 m_elementFlags.put("DEL", new ElemDesc(0)); 215 m_elementFlags.put( 216 "DL", 217 new ElemDesc( 218 0 219 | ElemDesc.BLOCK 220 | ElemDesc.BLOCKFORM 221 | ElemDesc.BLOCKFORMFIELDSET)); 222 m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK)); 223 m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK)); 224 m_elementFlags.put( 225 "OL", 226 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 227 m_elementFlags.put( 228 "UL", 229 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 230 m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK)); 231 m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK)); 232 m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL)); 233 m_elementFlags.put( 234 "INPUT", 235 new ElemDesc( 236 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY)); 237 m_elementFlags.put( 238 "SELECT", 239 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 
240 m_elementFlags.put("OPTGROUP", new ElemDesc(0)); 241 m_elementFlags.put("OPTION", new ElemDesc(0)); 242 m_elementFlags.put( 243 "TEXTAREA", 244 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 245 m_elementFlags.put( 246 "FIELDSET", 247 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM)); 248 m_elementFlags.put("LEGEND", new ElemDesc(0)); 249 m_elementFlags.put( 250 "BUTTON", 251 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 252 m_elementFlags.put( 253 "TABLE", 254 new ElemDesc( 255 0 256 | ElemDesc.BLOCK 257 | ElemDesc.BLOCKFORM 258 | ElemDesc.BLOCKFORMFIELDSET)); 259 m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK)); 260 m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK)); 261 m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK)); 262 m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK)); 263 m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK)); 264 m_elementFlags.put( 265 "COL", 266 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 267 m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK)); 268 m_elementFlags.put("TH", new ElemDesc(0)); 269 m_elementFlags.put("TD", new ElemDesc(0)); 270 m_elementFlags.put( 271 "HEAD", 272 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM)); 273 m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK)); 274 m_elementFlags.put( 275 "BASE", 276 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 277 m_elementFlags.put( 278 "META", 279 new ElemDesc( 280 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 281 m_elementFlags.put( 282 "STYLE", 283 new ElemDesc( 284 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK)); 285 m_elementFlags.put( 286 "SCRIPT", 287 new ElemDesc( 288 0 289 | ElemDesc.SPECIAL 290 | ElemDesc.ASPECIAL 291 | ElemDesc.HEADMISC 292 | ElemDesc.RAW)); 293 m_elementFlags.put( 294 "NOSCRIPT", 295 new ElemDesc( 296 0 297 | ElemDesc.BLOCK 298 | ElemDesc.BLOCKFORM 299 | ElemDesc.BLOCKFORMFIELDSET)); 300 
m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK)); 301 302 // From "John Ky" <hand@syd.speednet.com.au 303 // Transitional Document Type Definition () 304 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont 305 m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 306 307 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE 308 m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 309 m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 310 311 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U 312 m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 313 314 // From "John Ky" <hand@syd.speednet.com.au 315 m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 316 317 // HTML 4.0, section 16.5 318 m_elementFlags.put( 319 "IFRAME", 320 new ElemDesc( 321 0 322 | ElemDesc.BLOCK 323 | ElemDesc.BLOCKFORM 324 | ElemDesc.BLOCKFORMFIELDSET)); 325 326 // Netscape 4 extension 327 m_elementFlags.put( 328 "LAYER", 329 new ElemDesc( 330 0 331 | ElemDesc.BLOCK 332 | ElemDesc.BLOCKFORM 333 | ElemDesc.BLOCKFORMFIELDSET)); 334 // Netscape 4 extension 335 m_elementFlags.put( 336 "ILAYER", 337 new ElemDesc( 338 0 339 | ElemDesc.BLOCK 340 | ElemDesc.BLOCKFORM 341 | ElemDesc.BLOCKFORMFIELDSET)); 342 343 344 // NOW FOR ATTRIBUTE INFORMATION . . . 
345 ElemDesc elemDesc; 346 347 348 // ---------------------------------------------- 349 elemDesc = (ElemDesc) m_elementFlags.get("a"); 350 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 351 elemDesc.setAttr("NAME", ElemDesc.ATTRURL); 352 353 // ---------------------------------------------- 354 elemDesc = (ElemDesc) m_elementFlags.get("area"); 355 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 356 elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY); 357 358 // ---------------------------------------------- 359 elemDesc = (ElemDesc) m_elementFlags.get("base"); 360 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 361 362 // ---------------------------------------------- 363 elemDesc = (ElemDesc) m_elementFlags.get("button"); 364 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 365 366 // ---------------------------------------------- 367 elemDesc = (ElemDesc) m_elementFlags.get("blockquote"); 368 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 369 370 // ---------------------------------------------- 371 elemDesc = (ElemDesc) m_elementFlags.get("del"); 372 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 373 374 // ---------------------------------------------- 375 elemDesc = (ElemDesc) m_elementFlags.get("dir"); 376 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 377 378 // ---------------------------------------------- 379 380 elemDesc = (ElemDesc) m_elementFlags.get("div"); 381 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension 382 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 383 384 // ---------------------------------------------- 385 elemDesc = (ElemDesc) m_elementFlags.get("dl"); 386 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 387 388 // ---------------------------------------------- 389 elemDesc = (ElemDesc) m_elementFlags.get("form"); 390 elemDesc.setAttr("ACTION", ElemDesc.ATTRURL); 391 392 // ---------------------------------------------- 393 // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM> 394 elemDesc = 
(ElemDesc) m_elementFlags.get("frame"); 395 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 396 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 397 elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY); 398 399 // ---------------------------------------------- 400 elemDesc = (ElemDesc) m_elementFlags.get("head"); 401 elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL); 402 403 // ---------------------------------------------- 404 elemDesc = (ElemDesc) m_elementFlags.get("hr"); 405 elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY); 406 407 // ---------------------------------------------- 408 // HTML 4.0, section 16.5 409 elemDesc = (ElemDesc) m_elementFlags.get("iframe"); 410 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 411 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 412 413 // ---------------------------------------------- 414 // Netscape 4 extension 415 elemDesc = (ElemDesc) m_elementFlags.get("ilayer"); 416 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 417 418 // ---------------------------------------------- 419 elemDesc = (ElemDesc) m_elementFlags.get("img"); 420 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 421 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 422 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 423 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 424 425 // ---------------------------------------------- 426 elemDesc = (ElemDesc) m_elementFlags.get("input"); 427 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 428 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 429 elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY); 430 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 431 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 432 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 433 434 // ---------------------------------------------- 435 elemDesc = (ElemDesc) m_elementFlags.get("ins"); 436 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 437 438 // ---------------------------------------------- 439 // Netscape 4 extension 440 elemDesc = (ElemDesc) m_elementFlags.get("layer"); 441 
elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 442 443 // ---------------------------------------------- 444 elemDesc = (ElemDesc) m_elementFlags.get("link"); 445 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 446 447 // ---------------------------------------------- 448 elemDesc = (ElemDesc) m_elementFlags.get("menu"); 449 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 450 451 // ---------------------------------------------- 452 elemDesc = (ElemDesc) m_elementFlags.get("object"); 453 elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL); 454 elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL); 455 elemDesc.setAttr("DATA", ElemDesc.ATTRURL); 456 elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL); 457 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 458 elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY); 459 460 // ---------------------------------------------- 461 elemDesc = (ElemDesc) m_elementFlags.get("ol"); 462 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 463 464 // ---------------------------------------------- 465 elemDesc = (ElemDesc) m_elementFlags.get("optgroup"); 466 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 467 468 // ---------------------------------------------- 469 elemDesc = (ElemDesc) m_elementFlags.get("option"); 470 elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY); 471 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 472 473 // ---------------------------------------------- 474 elemDesc = (ElemDesc) m_elementFlags.get("q"); 475 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 476 477 // ---------------------------------------------- 478 elemDesc = (ElemDesc) m_elementFlags.get("script"); 479 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 480 elemDesc.setAttr("FOR", ElemDesc.ATTRURL); 481 elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY); 482 483 // ---------------------------------------------- 484 elemDesc = (ElemDesc) m_elementFlags.get("select"); 485 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 486 elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY); 487 488 // 
---------------------------------------------- 489 elemDesc = (ElemDesc) m_elementFlags.get("table"); 490 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 491 492 // ---------------------------------------------- 493 elemDesc = (ElemDesc) m_elementFlags.get("td"); 494 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 495 496 // ---------------------------------------------- 497 elemDesc = (ElemDesc) m_elementFlags.get("textarea"); 498 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 499 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 500 501 // ---------------------------------------------- 502 elemDesc = (ElemDesc) m_elementFlags.get("th"); 503 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 504 505 // ---------------------------------------------- 506 // The nowrap attribute of a tr element is both 507 // a Netscape and Internet-Explorer extension 508 elemDesc = (ElemDesc) m_elementFlags.get("tr"); 509 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 510 511 // ---------------------------------------------- 512 elemDesc = (ElemDesc) m_elementFlags.get("ul"); 513 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 514 } 515 516 /** 517 * Dummy element for elements not found. 518 */ 519 static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); 520 521 /** True if URLs should be specially escaped with the %xx form. */ 522 private boolean m_specialEscapeURLs = true; 523 524 /** True if the META tag should be omitted. */ 525 private boolean m_omitMetaTag = false; 526 527 /** 528 * Tells if the formatter should use special URL escaping. 529 * 530 * @param bool True if URLs should be specially escaped with the %xx form. 531 */ 532 public void setSpecialEscapeURLs(boolean bool) 533 { 534 m_specialEscapeURLs = bool; 535 } 536 537 /** 538 * Tells if the formatter should omit the META tag. 539 * 540 * @param bool True if the META tag should be omitted. 
541 */ 542 public void setOmitMetaTag(boolean bool) 543 { 544 m_omitMetaTag = bool; 545 } 546 547 /** 548 * Specifies an output format for this serializer. It the 549 * serializer has already been associated with an output format, 550 * it will switch to the new format. This method should not be 551 * called while the serializer is in the process of serializing 552 * a document. 553 * 554 * This method can be called multiple times before starting 555 * the serialization of a particular result-tree. In principle 556 * all serialization parameters can be changed, with the exception 557 * of method="html" (it must be method="html" otherwise we 558 * shouldn't even have a ToHTMLStream object here!) 559 * 560 * @param format The output format or serialzation parameters 561 * to use. 562 */ 563 public void setOutputFormat(Properties format) 564 { 565 566 m_specialEscapeURLs = 567 OutputPropertyUtils.getBooleanProperty( 568 OutputPropertiesFactory.S_USE_URL_ESCAPING, 569 format); 570 571 m_omitMetaTag = 572 OutputPropertyUtils.getBooleanProperty( 573 OutputPropertiesFactory.S_OMIT_META_TAG, 574 format); 575 576 super.setOutputFormat(format); 577 } 578 579 /** 580 * Tells if the formatter should use special URL escaping. 581 * 582 * @return True if URLs should be specially escaped with the %xx form. 583 */ 584 private final boolean getSpecialEscapeURLs() 585 { 586 return m_specialEscapeURLs; 587 } 588 589 /** 590 * Tells if the formatter should omit the META tag. 591 * 592 * @return True if the META tag should be omitted. 593 */ 594 private final boolean getOmitMetaTag() 595 { 596 return m_omitMetaTag; 597 } 598 599 /** 600 * Get a description of the given element. 601 * 602 * @param name non-null name of element, case insensitive. 603 * 604 * @return non-null reference to ElemDesc, which may be m_dummy if no 605 * element description matches the given name. 
606 */ 607 public static final ElemDesc getElemDesc(String name) 608 { 609 /* this method used to return m_dummy when name was null 610 * but now it doesn't check and and requires non-null name. 611 */ 612 Object obj = m_elementFlags.get(name); 613 if (null != obj) 614 return (ElemDesc)obj; 615 return m_dummy; 616 } 617 618 /** 619 * A Trie that is just a copy of the "static" one. 620 * We need this one to be able to use the faster, but not thread-safe 621 * method Trie.get2(name) 622 */ 623 private Trie m_htmlInfo = new Trie(m_elementFlags); 624 /** 625 * Calls to this method could be replaced with calls to 626 * getElemDesc(name), but this one should be faster. 627 */ 628 private ElemDesc getElemDesc2(String name) 629 { 630 Object obj = m_htmlInfo.get2(name); 631 if (null != obj) 632 return (ElemDesc)obj; 633 return m_dummy; 634 } 635 636 /** 637 * Default constructor. 638 */ 639 public ToHTMLStream() 640 { 641 642 super(); 643 m_charInfo = m_htmlcharInfo; 644 // initialize namespaces 645 m_prefixMap = new NamespaceMappings(); 646 647 } 648 649 /** The name of the current element. */ 650 // private String m_currentElementName = null; 651 652 /** 653 * Receive notification of the beginning of a document. 654 * 655 * @throws org.xml.sax.SAXException Any SAX exception, possibly 656 * wrapping another exception. 
657 * 658 * @throws org.xml.sax.SAXException 659 */ 660 protected void startDocumentInternal() throws org.xml.sax.SAXException 661 { 662 super.startDocumentInternal(); 663 664 m_needToCallStartDocument = false; 665 m_needToOutputDocTypeDecl = true; 666 m_startNewLine = false; 667 setOmitXMLDeclaration(true); 668 669 if (true == m_needToOutputDocTypeDecl) 670 { 671 String doctypeSystem = getDoctypeSystem(); 672 String doctypePublic = getDoctypePublic(); 673 if ((null != doctypeSystem) || (null != doctypePublic)) 674 { 675 final java.io.Writer writer = m_writer; 676 try 677 { 678 writer.write("<!DOCTYPE html"); 679 680 if (null != doctypePublic) 681 { 682 writer.write(" PUBLIC \""); 683 writer.write(doctypePublic); 684 writer.write('"'); 685 } 686 687 if (null != doctypeSystem) 688 { 689 if (null == doctypePublic) 690 writer.write(" SYSTEM \""); 691 else 692 writer.write(" \""); 693 694 writer.write(doctypeSystem); 695 writer.write('"'); 696 } 697 698 writer.write('>'); 699 outputLineSep(); 700 } 701 catch(IOException e) 702 { 703 throw new SAXException(e); 704 } 705 } 706 } 707 708 m_needToOutputDocTypeDecl = false; 709 } 710 711 /** 712 * Receive notification of the end of a document. 713 * 714 * @throws org.xml.sax.SAXException Any SAX exception, possibly 715 * wrapping another exception. 716 * 717 * @throws org.xml.sax.SAXException 718 */ 719 public final void endDocument() throws org.xml.sax.SAXException 720 { 721 if (m_doIndent) { 722 flushCharactersBuffer(); 723 } 724 flushPending(); 725 if (m_doIndent && !m_isprevtext) 726 { 727 try 728 { 729 outputLineSep(); 730 } 731 catch(IOException e) 732 { 733 throw new SAXException(e); 734 } 735 } 736 737 flushWriter(); 738 if (m_tracer != null) 739 super.fireEndDoc(); 740 } 741 742 /** 743 * If the previous is an inline element, won't insert a new line before the 744 * text. 
*
     * @return true only when the superclass would indent for text and the
     *         previous element was a block element (m_isprevblock).
     */
    protected boolean shouldIndentForText() {
        return super.shouldIndentForText() && m_isprevblock;
    }

    /**
     * Only check m_doIndent, disregard m_ispreserveSpace.
     *
     * @return True if the content should be formatted.
     */
    protected boolean shouldFormatOutput() {
        return m_doIndent;
    }

    /**
     * Receive notification of the beginning of an element.
     *
     * Elements with a non-empty namespace URI are delegated to the XML
     * implementation in the superclass. For all others the element
     * description is looked up, indentation is applied when enabled, and
     * {@code <name} is written; the start tag is left open so that
     * attributes can still be added. The HEAD element additionally gets its
     * start tag closed and, unless suppressed, a META Content-Type tag.
     *
     * @param namespaceURI the namespace URI; null or empty selects HTML handling
     * @param localName the local name, passed on when a context is pushed
     * @param name
     *            The element type name.
     * @param atts
     *            The attributes attached to the element, if any.
     * @throws org.xml.sax.SAXException
     *             Any SAX exception, possibly wrapping another exception.
     * @see #endElement
     * @see org.xml.sax.Attributes
     */
    public void startElement(
        String namespaceURI,
        String localName,
        String name,
        Attributes atts)
        throws SAXException
    {
        if (m_doIndent) {
            // will add extra one if having namespace but no matter
            m_childNodeNum++;
            flushCharactersBuffer();
        }
        ElemContext elemContext = m_elemContext;

        // clean up any pending things first
        // (the branches are mutually exclusive: only the first pending
        // state that matches is handled on this call)
        if (elemContext.m_startTagOpen)
        {
            closeStartTag();
            elemContext.m_startTagOpen = false;
        }
        else if (m_cdataTagOpen)
        {
            closeCDATA();
            m_cdataTagOpen = false;
        }
        else if (m_needToCallStartDocument)
        {
            startDocumentInternal();
            m_needToCallStartDocument = false;
        }


        // if this element has a namespace then treat it like XML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.startElement(namespaceURI, localName, name, atts);

            return;
        }

        try
        {
            // getElemDesc2(name) is faster than getElemDesc(name)
            ElemDesc elemDesc = getElemDesc2(name);
            int elemFlags = elemDesc.getFlags();

            // deal with indentation issues first
            if (m_doIndent)
            {
                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                if ((elemContext.m_elementName != null)
                        // If this element is a block element,
                        // or if this is not a block element, then if the
                        // previous is neither a text nor an inline
                        && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
                {
                    m_startNewLine = true;

                    indent();
                }
                m_isprevblock = isBlockElement;
            }

            // save any attributes for later processing
            if (atts != null)
                addAttributes(atts);

            m_isprevtext = false;
            final java.io.Writer writer = m_writer;
            writer.write('<');
            writer.write(name);

            // remember the parent's child count and start counting afresh
            // for the children of this element
            if (m_doIndent) {
                m_childNodeNumStack.add(m_childNodeNum);
                m_childNodeNum = 0;
            }

            if (m_tracer != null)
                firePseudoAttributes();

            // OPTIMIZE-EMPTY
            if ((elemFlags & ElemDesc.EMPTY) != 0)
            {
                // an optimization for elements which are expected
                // to be empty.
                m_elemContext = elemContext.push();
                /* XSLTC sometimes calls namespaceAfterStartElement()
                 * so we need to remember the name
                 */
                m_elemContext.m_elementName = name;
                m_elemContext.m_elementDesc = elemDesc;
                return;
            }
            else
            {
                elemContext = elemContext.push(namespaceURI,localName,name);
                m_elemContext = elemContext;
                elemContext.m_elementDesc = elemDesc;
                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;

                // set m_startNewLine for the next element
                if (m_doIndent) {
                    // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
                    // in this branch m_elemContext.m_elementName is not null
                    boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                    if (isBlockElement)
                        m_startNewLine = true;
                }
            }


            if ((elemFlags & ElemDesc.HEADELEM) != 0)
            {
                // This is the <HEAD> element, do some special processing
                closeStartTag();
                elemContext.m_startTagOpen = false;
                if (!m_omitMetaTag)
                {
                    if (m_doIndent)
                        indent();
                    writer.write(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
                    String encoding = getEncoding();
                    String encode = Encodings.getMimeEncoding(encoding);
                    writer.write(encode);
                    writer.write("\">");
                }
            }
        }
        catch (IOException e)
        {
            // surface writer failures through the SAX exception channel
            throw new SAXException(e);
        }
    }

    /**
     * Receive notification of the end of an element.
     *
     * Elements with a non-empty namespace URI are delegated to the XML
     * implementation in the superclass. Otherwise either a plain end tag is
     * written, or - if the start tag is still open - the gathered
     * attributes are written first and the element is closed as
     * {@code ></name>} (or just {@code >} for elements flagged EMPTY).
     * Finally the element context is popped.
     *
     * @param namespaceURI the namespace URI; null or empty selects HTML handling
     * @param localName the local name, only used when delegating to the superclass
     * @param name The element type name
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *     wrapping another exception.
     */
    public final void endElement(
        final String namespaceURI,
        final String localName,
        final String name)
        throws org.xml.sax.SAXException
    {
        if (m_doIndent) {
            flushCharactersBuffer();
        }
        // deal with any pending issues
        if (m_cdataTagOpen)
            closeCDATA();

        // if the element has a namespace, treat it like XML, not HTML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.endElement(namespaceURI, localName, name);

            return;
        }

        try
        {

            ElemContext elemContext = m_elemContext;
            // the description stored on the context by startElement()
            final ElemDesc elemDesc = elemContext.m_elementDesc;
            final int elemFlags = elemDesc.getFlags();
            final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;

            // deal with any indentation issues
            if (m_doIndent)
            {
                final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
                boolean shouldIndent = false;

                // If this element is a block element,
                // or if this is not a block element, then if the previous is
                // neither a text nor an inline
                if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
                {
                    m_startNewLine = true;
                    shouldIndent = true;
                }
                // indent the end tag only when the start tag is already closed
                // and the element had more than one child or did not end in text
                if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
                    indent(elemContext.m_currentElemDepth - 1);

                m_isprevblock = isBlockElement;
            }

            final java.io.Writer writer = m_writer;
            if (!elemContext.m_startTagOpen)
            {
                writer.write("</");
                writer.write(name);
                writer.write('>');
            }
            else
            {
                // the start-tag open when this method was called,
                // so we need to process it now.

                if (m_tracer != null)
                    super.fireStartElem(name);

                // the starting tag was still open when we received this endElement() call
                // so we need to process any gathered attributes NOW, before they go away.
                int nAttrs = m_attributes.getLength();
                if (nAttrs > 0)
                {
                    processAttributes(m_writer, nAttrs);
                    // clear attributes object for re-use with next element
                    m_attributes.clear();
                }
                if (!elemEmpty)
                {
                    // As per Dave/Paul recommendation 12/06/2000
                    // if (shouldIndent)
                    // writer.write('>');
                    //  indent(m_currentIndent);

                    writer.write("></");
                    writer.write(name);
                    writer.write('>');
                }
                else
                {
                    // elements flagged EMPTY get no end tag
                    writer.write('>');
                }
            }

            if (m_doIndent) {
                // restore the child count saved by startElement()
                m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1);
                // clean up because the element has ended
                m_isprevtext = false;
            }
            // fire off the end element event
            if (m_tracer != null)
                super.fireEndElem(name);

            // OPTIMIZE-EMPTY
            if (elemEmpty)
            {
                // a quick exit if the HTML element had no children.
                // This block of code can be removed if the corresponding block of code
                // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
                m_elemContext = elemContext.m_prev;
                return;
            }

            // some more clean because the element has ended.
            m_elemContext = elemContext.m_prev;
            //        m_isRawStack.pop();
        }
        catch (IOException e)
        {
            // surface writer failures through the SAX exception channel
            throw new SAXException(e);
        }
    }

    /**
     * Process an attribute.
     * @param   writer The writer to write the processed output to.
     * @param   name   The name of the attribute.
     * @param   value   The value of the attribute.
     * @param   elemDesc The description of the HTML element
     *           that has this attribute.
1045 * 1046 * @throws org.xml.sax.SAXException 1047 */ 1048 protected void processAttribute( 1049 java.io.Writer writer, 1050 String name, 1051 String value, 1052 ElemDesc elemDesc) 1053 throws IOException, SAXException 1054 { 1055 writer.write(' '); 1056 1057 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1058 && elemDesc != null 1059 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1060 { 1061 writer.write(name); 1062 } 1063 else 1064 { 1065 // %REVIEW% %OPT% 1066 // Two calls to single-char write may NOT 1067 // be more efficient than one to string-write... 1068 writer.write(name); 1069 writer.write("=\""); 1070 if ( elemDesc != null 1071 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1072 writeAttrURI(writer, value, m_specialEscapeURLs); 1073 else 1074 writeAttrString(writer, value, this.getEncoding()); 1075 writer.write('"'); 1076 1077 } 1078 } 1079 1080 /** 1081 * Tell if a character is an ASCII digit. 1082 */ 1083 private boolean isASCIIDigit(char c) 1084 { 1085 return (c >= '0' && c <= '9'); 1086 } 1087 1088 /** 1089 * Make an integer into an HH hex value. 1090 * Does no checking on the size of the input, since this 1091 * is only meant to be used locally by writeAttrURI. 1092 * 1093 * @param i must be a value less than 255. 1094 * 1095 * @return should be a two character string. 1096 */ 1097 private static String makeHHString(int i) 1098 { 1099 String s = Integer.toHexString(i).toUpperCase(); 1100 if (s.length() == 1) 1101 { 1102 s = "0" + s; 1103 } 1104 return s; 1105 } 1106 1107 /** 1108 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1109 * @param str must be 2 characters long 1110 * 1111 * @return true or false 1112 */ 1113 private boolean isHHSign(String str) 1114 { 1115 boolean sign = true; 1116 try 1117 { 1118 char r = (char) Integer.parseInt(str, 16); 1119 } 1120 catch (NumberFormatException e) 1121 { 1122 sign = false; 1123 } 1124 return sign; 1125 } 1126 1127 /** 1128 * Write the specified <var>string</var> after substituting non ASCII characters, 1129 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1130 * 1131 * @param string String to convert to XML format. 1132 * @param doURLEscaping True if we should try to encode as 1133 * per http://www.ietf.org/rfc/rfc2396.txt. 1134 * 1135 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1136 */ 1137 public void writeAttrURI( 1138 final java.io.Writer writer, String string, boolean doURLEscaping) 1139 throws IOException 1140 { 1141 // http://www.ietf.org/rfc/rfc2396.txt says: 1142 // A URI is always in an "escaped" form, since escaping or unescaping a 1143 // completed URI might change its semantics. Normally, the only time 1144 // escape encodings can safely be made is when the URI is being created 1145 // from its component parts; each component may have its own set of 1146 // characters that are reserved, so only the mechanism responsible for 1147 // generating or interpreting that component can determine whether or 1148 // not escaping a character will change its semantics. Likewise, a URI 1149 // must be separated into its components before the escaped characters 1150 // within those components can be safely decoded. 1151 // 1152 // ...So we do our best to do limited escaping of the URL, without 1153 // causing damage. If the URL is already properly escaped, in theory, this 1154 // function should not change the string value. 
1155 1156 final int end = string.length(); 1157 if (end > m_attrBuff.length) 1158 { 1159 m_attrBuff = new char[end*2 + 1]; 1160 } 1161 string.getChars(0,end, m_attrBuff, 0); 1162 final char[] chars = m_attrBuff; 1163 1164 int cleanStart = 0; 1165 int cleanLength = 0; 1166 1167 1168 char ch = 0; 1169 for (int i = 0; i < end; i++) 1170 { 1171 ch = chars[i]; 1172 1173 if ((ch < 32) || (ch > 126)) 1174 { 1175 if (cleanLength > 0) 1176 { 1177 writer.write(chars, cleanStart, cleanLength); 1178 cleanLength = 0; 1179 } 1180 if (doURLEscaping) 1181 { 1182 // Encode UTF16 to UTF8. 1183 // Reference is Unicode, A Primer, by Tony Graham. 1184 // Page 92. 1185 1186 // Note that Kay doesn't escape 0x20... 1187 // if(ch == 0x20) // Not sure about this... -sb 1188 // { 1189 // writer.write(ch); 1190 // } 1191 // else 1192 if (ch <= 0x7F) 1193 { 1194 writer.write('%'); 1195 writer.write(makeHHString(ch)); 1196 } 1197 else if (ch <= 0x7FF) 1198 { 1199 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1200 // and set two high bits. 1201 int high = (ch >> 6) | 0xC0; 1202 int low = (ch & 0x3F) | 0x80; 1203 // First 6 bits, + high bit 1204 writer.write('%'); 1205 writer.write(makeHHString(high)); 1206 writer.write('%'); 1207 writer.write(makeHHString(low)); 1208 } 1209 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1210 { 1211 // I'm sure this can be done in 3 instructions, but I choose 1212 // to try and do it exactly like it is done in the book, at least 1213 // until we are sure this is totally clean. I don't think performance 1214 // is a big issue with this particular function, though I could be 1215 // wrong. Also, the stuff below clearly does more masking than 1216 // it needs to do. 1217 1218 // Clear high 6 bits. 
1219 int highSurrogate = ((int) ch) & 0x03FF; 1220 1221 // Middle 4 bits (wwww) + 1 1222 // "Note that the value of wwww from the high surrogate bit pattern 1223 // is incremented to make the uuuuu bit pattern in the scalar value 1224 // so the surrogate pair don't address the BMP." 1225 int wwww = ((highSurrogate & 0x03C0) >> 6); 1226 int uuuuu = wwww + 1; 1227 1228 // next 4 bits 1229 int zzzz = (highSurrogate & 0x003C) >> 2; 1230 1231 // low 2 bits 1232 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1233 1234 // Get low surrogate character. 1235 ch = chars[++i]; 1236 1237 // Clear high 6 bits. 1238 int lowSurrogate = ((int) ch) & 0x03FF; 1239 1240 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1241 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1242 1243 // bottom 6 bits. 1244 int xxxxxx = (lowSurrogate & 0x003F); 1245 1246 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1247 int byte2 = 1248 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1249 int byte3 = 0x80 | yyyyyy; 1250 int byte4 = 0x80 | xxxxxx; 1251 1252 writer.write('%'); 1253 writer.write(makeHHString(byte1)); 1254 writer.write('%'); 1255 writer.write(makeHHString(byte2)); 1256 writer.write('%'); 1257 writer.write(makeHHString(byte3)); 1258 writer.write('%'); 1259 writer.write(makeHHString(byte4)); 1260 } 1261 else 1262 { 1263 int high = (ch >> 12) | 0xE0; // top 4 bits 1264 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1265 // middle 6 bits 1266 int low = (ch & 0x3F) | 0x80; 1267 // First 6 bits, + high bit 1268 writer.write('%'); 1269 writer.write(makeHHString(high)); 1270 writer.write('%'); 1271 writer.write(makeHHString(middle)); 1272 writer.write('%'); 1273 writer.write(makeHHString(low)); 1274 } 1275 1276 } 1277 else if (escapingNotNeeded(ch)) 1278 { 1279 writer.write(ch); 1280 } 1281 else 1282 { 1283 writer.write("&#"); 1284 writer.write(Integer.toString(ch)); 1285 writer.write(';'); 1286 } 1287 // In this character range we have first written out any previously 
accumulated 1288 // "clean" characters, then processed the current more complicated character, 1289 // which may have incremented "i". 1290 // We now we reset the next possible clean character. 1291 cleanStart = i + 1; 1292 } 1293 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1294 // not allowing quotes in the URI proper syntax, nor in the fragment 1295 // identifier, we believe that it's OK to double escape quotes. 1296 else if (ch == '"') 1297 { 1298 // If the character is a '%' number number, try to avoid double-escaping. 1299 // There is a question if this is legal behavior. 1300 1301 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1302 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1303 1304 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1305 1306 // We are no longer escaping '%' 1307 1308 if (cleanLength > 0) 1309 { 1310 writer.write(chars, cleanStart, cleanLength); 1311 cleanLength = 0; 1312 } 1313 1314 1315 // Mike Kay encodes this as ", so he may know something I don't? 1316 if (doURLEscaping) 1317 writer.write("%22"); 1318 else 1319 writer.write("""); // we have to escape this, I guess. 1320 1321 // We have written out any clean characters, then the escaped '%' and now we 1322 // We now we reset the next possible clean character. 1323 cleanStart = i + 1; 1324 } 1325 else if (ch == '&') 1326 { 1327 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1328 // instead of "&" to avoid confusion with the beginning of a character 1329 // reference (entity reference open delimiter). 
1330 if (cleanLength > 0) 1331 { 1332 writer.write(chars, cleanStart, cleanLength); 1333 cleanLength = 0; 1334 } 1335 writer.write("&"); 1336 cleanStart = i + 1; 1337 } 1338 else 1339 { 1340 // no processing for this character, just count how 1341 // many characters in a row that we have that need no processing 1342 cleanLength++; 1343 } 1344 } 1345 1346 // are there any clean characters at the end of the array 1347 // that we haven't processed yet? 1348 if (cleanLength > 1) 1349 { 1350 // if the whole string can be written out as-is do so 1351 // otherwise write out the clean chars at the end of the 1352 // array 1353 if (cleanStart == 0) 1354 writer.write(string); 1355 else 1356 writer.write(chars, cleanStart, cleanLength); 1357 } 1358 else if (cleanLength == 1) 1359 { 1360 // a little optimization for 1 clean character 1361 // (we could have let the previous if(...) handle them all) 1362 writer.write(ch); 1363 } 1364 } 1365 1366 /** 1367 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1368 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1369 * 1370 * @param string String to convert to XML format. 1371 * @param encoding CURRENTLY NOT IMPLEMENTED. 
*
     * @throws IOException propagated from the writer
     * @throws org.xml.sax.SAXException
     */
    public void writeAttrString(
        final java.io.Writer writer, String string, String encoding)
        throws IOException, SAXException
    {
        // Copy the value into the shared attribute buffer, growing it if needed.
        final int end = string.length();
        if (end > m_attrBuff.length)
        {
            m_attrBuff = new char[end * 2 + 1];
        }
        string.getChars(0, end, m_attrBuff, 0);
        final char[] chars = m_attrBuff;



        // [cleanStart, cleanStart + cleanLength) is the pending run of
        // characters that can be copied to the writer unchanged.
        int cleanStart = 0;
        int cleanLength = 0;

        char ch = 0;
        for (int i = 0; i < end; i++)
        {
            ch = chars[i];

            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
            {
                cleanLength++;
            }
            else if ('<' == ch || '>' == ch)
            {
                cleanLength++; // no escaping in this case, as specified in 15.2
            }
            else if (
                ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
            {
                // "&{" is left alone
                cleanLength++; // no escaping in this case, as specified in 15.2
            }
            else
            {
                // flush the pending clean run before handling this character
                if (cleanLength > 0)
                {
                    writer.write(chars,cleanStart,cleanLength);
                    cleanLength = 0;
                }
                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);

                if (i != pos)
                {
                    // accumDefaultEntity returned a different position; resume
                    // from there (the loop increment adds the 1 back)
                    i = pos - 1;
                }
                else
                {
                    if (Encodings.isHighUTF16Surrogate(ch) ||
                            Encodings.isLowUTF16Surrogate(ch))
                    {
                        if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
                            // move the index if the low surrogate is consumed
                            // as writeUTF16Surrogate has written the pair
                            if (Encodings.isHighUTF16Surrogate(ch)) {
                                i++;
                            }
                        }
                    }
                    // NOTE(review): there is no "else" here, so after the
                    // surrogate branch control still reaches the generic
                    // output below with ch unchanged -- confirm whether that
                    // can emit the surrogate a second time.

                    // The next is kind of a hack to keep from escaping in the case
                    // of Shift_JIS and the like.

                    /*
                    else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
                    && (ch != 160))
                    {
                    writer.write(ch);  // no escaping in this case
                    }
                    else
                    */
                    String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
                    if (null != outputStringForChar)
                    {
                        writer.write(outputStringForChar);
                    }
                    else if (escapingNotNeeded(ch))
                    {
                        writer.write(ch); // no escaping in this case
                    }
                    else
                    {
                        // fall back to a decimal character reference
                        writer.write("&#");
                        writer.write(Integer.toString(ch));
                        writer.write(';');
                    }
                }
                cleanStart = i + 1;
            }
        } // end of for()

        // are there any clean characters at the end of the array
        // that we haven't processed yet?
        if (cleanLength > 1)
        {
            // if the whole string can be written out as-is do so
            // otherwise write out the clean chars at the end of the
            // array
            if (cleanStart == 0)
                writer.write(string);
            else
                writer.write(chars, cleanStart, cleanLength);
        }
        else if (cleanLength == 1)
        {
            // a little optimization for 1 clean character
            // (we could have let the previous if(...) handle them all)
            writer.write(ch);
        }
    }



    /**
     * Receive notification of character data.
     *
     * <p>The Parser will call this method to report each chunk of
     * character data.
SAX parsers may return all contiguous character 1499 * data in a single chunk, or they may split it into several 1500 * chunks; however, all of the characters in any single event 1501 * must come from the same external entity, so that the Locator 1502 * provides useful information.</p> 1503 * 1504 * <p>The application must not attempt to read from the array 1505 * outside of the specified range.</p> 1506 * 1507 * <p>Note that some parsers will report whitespace using the 1508 * ignorableWhitespace() method rather than this one (validating 1509 * parsers must do so).</p> 1510 * 1511 * @param chars The characters from the XML document. 1512 * @param start The start position in the array. 1513 * @param length The number of characters to read from the array. 1514 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1515 * wrapping another exception. 1516 * @see #ignorableWhitespace 1517 * @see org.xml.sax.Locator 1518 * 1519 * @throws org.xml.sax.SAXException 1520 */ 1521 public final void characters(char chars[], int start, int length) 1522 throws org.xml.sax.SAXException 1523 { 1524 1525 if (m_elemContext.m_isRaw) 1526 { 1527 try 1528 { 1529 if (m_elemContext.m_startTagOpen) 1530 { 1531 closeStartTag(); 1532 m_elemContext.m_startTagOpen = false; 1533 } 1534 1535 // With m_ispreserve just set true it looks like shouldIndent() 1536 // will always return false, so drop any possible indentation. 
1537 // if (shouldIndent()) 1538 // indent(); 1539 1540 // writer.write("<![CDATA["); 1541 // writer.write(chars, start, length); 1542 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1543 m_isprevtext = true; 1544 // writer.write("]]>"); 1545 1546 // time to generate characters event 1547 if (m_tracer != null) 1548 super.fireCharEvent(chars, start, length); 1549 1550 return; 1551 } 1552 catch (IOException ioe) 1553 { 1554 throw new org.xml.sax.SAXException( 1555 Utils.messages.createMessage( 1556 MsgKey.ER_OIERROR, 1557 null), 1558 ioe); 1559 //"IO error", ioe); 1560 } 1561 } 1562 else 1563 { 1564 super.characters(chars, start, length); 1565 } 1566 } 1567 1568 /** 1569 * Receive notification of cdata. 1570 * 1571 * <p>The Parser will call this method to report each chunk of 1572 * character data. SAX parsers may return all contiguous character 1573 * data in a single chunk, or they may split it into several 1574 * chunks; however, all of the characters in any single event 1575 * must come from the same external entity, so that the Locator 1576 * provides useful information.</p> 1577 * 1578 * <p>The application must not attempt to read from the array 1579 * outside of the specified range.</p> 1580 * 1581 * <p>Note that some parsers will report whitespace using the 1582 * ignorableWhitespace() method rather than this one (validating 1583 * parsers must do so).</p> 1584 * 1585 * @param ch The characters from the XML document. 1586 * @param start The start position in the array. 1587 * @param length The number of characters to read from the array. 1588 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1589 * wrapping another exception. 
1590 * @see #ignorableWhitespace 1591 * @see org.xml.sax.Locator 1592 * 1593 * @throws org.xml.sax.SAXException 1594 */ 1595 public final void cdata(char ch[], int start, int length) 1596 throws org.xml.sax.SAXException 1597 { 1598 if ((null != m_elemContext.m_elementName) 1599 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1600 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1601 { 1602 try 1603 { 1604 if (m_elemContext.m_startTagOpen) 1605 { 1606 closeStartTag(); 1607 m_elemContext.m_startTagOpen = false; 1608 } 1609 1610 if (shouldIndent()) 1611 indent(); 1612 1613 // writer.write(ch, start, length); 1614 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1615 } 1616 catch (IOException ioe) 1617 { 1618 throw new org.xml.sax.SAXException( 1619 Utils.messages.createMessage( 1620 MsgKey.ER_OIERROR, 1621 null), 1622 ioe); 1623 //"IO error", ioe); 1624 } 1625 } 1626 else 1627 { 1628 super.cdata(ch, start, length); 1629 } 1630 } 1631 1632 /** 1633 * Receive notification of a processing instruction. 1634 * 1635 * @param target The processing instruction target. 1636 * @param data The processing instruction data, or null if 1637 * none was supplied. 1638 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1639 * wrapping another exception. 1640 * 1641 * @throws org.xml.sax.SAXException 1642 */ 1643 public void processingInstruction(String target, String data) 1644 throws org.xml.sax.SAXException 1645 { 1646 if (m_doIndent) { 1647 m_childNodeNum++; 1648 flushCharactersBuffer(); 1649 } 1650 // Process any pending starDocument and startElement first. 1651 flushPending(); 1652 1653 // Use a fairly nasty hack to tell if the next node is supposed to be 1654 // unescaped text. 
1655 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1656 { 1657 startNonEscaping(); 1658 } 1659 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1660 { 1661 endNonEscaping(); 1662 } 1663 else 1664 { 1665 try 1666 { 1667 if (m_elemContext.m_startTagOpen) 1668 { 1669 closeStartTag(); 1670 m_elemContext.m_startTagOpen = false; 1671 } 1672 else if (m_needToCallStartDocument) 1673 startDocumentInternal(); 1674 1675 if (shouldIndent()) 1676 indent(); 1677 1678 final java.io.Writer writer = m_writer; 1679 //writer.write("<?" + target); 1680 writer.write("<?"); 1681 writer.write(target); 1682 1683 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1684 writer.write(' '); 1685 1686 //writer.write(data + ">"); // different from XML 1687 writer.write(data); // different from XML 1688 writer.write('>'); // different from XML 1689 1690 // Always output a newline char if not inside of an 1691 // element. The whitespace is not significant in that 1692 // case. 1693 if (m_elemContext.m_currentElemDepth <= 0) 1694 outputLineSep(); 1695 1696 m_startNewLine = true; 1697 } 1698 catch(IOException e) 1699 { 1700 throw new SAXException(e); 1701 } 1702 } 1703 1704 // now generate the PI event 1705 if (m_tracer != null) 1706 super.fireEscapingEvent(target, data); 1707 } 1708 1709 /** 1710 * Receive notivication of a entityReference. 1711 * 1712 * @param name non-null reference to entity name string. 
1713 * 1714 * @throws org.xml.sax.SAXException 1715 */ 1716 public final void entityReference(String name) 1717 throws org.xml.sax.SAXException 1718 { 1719 try 1720 { 1721 1722 final java.io.Writer writer = m_writer; 1723 writer.write('&'); 1724 writer.write(name); 1725 writer.write(';'); 1726 1727 } catch(IOException e) 1728 { 1729 throw new SAXException(e); 1730 } 1731 } 1732 /** 1733 * @see ExtendedContentHandler#endElement(String) 1734 */ 1735 public final void endElement(String elemName) throws SAXException 1736 { 1737 endElement(null, null, elemName); 1738 } 1739 1740 /** 1741 * Process the attributes, which means to write out the currently 1742 * collected attributes to the writer. The attributes are not 1743 * cleared by this method 1744 * 1745 * @param writer the writer to write processed attributes to. 1746 * @param nAttrs the number of attributes in m_attributes 1747 * to be processed 1748 * 1749 * @throws org.xml.sax.SAXException 1750 */ 1751 public void processAttributes(java.io.Writer writer, int nAttrs) 1752 throws IOException,SAXException 1753 { 1754 /* 1755 * process the collected attributes 1756 */ 1757 for (int i = 0; i < nAttrs; i++) 1758 { 1759 processAttribute( 1760 writer, 1761 m_attributes.getQName(i), 1762 m_attributes.getValue(i), 1763 m_elemContext.m_elementDesc); 1764 } 1765 } 1766 1767 /** 1768 * For the enclosing elements starting tag write out out any attributes 1769 * followed by ">" 1770 * 1771 *@throws org.xml.sax.SAXException 1772 */ 1773 protected void closeStartTag() throws SAXException 1774 { 1775 try 1776 { 1777 1778 // finish processing attributes, time to fire off the start element event 1779 if (m_tracer != null) 1780 super.fireStartElem(m_elemContext.m_elementName); 1781 1782 int nAttrs = m_attributes.getLength(); 1783 if (nAttrs>0) 1784 { 1785 processAttributes(m_writer, nAttrs); 1786 // clear attributes object for re-use with next element 1787 m_attributes.clear(); 1788 } 1789 1790 m_writer.write('>'); 1791 1792 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1793 * lets determine if the current element is specified in the cdata- 1794 * section-elements list. 1795 */ 1796 if (m_StringOfCDATASections != null) 1797 m_elemContext.m_isCdataSection = isCdataSection(); 1798 1799 } 1800 catch(IOException e) 1801 { 1802 throw new SAXException(e); 1803 } 1804 } 1805 1806 /** 1807 * This method is used when a prefix/uri namespace mapping 1808 * is indicated after the element was started with a 1809 * startElement() and before and endElement(). 1810 * startPrefixMapping(prefix,uri) would be used before the 1811 * startElement() call. 1812 * @param uri the URI of the namespace 1813 * @param prefix the prefix associated with the given URI. 1814 * 1815 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1816 */ 1817 public void namespaceAfterStartElement(String prefix, String uri) 1818 throws SAXException 1819 { 1820 // hack for XSLTC with finding URI for default namespace 1821 if (m_elemContext.m_elementURI == null) 1822 { 1823 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1824 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1825 { 1826 // the elements URI is not known yet, and it 1827 // doesn't have a prefix, and we are currently 1828 // setting the uri for prefix "", so we have 1829 // the uri for the element... lets remember it 1830 m_elemContext.m_elementURI = uri; 1831 } 1832 } 1833 startPrefixMapping(prefix,uri,false); 1834 } 1835 1836 public void startDTD(String name, String publicId, String systemId) 1837 throws SAXException 1838 { 1839 m_inDTD = true; 1840 super.startDTD(name, publicId, systemId); 1841 } 1842 1843 /** 1844 * Report the end of DTD declarations. 1845 * @throws org.xml.sax.SAXException The application may raise an exception. 
* @see #startDTD
     */
    public void endDTD() throws org.xml.sax.SAXException
    {
        // DTD events are over; comment() only suppresses output while this
        // flag is set.
        m_inDTD = false;
        /* for ToHTMLStream the DOCTYPE is entirely output in the
         * startDocumentInternal() method, so don't do anything here
         */
    }

    /**
     * This method does nothing: attribute-list declarations are ignored.
     */
    public void attributeDecl(
        String eName,
        String aName,
        String type,
        String valueDefault,
        String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method does nothing: element type declarations are ignored.
     */
    public void elementDecl(String name, String model) throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method does nothing: internal entity declarations are ignored.
     */
    public void internalEntityDecl(String name, String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method does nothing: external entity declarations are ignored.
     */
    public void externalEntityDecl(
        String name,
        String publicId,
        String systemId)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method is used to add an attribute to the currently open element.
     * The caller has guaranteed that this attribute is unique, which means that it
     * has not been seen before and will not be seen again.
     *
     * @param name the qualified name of the attribute
     * @param value the value of the attribute which can contain only
     * ASCII printable characters in the range 32 to 127 inclusive.
     * @param flags the bit values of this integer give optimization information.
1905 */ 1906 public void addUniqueAttribute(String name, String value, int flags) 1907 throws SAXException 1908 { 1909 try 1910 { 1911 final java.io.Writer writer = m_writer; 1912 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1913 { 1914 // "flags" has indicated that the characters 1915 // '>' '<' '&' and '"' are not in the value and 1916 // m_htmlcharInfo has recorded that there are no other 1917 // entities in the range 0 to 127 so we write out the 1918 // value directly 1919 writer.write(' '); 1920 writer.write(name); 1921 writer.write("=\""); 1922 writer.write(value); 1923 writer.write('"'); 1924 } 1925 else if ( 1926 (flags & HTML_ATTREMPTY) > 0 1927 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1928 { 1929 writer.write(' '); 1930 writer.write(name); 1931 } 1932 else 1933 { 1934 writer.write(' '); 1935 writer.write(name); 1936 writer.write("=\""); 1937 if ((flags & HTML_ATTRURL) > 0) 1938 { 1939 writeAttrURI(writer, value, m_specialEscapeURLs); 1940 } 1941 else 1942 { 1943 writeAttrString(writer, value, this.getEncoding()); 1944 } 1945 writer.write('"'); 1946 } 1947 } catch (IOException e) { 1948 throw new SAXException(e); 1949 } 1950 } 1951 1952 public void comment(char ch[], int start, int length) 1953 throws SAXException 1954 { 1955 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1956 if (m_inDTD) 1957 return; 1958 super.comment(ch, start, length); 1959 } 1960 1961 public boolean reset() 1962 { 1963 boolean ret = super.reset(); 1964 if (!ret) 1965 return false; 1966 initToHTMLStream(); 1967 return true; 1968 } 1969 1970 private void initToHTMLStream() 1971 { 1972 m_isprevblock = false; 1973 m_inDTD = false; 1974 m_omitMetaTag = false; 1975 m_specialEscapeURLs = true; 1976 } 1977 1978 static class Trie 1979 { 1980 /** 1981 * A digital search trie for 7-bit ASCII text 1982 * The API is a subset of java.util.Hashtable 1983 * The key must be a 7-bit ASCII string 1984 * The value may be any Java Object 
1985 * One can get an object stored in a trie from its key, 1986 * but the search is either case sensitive or case 1987 * insensitive to the characters in the key, and this 1988 * choice of sensitivity or insensitivity is made when 1989 * the Trie is created, before any objects are put in it. 1990 * 1991 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 1992 * It exists to cut the serializers dependancy on that package. 1993 * 1994 * @xsl.usage internal 1995 */ 1996 1997 /** Size of the m_nextChar array. */ 1998 public static final int ALPHA_SIZE = 128; 1999 2000 /** The root node of the tree. */ 2001 final Node m_Root; 2002 2003 /** helper buffer to convert Strings to char arrays */ 2004 private char[] m_charBuffer = new char[0]; 2005 2006 /** true if the search for an object is lower case only with the key */ 2007 private final boolean m_lowerCaseOnly; 2008 2009 /** 2010 * Construct the trie that has a case insensitive search. 2011 */ 2012 public Trie() 2013 { 2014 m_Root = new Node(); 2015 m_lowerCaseOnly = false; 2016 } 2017 2018 /** 2019 * Construct the trie given the desired case sensitivity with the key. 2020 * @param lowerCaseOnly true if the search keys are to be loser case only, 2021 * not case insensitive. 2022 */ 2023 public Trie(boolean lowerCaseOnly) 2024 { 2025 m_Root = new Node(); 2026 m_lowerCaseOnly = lowerCaseOnly; 2027 } 2028 2029 /** 2030 * Put an object into the trie for lookup. 2031 * 2032 * @param key must be a 7-bit ASCII string 2033 * @param value any java object. 2034 * 2035 * @return The old object that matched key, or null. 
2036 */ 2037 public Object put(String key, Object value) 2038 { 2039 2040 final int len = key.length(); 2041 if (len > m_charBuffer.length) 2042 { 2043 // make the biggest buffer ever needed in get(String) 2044 m_charBuffer = new char[len]; 2045 } 2046 2047 Node node = m_Root; 2048 2049 for (int i = 0; i < len; i++) 2050 { 2051 Node nextNode = 2052 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2053 2054 if (nextNode != null) 2055 { 2056 node = nextNode; 2057 } 2058 else 2059 { 2060 for (; i < len; i++) 2061 { 2062 Node newNode = new Node(); 2063 if (m_lowerCaseOnly) 2064 { 2065 // put this value into the tree only with a lower case key 2066 node.m_nextChar[Character.toLowerCase( 2067 key.charAt(i))] = 2068 newNode; 2069 } 2070 else 2071 { 2072 // put this value into the tree with a case insensitive key 2073 node.m_nextChar[Character.toUpperCase( 2074 key.charAt(i))] = 2075 newNode; 2076 node.m_nextChar[Character.toLowerCase( 2077 key.charAt(i))] = 2078 newNode; 2079 } 2080 node = newNode; 2081 } 2082 break; 2083 } 2084 } 2085 2086 Object ret = node.m_Value; 2087 2088 node.m_Value = value; 2089 2090 return ret; 2091 } 2092 2093 /** 2094 * Get an object that matches the key. 2095 * 2096 * @param key must be a 7-bit ASCII string 2097 * 2098 * @return The object that matches the key, or null. 2099 */ 2100 public Object get(final String key) 2101 { 2102 2103 final int len = key.length(); 2104 2105 /* If the name is too long, we won't find it, this also keeps us 2106 * from overflowing m_charBuffer 2107 */ 2108 if (m_charBuffer.length < len) 2109 return null; 2110 2111 Node node = m_Root; 2112 switch (len) // optimize the look up based on the number of chars 2113 { 2114 // case 0 looks silly, but the generated bytecode runs 2115 // faster for lookup of elements of length 2 with this in 2116 // and a fair bit faster. Don't know why. 
2117 case 0 : 2118 { 2119 return null; 2120 } 2121 2122 case 1 : 2123 { 2124 final char ch = key.charAt(0); 2125 if (ch < ALPHA_SIZE) 2126 { 2127 node = node.m_nextChar[ch]; 2128 if (node != null) 2129 return node.m_Value; 2130 } 2131 return null; 2132 } 2133 // comment out case 2 because the default is faster 2134 // case 2 : 2135 // { 2136 // final char ch0 = key.charAt(0); 2137 // final char ch1 = key.charAt(1); 2138 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2139 // { 2140 // node = node.m_nextChar[ch0]; 2141 // if (node != null) 2142 // { 2143 // 2144 // if (ch1 < ALPHA_SIZE) 2145 // { 2146 // node = node.m_nextChar[ch1]; 2147 // if (node != null) 2148 // return node.m_Value; 2149 // } 2150 // } 2151 // } 2152 // return null; 2153 // } 2154 default : 2155 { 2156 for (int i = 0; i < len; i++) 2157 { 2158 // A thread-safe way to loop over the characters 2159 final char ch = key.charAt(i); 2160 if (ALPHA_SIZE <= ch) 2161 { 2162 // the key is not 7-bit ASCII so we won't find it here 2163 return null; 2164 } 2165 2166 node = node.m_nextChar[ch]; 2167 if (node == null) 2168 return null; 2169 } 2170 2171 return node.m_Value; 2172 } 2173 } 2174 } 2175 2176 /** 2177 * The node representation for the trie. 2178 * @xsl.usage internal 2179 */ 2180 private class Node 2181 { 2182 2183 /** 2184 * Constructor, creates a Node[ALPHA_SIZE]. 2185 */ 2186 Node() 2187 { 2188 m_nextChar = new Node[ALPHA_SIZE]; 2189 m_Value = null; 2190 } 2191 2192 /** The next nodes. */ 2193 final Node m_nextChar[]; 2194 2195 /** The value. */ 2196 Object m_Value; 2197 } 2198 /** 2199 * Construct the trie from another Trie. 2200 * Both the existing Trie and this new one share the same table for 2201 * lookup, and it is assumed that the table is fully populated and 2202 * not changing anymore. 2203 * 2204 * @param existingTrie the Trie that this one is a copy of. 2205 */ 2206 public Trie(Trie existingTrie) 2207 { 2208 // copy some fields from the existing Trie into this one. 
2209 m_Root = existingTrie.m_Root; 2210 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2211 2212 // get a buffer just big enough to hold the longest key in the table. 2213 int max = existingTrie.getLongestKeyLength(); 2214 m_charBuffer = new char[max]; 2215 } 2216 2217 /** 2218 * Get an object that matches the key. 2219 * This method is faster than get(), but is not thread-safe. 2220 * 2221 * @param key must be a 7-bit ASCII string 2222 * 2223 * @return The object that matches the key, or null. 2224 */ 2225 public Object get2(final String key) 2226 { 2227 2228 final int len = key.length(); 2229 2230 /* If the name is too long, we won't find it, this also keeps us 2231 * from overflowing m_charBuffer 2232 */ 2233 if (m_charBuffer.length < len) 2234 return null; 2235 2236 Node node = m_Root; 2237 switch (len) // optimize the look up based on the number of chars 2238 { 2239 // case 0 looks silly, but the generated bytecode runs 2240 // faster for lookup of elements of length 2 with this in 2241 // and a fair bit faster. Don't know why. 2242 case 0 : 2243 { 2244 return null; 2245 } 2246 2247 case 1 : 2248 { 2249 final char ch = key.charAt(0); 2250 if (ch < ALPHA_SIZE) 2251 { 2252 node = node.m_nextChar[ch]; 2253 if (node != null) 2254 return node.m_Value; 2255 } 2256 return null; 2257 } 2258 default : 2259 { 2260 /* Copy string into array. This is not thread-safe because 2261 * it modifies the contents of m_charBuffer. If multiple 2262 * threads were to use this Trie they all would be 2263 * using this same array (not good). So this 2264 * method is not thread-safe, but it is faster because 2265 * converting to a char[] and looping over elements of 2266 * the array is faster than a String's charAt(i). 
2267 */ 2268 key.getChars(0, len, m_charBuffer, 0); 2269 2270 for (int i = 0; i < len; i++) 2271 { 2272 final char ch = m_charBuffer[i]; 2273 if (ALPHA_SIZE <= ch) 2274 { 2275 // the key is not 7-bit ASCII so we won't find it here 2276 return null; 2277 } 2278 2279 node = node.m_nextChar[ch]; 2280 if (node == null) 2281 return null; 2282 } 2283 2284 return node.m_Value; 2285 } 2286 } 2287 } 2288 2289 /** 2290 * Get the length of the longest key used in the table. 2291 */ 2292 public int getLongestKeyLength() 2293 { 2294 return m_charBuffer.length; 2295 } 2296 } 2297 }