1 /* 2 * Copyright (c) 2014, 2016 Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.serializer; 22 23 import java.io.IOException; 24 import java.util.Properties; 25 26 import javax.xml.transform.Result; 27 28 import org.xml.sax.Attributes; 29 import org.xml.sax.SAXException; 30 31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 32 import com.sun.org.apache.xml.internal.serializer.utils.Utils; 33 34 /** 35 * This serializer takes a series of SAX or 36 * SAX-like events and writes its output 37 * to the given stream. 38 * 39 * This class is not a public API, it is public 40 * because it is used from another package. 41 * 42 * @xsl.usage internal 43 */ 44 public final class ToHTMLStream extends ToStream 45 { 46 47 /** This flag is set while receiving events from the DTD */ 48 protected boolean m_inDTD = false; 49 50 /** True if the previous element is a block element. */ 51 private boolean m_isprevblock = false; 52 53 /** 54 * Map that tells which XML characters should have special treatment, and it 55 * provides character to entity name lookup. 
56 */ 57 private static final CharInfo m_htmlcharInfo = 58 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); 59 CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); 60 61 /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ 62 static final Trie m_elementFlags = new Trie(); 63 64 static { 65 initTagReference(m_elementFlags); 66 } 67 static void initTagReference(Trie m_elementFlags) { 68 69 // HTML 4.0 loose DTD 70 m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY)); 71 m_elementFlags.put( 72 "FRAME", 73 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 74 m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK)); 75 m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK)); 76 m_elementFlags.put( 77 "ISINDEX", 78 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 79 m_elementFlags.put( 80 "APPLET", 81 new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE)); 82 m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK)); 83 m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK)); 84 m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK)); 85 86 // HTML 4.0 strict DTD 87 m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 88 m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 89 m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 90 m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 91 m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 92 m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE)); 93 m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE)); 94 m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE)); 95 m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE)); 96 m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE)); 97 m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE)); 98 m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE)); 99 m_elementFlags.put("CITE", new 
ElemDesc(0 | ElemDesc.PHRASE)); 100 m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE)); 101 m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE)); 102 m_elementFlags.put( 103 "SUP", 104 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 105 m_elementFlags.put( 106 "SUB", 107 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 108 m_elementFlags.put( 109 "SPAN", 110 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 111 m_elementFlags.put( 112 "BDO", 113 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 114 m_elementFlags.put( 115 "BR", 116 new ElemDesc( 117 0 118 | ElemDesc.SPECIAL 119 | ElemDesc.ASPECIAL 120 | ElemDesc.EMPTY 121 | ElemDesc.BLOCK)); 122 m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK)); 123 m_elementFlags.put( 124 "ADDRESS", 125 new ElemDesc( 126 0 127 | ElemDesc.BLOCK 128 | ElemDesc.BLOCKFORM 129 | ElemDesc.BLOCKFORMFIELDSET)); 130 m_elementFlags.put( 131 "DIV", 132 new ElemDesc( 133 0 134 | ElemDesc.BLOCK 135 | ElemDesc.BLOCKFORM 136 | ElemDesc.BLOCKFORMFIELDSET)); 137 m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL)); 138 m_elementFlags.put( 139 "MAP", 140 new ElemDesc( 141 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK)); 142 m_elementFlags.put( 143 "AREA", 144 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 145 m_elementFlags.put( 146 "LINK", 147 new ElemDesc( 148 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 149 m_elementFlags.put( 150 "IMG", 151 new ElemDesc( 152 0 153 | ElemDesc.SPECIAL 154 | ElemDesc.ASPECIAL 155 | ElemDesc.EMPTY 156 | ElemDesc.WHITESPACESENSITIVE)); 157 m_elementFlags.put( 158 "OBJECT", 159 new ElemDesc( 160 0 161 | ElemDesc.SPECIAL 162 | ElemDesc.ASPECIAL 163 | ElemDesc.HEADMISC 164 | ElemDesc.WHITESPACESENSITIVE)); 165 m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY)); 166 m_elementFlags.put( 167 "HR", 168 new ElemDesc( 169 0 170 | ElemDesc.BLOCK 171 | ElemDesc.BLOCKFORM 172 | ElemDesc.BLOCKFORMFIELDSET 173 | 
ElemDesc.EMPTY)); 174 m_elementFlags.put( 175 "P", 176 new ElemDesc( 177 0 178 | ElemDesc.BLOCK 179 | ElemDesc.BLOCKFORM 180 | ElemDesc.BLOCKFORMFIELDSET)); 181 m_elementFlags.put( 182 "H1", 183 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 184 m_elementFlags.put( 185 "H2", 186 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 187 m_elementFlags.put( 188 "H3", 189 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 190 m_elementFlags.put( 191 "H4", 192 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 193 m_elementFlags.put( 194 "H5", 195 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 196 m_elementFlags.put( 197 "H6", 198 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK)); 199 m_elementFlags.put( 200 "PRE", 201 new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK)); 202 m_elementFlags.put( 203 "Q", 204 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL)); 205 m_elementFlags.put( 206 "BLOCKQUOTE", 207 new ElemDesc( 208 0 209 | ElemDesc.BLOCK 210 | ElemDesc.BLOCKFORM 211 | ElemDesc.BLOCKFORMFIELDSET)); 212 m_elementFlags.put("INS", new ElemDesc(0)); 213 m_elementFlags.put("DEL", new ElemDesc(0)); 214 m_elementFlags.put( 215 "DL", 216 new ElemDesc( 217 0 218 | ElemDesc.BLOCK 219 | ElemDesc.BLOCKFORM 220 | ElemDesc.BLOCKFORMFIELDSET)); 221 m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK)); 222 m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK)); 223 m_elementFlags.put( 224 "OL", 225 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 226 m_elementFlags.put( 227 "UL", 228 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK)); 229 m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK)); 230 m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK)); 231 m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL)); 232 m_elementFlags.put( 233 "INPUT", 234 new ElemDesc( 235 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY)); 236 m_elementFlags.put( 237 "SELECT", 238 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 
239 m_elementFlags.put("OPTGROUP", new ElemDesc(0)); 240 m_elementFlags.put("OPTION", new ElemDesc(0)); 241 m_elementFlags.put( 242 "TEXTAREA", 243 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 244 m_elementFlags.put( 245 "FIELDSET", 246 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM)); 247 m_elementFlags.put("LEGEND", new ElemDesc(0)); 248 m_elementFlags.put( 249 "BUTTON", 250 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL)); 251 m_elementFlags.put( 252 "TABLE", 253 new ElemDesc( 254 0 255 | ElemDesc.BLOCK 256 | ElemDesc.BLOCKFORM 257 | ElemDesc.BLOCKFORMFIELDSET)); 258 m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK)); 259 m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK)); 260 m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK)); 261 m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK)); 262 m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK)); 263 m_elementFlags.put( 264 "COL", 265 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 266 m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK)); 267 m_elementFlags.put("TH", new ElemDesc(0)); 268 m_elementFlags.put("TD", new ElemDesc(0)); 269 m_elementFlags.put( 270 "HEAD", 271 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM)); 272 m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK)); 273 m_elementFlags.put( 274 "BASE", 275 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK)); 276 m_elementFlags.put( 277 "META", 278 new ElemDesc( 279 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK)); 280 m_elementFlags.put( 281 "STYLE", 282 new ElemDesc( 283 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK)); 284 m_elementFlags.put( 285 "SCRIPT", 286 new ElemDesc( 287 0 288 | ElemDesc.SPECIAL 289 | ElemDesc.ASPECIAL 290 | ElemDesc.HEADMISC 291 | ElemDesc.RAW)); 292 m_elementFlags.put( 293 "NOSCRIPT", 294 new ElemDesc( 295 0 296 | ElemDesc.BLOCK 297 | ElemDesc.BLOCKFORM 298 | ElemDesc.BLOCKFORMFIELDSET)); 299 
m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK)); 300 301 // From "John Ky" <hand@syd.speednet.com.au 302 // Transitional Document Type Definition () 303 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont 304 m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 305 306 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE 307 m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 308 m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 309 310 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U 311 m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 312 313 // From "John Ky" <hand@syd.speednet.com.au 314 m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE)); 315 316 // HTML 4.0, section 16.5 317 m_elementFlags.put( 318 "IFRAME", 319 new ElemDesc( 320 0 321 | ElemDesc.BLOCK 322 | ElemDesc.BLOCKFORM 323 | ElemDesc.BLOCKFORMFIELDSET)); 324 325 // Netscape 4 extension 326 m_elementFlags.put( 327 "LAYER", 328 new ElemDesc( 329 0 330 | ElemDesc.BLOCK 331 | ElemDesc.BLOCKFORM 332 | ElemDesc.BLOCKFORMFIELDSET)); 333 // Netscape 4 extension 334 m_elementFlags.put( 335 "ILAYER", 336 new ElemDesc( 337 0 338 | ElemDesc.BLOCK 339 | ElemDesc.BLOCKFORM 340 | ElemDesc.BLOCKFORMFIELDSET)); 341 342 343 // NOW FOR ATTRIBUTE INFORMATION . . . 
344 ElemDesc elemDesc; 345 346 347 // ---------------------------------------------- 348 elemDesc = (ElemDesc) m_elementFlags.get("a"); 349 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 350 elemDesc.setAttr("NAME", ElemDesc.ATTRURL); 351 352 // ---------------------------------------------- 353 elemDesc = (ElemDesc) m_elementFlags.get("area"); 354 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 355 elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY); 356 357 // ---------------------------------------------- 358 elemDesc = (ElemDesc) m_elementFlags.get("base"); 359 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 360 361 // ---------------------------------------------- 362 elemDesc = (ElemDesc) m_elementFlags.get("button"); 363 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 364 365 // ---------------------------------------------- 366 elemDesc = (ElemDesc) m_elementFlags.get("blockquote"); 367 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 368 369 // ---------------------------------------------- 370 elemDesc = (ElemDesc) m_elementFlags.get("del"); 371 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 372 373 // ---------------------------------------------- 374 elemDesc = (ElemDesc) m_elementFlags.get("dir"); 375 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 376 377 // ---------------------------------------------- 378 379 elemDesc = (ElemDesc) m_elementFlags.get("div"); 380 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension 381 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 382 383 // ---------------------------------------------- 384 elemDesc = (ElemDesc) m_elementFlags.get("dl"); 385 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 386 387 // ---------------------------------------------- 388 elemDesc = (ElemDesc) m_elementFlags.get("form"); 389 elemDesc.setAttr("ACTION", ElemDesc.ATTRURL); 390 391 // ---------------------------------------------- 392 // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM> 393 elemDesc = 
(ElemDesc) m_elementFlags.get("frame"); 394 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 395 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 396 elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY); 397 398 // ---------------------------------------------- 399 elemDesc = (ElemDesc) m_elementFlags.get("head"); 400 elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL); 401 402 // ---------------------------------------------- 403 elemDesc = (ElemDesc) m_elementFlags.get("hr"); 404 elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY); 405 406 // ---------------------------------------------- 407 // HTML 4.0, section 16.5 408 elemDesc = (ElemDesc) m_elementFlags.get("iframe"); 409 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 410 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 411 412 // ---------------------------------------------- 413 // Netscape 4 extension 414 elemDesc = (ElemDesc) m_elementFlags.get("ilayer"); 415 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 416 417 // ---------------------------------------------- 418 elemDesc = (ElemDesc) m_elementFlags.get("img"); 419 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 420 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL); 421 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 422 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 423 424 // ---------------------------------------------- 425 elemDesc = (ElemDesc) m_elementFlags.get("input"); 426 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 427 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 428 elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY); 429 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 430 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY); 431 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 432 433 // ---------------------------------------------- 434 elemDesc = (ElemDesc) m_elementFlags.get("ins"); 435 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 436 437 // ---------------------------------------------- 438 // Netscape 4 extension 439 elemDesc = (ElemDesc) m_elementFlags.get("layer"); 440 
elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 441 442 // ---------------------------------------------- 443 elemDesc = (ElemDesc) m_elementFlags.get("link"); 444 elemDesc.setAttr("HREF", ElemDesc.ATTRURL); 445 446 // ---------------------------------------------- 447 elemDesc = (ElemDesc) m_elementFlags.get("menu"); 448 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 449 450 // ---------------------------------------------- 451 elemDesc = (ElemDesc) m_elementFlags.get("object"); 452 elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL); 453 elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL); 454 elemDesc.setAttr("DATA", ElemDesc.ATTRURL); 455 elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL); 456 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL); 457 elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY); 458 459 // ---------------------------------------------- 460 elemDesc = (ElemDesc) m_elementFlags.get("ol"); 461 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 462 463 // ---------------------------------------------- 464 elemDesc = (ElemDesc) m_elementFlags.get("optgroup"); 465 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 466 467 // ---------------------------------------------- 468 elemDesc = (ElemDesc) m_elementFlags.get("option"); 469 elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY); 470 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 471 472 // ---------------------------------------------- 473 elemDesc = (ElemDesc) m_elementFlags.get("q"); 474 elemDesc.setAttr("CITE", ElemDesc.ATTRURL); 475 476 // ---------------------------------------------- 477 elemDesc = (ElemDesc) m_elementFlags.get("script"); 478 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); 479 elemDesc.setAttr("FOR", ElemDesc.ATTRURL); 480 elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY); 481 482 // ---------------------------------------------- 483 elemDesc = (ElemDesc) m_elementFlags.get("select"); 484 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 485 elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY); 486 487 // 
---------------------------------------------- 488 elemDesc = (ElemDesc) m_elementFlags.get("table"); 489 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension 490 491 // ---------------------------------------------- 492 elemDesc = (ElemDesc) m_elementFlags.get("td"); 493 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 494 495 // ---------------------------------------------- 496 elemDesc = (ElemDesc) m_elementFlags.get("textarea"); 497 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY); 498 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY); 499 500 // ---------------------------------------------- 501 elemDesc = (ElemDesc) m_elementFlags.get("th"); 502 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 503 504 // ---------------------------------------------- 505 // The nowrap attribute of a tr element is both 506 // a Netscape and Internet-Explorer extension 507 elemDesc = (ElemDesc) m_elementFlags.get("tr"); 508 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); 509 510 // ---------------------------------------------- 511 elemDesc = (ElemDesc) m_elementFlags.get("ul"); 512 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY); 513 } 514 515 /** 516 * Dummy element for elements not found. 517 */ 518 static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); 519 520 /** True if URLs should be specially escaped with the %xx form. */ 521 private boolean m_specialEscapeURLs = true; 522 523 /** True if the META tag should be omitted. */ 524 private boolean m_omitMetaTag = false; 525 526 /** 527 * Tells if the formatter should use special URL escaping. 528 * 529 * @param bool True if URLs should be specially escaped with the %xx form. 530 */ 531 public void setSpecialEscapeURLs(boolean bool) 532 { 533 m_specialEscapeURLs = bool; 534 } 535 536 /** 537 * Tells if the formatter should omit the META tag. 538 * 539 * @param bool True if the META tag should be omitted. 
540 */ 541 public void setOmitMetaTag(boolean bool) 542 { 543 m_omitMetaTag = bool; 544 } 545 546 /** 547 * Specifies an output format for this serializer. It the 548 * serializer has already been associated with an output format, 549 * it will switch to the new format. This method should not be 550 * called while the serializer is in the process of serializing 551 * a document. 552 * 553 * This method can be called multiple times before starting 554 * the serialization of a particular result-tree. In principle 555 * all serialization parameters can be changed, with the exception 556 * of method="html" (it must be method="html" otherwise we 557 * shouldn't even have a ToHTMLStream object here!) 558 * 559 * @param format The output format or serialzation parameters 560 * to use. 561 */ 562 public void setOutputFormat(Properties format) 563 { 564 565 m_specialEscapeURLs = 566 OutputPropertyUtils.getBooleanProperty( 567 OutputPropertiesFactory.S_USE_URL_ESCAPING, 568 format); 569 570 m_omitMetaTag = 571 OutputPropertyUtils.getBooleanProperty( 572 OutputPropertiesFactory.S_OMIT_META_TAG, 573 format); 574 575 super.setOutputFormat(format); 576 } 577 578 /** 579 * Tells if the formatter should use special URL escaping. 580 * 581 * @return True if URLs should be specially escaped with the %xx form. 582 */ 583 private final boolean getSpecialEscapeURLs() 584 { 585 return m_specialEscapeURLs; 586 } 587 588 /** 589 * Tells if the formatter should omit the META tag. 590 * 591 * @return True if the META tag should be omitted. 592 */ 593 private final boolean getOmitMetaTag() 594 { 595 return m_omitMetaTag; 596 } 597 598 /** 599 * Get a description of the given element. 600 * 601 * @param name non-null name of element, case insensitive. 602 * 603 * @return non-null reference to ElemDesc, which may be m_dummy if no 604 * element description matches the given name. 
605 */ 606 public static final ElemDesc getElemDesc(String name) 607 { 608 /* this method used to return m_dummy when name was null 609 * but now it doesn't check and and requires non-null name. 610 */ 611 Object obj = m_elementFlags.get(name); 612 if (null != obj) 613 return (ElemDesc)obj; 614 return m_dummy; 615 } 616 617 /** 618 * A Trie that is just a copy of the "static" one. 619 * We need this one to be able to use the faster, but not thread-safe 620 * method Trie.get2(name) 621 */ 622 private Trie m_htmlInfo = new Trie(m_elementFlags); 623 /** 624 * Calls to this method could be replaced with calls to 625 * getElemDesc(name), but this one should be faster. 626 */ 627 private ElemDesc getElemDesc2(String name) 628 { 629 Object obj = m_htmlInfo.get2(name); 630 if (null != obj) 631 return (ElemDesc)obj; 632 return m_dummy; 633 } 634 635 /** 636 * Default constructor. 637 */ 638 public ToHTMLStream() 639 { 640 641 super(); 642 m_charInfo = m_htmlcharInfo; 643 // initialize namespaces 644 m_prefixMap = new NamespaceMappings(); 645 646 } 647 648 /** The name of the current element. */ 649 // private String m_currentElementName = null; 650 651 /** 652 * Receive notification of the beginning of a document. 653 * 654 * @throws org.xml.sax.SAXException Any SAX exception, possibly 655 * wrapping another exception. 
656 * 657 * @throws org.xml.sax.SAXException 658 */ 659 protected void startDocumentInternal() throws org.xml.sax.SAXException 660 { 661 super.startDocumentInternal(); 662 663 m_needToCallStartDocument = false; 664 m_needToOutputDocTypeDecl = true; 665 m_startNewLine = false; 666 setOmitXMLDeclaration(true); 667 668 if (true == m_needToOutputDocTypeDecl) 669 { 670 String doctypeSystem = getDoctypeSystem(); 671 String doctypePublic = getDoctypePublic(); 672 if ((null != doctypeSystem) || (null != doctypePublic)) 673 { 674 final java.io.Writer writer = m_writer; 675 try 676 { 677 writer.write("<!DOCTYPE html"); 678 679 if (null != doctypePublic) 680 { 681 writer.write(" PUBLIC \""); 682 writer.write(doctypePublic); 683 writer.write('"'); 684 } 685 686 if (null != doctypeSystem) 687 { 688 if (null == doctypePublic) 689 writer.write(" SYSTEM \""); 690 else 691 writer.write(" \""); 692 693 writer.write(doctypeSystem); 694 writer.write('"'); 695 } 696 697 writer.write('>'); 698 outputLineSep(); 699 } 700 catch(IOException e) 701 { 702 throw new SAXException(e); 703 } 704 } 705 } 706 707 m_needToOutputDocTypeDecl = false; 708 } 709 710 /** 711 * Receive notification of the end of a document. 712 * 713 * @throws org.xml.sax.SAXException Any SAX exception, possibly 714 * wrapping another exception. 715 * 716 * @throws org.xml.sax.SAXException 717 */ 718 public final void endDocument() throws org.xml.sax.SAXException 719 { 720 flushCharactersBuffer(); 721 flushPending(); 722 if (m_doIndent && !m_isprevtext) 723 { 724 try 725 { 726 outputLineSep(); 727 } 728 catch(IOException e) 729 { 730 throw new SAXException(e); 731 } 732 } 733 734 flushWriter(); 735 if (m_tracer != null) 736 super.fireEndDoc(); 737 } 738 739 /** 740 * If the previous is an inline element, won't insert a new line before the 741 * text. 
     *
     * @return true only when the superclass would indent for text and the
     *         previous element was recorded as a block element.
     */
    protected boolean shouldIndentForText() {
        return super.shouldIndentForText() && m_isprevblock;
    }

    /**
     * Only check m_doIndent, disregard m_ispreserveSpace.
     *
     * @return True if the content should be formatted.
     */
    protected boolean shouldFormatOutput() {
        return m_doIndent;
    }

    /**
     * Receive notification of the beginning of an element.
     *
     * Elements with a non-empty namespace URI are passed to the superclass
     * and handled like XML. For all other elements the start of the tag
     * ("&lt;" plus the name) is written here, using the element's
     * {@link ElemDesc} flags to decide on indentation, on the shortcut for
     * EMPTY elements and on the META tag written for HEAD.
     *
     * @param namespaceURI the namespace URI of the element; when non-null and
     *        non-empty the call is delegated to the superclass.
     * @param localName the local name, passed on unchanged.
     * @param name
     *            The element type name.
     * @param atts
     *            The attributes attached to the element, if any.
     * @throws org.xml.sax.SAXException
     *             Any SAX exception, possibly wrapping another exception.
     *             An IOException from the writer is wrapped in one.
     * @see #endElement
     * @see org.xml.sax.AttributeList
     */
    public void startElement(
        String namespaceURI,
        String localName,
        String name,
        Attributes atts)
        throws SAXException
    {
        // will add extra one if having namespace but no matter
        m_childNodeNum++;
        flushCharactersBuffer();
        ElemContext elemContext = m_elemContext;

        // clean up any pending things first
        // (only the first pending condition that holds is handled)
        if (elemContext.m_startTagOpen)
        {
            closeStartTag();
            elemContext.m_startTagOpen = false;
        }
        else if (m_cdataTagOpen)
        {
            closeCDATA();
            m_cdataTagOpen = false;
        }
        else if (m_needToCallStartDocument)
        {
            startDocumentInternal();
            m_needToCallStartDocument = false;
        }


        // if this element has a namespace then treat it like XML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.startElement(namespaceURI, localName, name, atts);

            return;
        }

        try
        {
            // getElemDesc2(name) is faster than getElemDesc(name)
            ElemDesc elemDesc = getElemDesc2(name);
            int elemFlags = elemDesc.getFlags();

            // deal with indentation issues first
            if (m_doIndent)
            {
                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                if ((elemContext.m_elementName != null)
                        // Indent if this element is a block element, or,
                        // for a non-block element, if the previous item
                        // was a block element and not text
                        // (the second test equals !m_isprevtext && m_isprevblock).
                        && (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
                {
                    m_startNewLine = true;

                    indent();
                }
                m_isprevblock = isBlockElement;
            }

            // save any attributes for later processing
            if (atts != null)
                addAttributes(atts);

            m_isprevtext = false;
            final java.io.Writer writer = m_writer;
            writer.write('<');
            writer.write(name);

            // remember the parent's child count and start counting afresh
            // for this element; endElement() pops the saved value
            m_childNodeNumStack.push(m_childNodeNum);
            m_childNodeNum = 0;

            if (m_tracer != null)
                firePseudoAttributes();

            // OPTIMIZE-EMPTY
            if ((elemFlags & ElemDesc.EMPTY) != 0)
            {
                // an optimization for elements which are expected
                // to be empty.
                m_elemContext = elemContext.push();
                /* XSLTC sometimes calls namespaceAfterStartElement()
                 * so we need to remember the name
                 */
                m_elemContext.m_elementName = name;
                m_elemContext.m_elementDesc = elemDesc;
                return;
            }
            else
            {
                elemContext = elemContext.push(namespaceURI,localName,name);
                m_elemContext = elemContext;
                elemContext.m_elementDesc = elemDesc;
                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;

                // set m_startNewLine for the next element
                if (m_doIndent) {
                    // elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
                    // in this branch m_elemContext.m_elementName is not null
                    boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
                    if (isBlockElement)
                        m_startNewLine = true;
                }
            }


            if ((elemFlags & ElemDesc.HEADELEM) != 0)
            {
                // This is the <HEAD> element, do some special processing:
                // close its start tag right away and, unless suppressed,
                // write a META tag declaring the output encoding.
                closeStartTag();
                elemContext.m_startTagOpen = false;
                if (!m_omitMetaTag)
                {
                    if (m_doIndent)
                        indent();
                    writer.write(
                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
                    String encoding = getEncoding();
                    String encode = Encodings.getMimeEncoding(encoding);
                    writer.write(encode);
                    writer.write("\">");
                }
            }
        }
        catch (IOException e)
        {
            throw new SAXException(e);
        }
    }

    /**
     * Receive notification of the end of an element.
     *
     * Elements with a non-empty namespace URI are passed to the superclass.
     * Otherwise the end tag is written; if the start tag is still open, the
     * gathered attributes are written and the start tag is closed first.
     * An element flagged EMPTY gets no end tag in that case.
     *
     * @param namespaceURI the namespace URI of the element; when non-null and
     *        non-empty the call is delegated to the superclass.
     * @param localName the local name, passed on unchanged.
     * @param name The element type name
     * @throws org.xml.sax.SAXException Any SAX exception, possibly
     *     wrapping another exception. An IOException from the writer is
     *     wrapped in one.
     */
    public final void endElement(
        final String namespaceURI,
        final String localName,
        final String name)
        throws org.xml.sax.SAXException
    {
        flushCharactersBuffer();
        // deal with any pending issues
        if (m_cdataTagOpen)
            closeCDATA();

        // if the element has a namespace, treat it like XML, not HTML
        if (null != namespaceURI && namespaceURI.length() > 0)
        {
            super.endElement(namespaceURI, localName, name);

            return;
        }

        try
        {

            ElemContext elemContext = m_elemContext;
            final ElemDesc elemDesc = elemContext.m_elementDesc;
            final int elemFlags = elemDesc.getFlags();
            final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;

            // deal with any indentation issues
            if (m_doIndent)
            {
                final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
                boolean shouldIndent = false;

                // Indent if this element is a block element, or,
                // for a non-block element, if the previous item was a
                // block element and not text
                // (the second test equals !m_isprevtext && m_isprevblock).
                if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
                {
                    m_startNewLine = true;
                    shouldIndent = true;
                }
                // The end tag is only put on its own indented line when the
                // start tag is already closed and the element had more than
                // one child node or did not end in text.
                if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
                    indent(elemContext.m_currentElemDepth - 1);

                m_isprevblock = isBlockElement;
            }

            final java.io.Writer writer = m_writer;
            if (!elemContext.m_startTagOpen)
            {
                writer.write("</");
                writer.write(name);
                writer.write('>');
            }
            else
            {
                // the start-tag was still open when this method was called,
                // so we need to process it now.

                if (m_tracer != null)
                    super.fireStartElem(name);

                // the starting tag was still open when we received this endElement() call
                // so we need to process any gathered attributes NOW, before they go away.
                int nAttrs = m_attributes.getLength();
                if (nAttrs > 0)
                {
                    processAttributes(m_writer, nAttrs);
                    // clear attributes object for re-use with next element
                    m_attributes.clear();
                }
                if (!elemEmpty)
                {
                    // As per Dave/Paul recommendation 12/06/2000
                    // if (shouldIndent)
                    // writer.write('>');
                    //  indent(m_currentIndent);

                    writer.write("></");
                    writer.write(name);
                    writer.write('>');
                }
                else
                {
                    // EMPTY elements only get their start tag closed
                    writer.write('>');
                }
            }

            // restore the child count saved by startElement()
            m_childNodeNum = m_childNodeNumStack.pop();
            // clean up because the element has ended
            if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
                m_ispreserve = true;
            m_isprevtext = false;

            // fire off the end element event
            if (m_tracer != null)
                super.fireEndElem(name);

            // OPTIMIZE-EMPTY
            if (elemEmpty)
            {
                // a quick exit if the HTML element had no children.
                // This block of code can be removed if the corresponding block of code
                // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
                m_elemContext = elemContext.m_prev;
                return;
            }

            // some more clean up because the element has ended.
            if (!elemContext.m_startTagOpen)
            {
                if (m_doIndent && !m_preserves.isEmpty())
                    m_preserves.pop();
            }
            m_elemContext = elemContext.m_prev;
//            m_isRawStack.pop();
        }
        catch (IOException e)
        {
            throw new SAXException(e);
        }
    }

    /**
     * Process an attribute.
     * @param   writer The writer to write the processed output to.
     * @param   name   The name of the attribute.
     * @param   value   The value of the attribute.
1040 * @param elemDesc The description of the HTML element 1041 * that has this attribute. 1042 * 1043 * @throws org.xml.sax.SAXException 1044 */ 1045 protected void processAttribute( 1046 java.io.Writer writer, 1047 String name, 1048 String value, 1049 ElemDesc elemDesc) 1050 throws IOException 1051 { 1052 writer.write(' '); 1053 1054 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1055 && elemDesc != null 1056 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1057 { 1058 writer.write(name); 1059 } 1060 else 1061 { 1062 // %REVIEW% %OPT% 1063 // Two calls to single-char write may NOT 1064 // be more efficient than one to string-write... 1065 writer.write(name); 1066 writer.write("=\""); 1067 if ( elemDesc != null 1068 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1069 writeAttrURI(writer, value, m_specialEscapeURLs); 1070 else 1071 writeAttrString(writer, value, this.getEncoding()); 1072 writer.write('"'); 1073 1074 } 1075 } 1076 1077 /** 1078 * Tell if a character is an ASCII digit. 1079 */ 1080 private boolean isASCIIDigit(char c) 1081 { 1082 return (c >= '0' && c <= '9'); 1083 } 1084 1085 /** 1086 * Make an integer into an HH hex value. 1087 * Does no checking on the size of the input, since this 1088 * is only meant to be used locally by writeAttrURI. 1089 * 1090 * @param i must be a value less than 255. 1091 * 1092 * @return should be a two character string. 1093 */ 1094 private static String makeHHString(int i) 1095 { 1096 String s = Integer.toHexString(i).toUpperCase(); 1097 if (s.length() == 1) 1098 { 1099 s = "0" + s; 1100 } 1101 return s; 1102 } 1103 1104 /** 1105 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1106 * @param str must be 2 characters long 1107 * 1108 * @return true or false 1109 */ 1110 private boolean isHHSign(String str) 1111 { 1112 boolean sign = true; 1113 try 1114 { 1115 char r = (char) Integer.parseInt(str, 16); 1116 } 1117 catch (NumberFormatException e) 1118 { 1119 sign = false; 1120 } 1121 return sign; 1122 } 1123 1124 /** 1125 * Write the specified <var>string</var> after substituting non ASCII characters, 1126 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1127 * 1128 * @param string String to convert to XML format. 1129 * @param doURLEscaping True if we should try to encode as 1130 * per http://www.ietf.org/rfc/rfc2396.txt. 1131 * 1132 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1133 */ 1134 public void writeAttrURI( 1135 final java.io.Writer writer, String string, boolean doURLEscaping) 1136 throws IOException 1137 { 1138 // http://www.ietf.org/rfc/rfc2396.txt says: 1139 // A URI is always in an "escaped" form, since escaping or unescaping a 1140 // completed URI might change its semantics. Normally, the only time 1141 // escape encodings can safely be made is when the URI is being created 1142 // from its component parts; each component may have its own set of 1143 // characters that are reserved, so only the mechanism responsible for 1144 // generating or interpreting that component can determine whether or 1145 // not escaping a character will change its semantics. Likewise, a URI 1146 // must be separated into its components before the escaped characters 1147 // within those components can be safely decoded. 1148 // 1149 // ...So we do our best to do limited escaping of the URL, without 1150 // causing damage. If the URL is already properly escaped, in theory, this 1151 // function should not change the string value. 
1152 1153 final int end = string.length(); 1154 if (end > m_attrBuff.length) 1155 { 1156 m_attrBuff = new char[end*2 + 1]; 1157 } 1158 string.getChars(0,end, m_attrBuff, 0); 1159 final char[] chars = m_attrBuff; 1160 1161 int cleanStart = 0; 1162 int cleanLength = 0; 1163 1164 1165 char ch = 0; 1166 for (int i = 0; i < end; i++) 1167 { 1168 ch = chars[i]; 1169 1170 if ((ch < 32) || (ch > 126)) 1171 { 1172 if (cleanLength > 0) 1173 { 1174 writer.write(chars, cleanStart, cleanLength); 1175 cleanLength = 0; 1176 } 1177 if (doURLEscaping) 1178 { 1179 // Encode UTF16 to UTF8. 1180 // Reference is Unicode, A Primer, by Tony Graham. 1181 // Page 92. 1182 1183 // Note that Kay doesn't escape 0x20... 1184 // if(ch == 0x20) // Not sure about this... -sb 1185 // { 1186 // writer.write(ch); 1187 // } 1188 // else 1189 if (ch <= 0x7F) 1190 { 1191 writer.write('%'); 1192 writer.write(makeHHString(ch)); 1193 } 1194 else if (ch <= 0x7FF) 1195 { 1196 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1197 // and set two high bits. 1198 int high = (ch >> 6) | 0xC0; 1199 int low = (ch & 0x3F) | 0x80; 1200 // First 6 bits, + high bit 1201 writer.write('%'); 1202 writer.write(makeHHString(high)); 1203 writer.write('%'); 1204 writer.write(makeHHString(low)); 1205 } 1206 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1207 { 1208 // I'm sure this can be done in 3 instructions, but I choose 1209 // to try and do it exactly like it is done in the book, at least 1210 // until we are sure this is totally clean. I don't think performance 1211 // is a big issue with this particular function, though I could be 1212 // wrong. Also, the stuff below clearly does more masking than 1213 // it needs to do. 1214 1215 // Clear high 6 bits. 
1216 int highSurrogate = ((int) ch) & 0x03FF; 1217 1218 // Middle 4 bits (wwww) + 1 1219 // "Note that the value of wwww from the high surrogate bit pattern 1220 // is incremented to make the uuuuu bit pattern in the scalar value 1221 // so the surrogate pair don't address the BMP." 1222 int wwww = ((highSurrogate & 0x03C0) >> 6); 1223 int uuuuu = wwww + 1; 1224 1225 // next 4 bits 1226 int zzzz = (highSurrogate & 0x003C) >> 2; 1227 1228 // low 2 bits 1229 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1230 1231 // Get low surrogate character. 1232 ch = chars[++i]; 1233 1234 // Clear high 6 bits. 1235 int lowSurrogate = ((int) ch) & 0x03FF; 1236 1237 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1238 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1239 1240 // bottom 6 bits. 1241 int xxxxxx = (lowSurrogate & 0x003F); 1242 1243 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1244 int byte2 = 1245 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1246 int byte3 = 0x80 | yyyyyy; 1247 int byte4 = 0x80 | xxxxxx; 1248 1249 writer.write('%'); 1250 writer.write(makeHHString(byte1)); 1251 writer.write('%'); 1252 writer.write(makeHHString(byte2)); 1253 writer.write('%'); 1254 writer.write(makeHHString(byte3)); 1255 writer.write('%'); 1256 writer.write(makeHHString(byte4)); 1257 } 1258 else 1259 { 1260 int high = (ch >> 12) | 0xE0; // top 4 bits 1261 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1262 // middle 6 bits 1263 int low = (ch & 0x3F) | 0x80; 1264 // First 6 bits, + high bit 1265 writer.write('%'); 1266 writer.write(makeHHString(high)); 1267 writer.write('%'); 1268 writer.write(makeHHString(middle)); 1269 writer.write('%'); 1270 writer.write(makeHHString(low)); 1271 } 1272 1273 } 1274 else if (escapingNotNeeded(ch)) 1275 { 1276 writer.write(ch); 1277 } 1278 else 1279 { 1280 writer.write("&#"); 1281 writer.write(Integer.toString(ch)); 1282 writer.write(';'); 1283 } 1284 // In this character range we have first written out any previously 
accumulated 1285 // "clean" characters, then processed the current more complicated character, 1286 // which may have incremented "i". 1287 // We now we reset the next possible clean character. 1288 cleanStart = i + 1; 1289 } 1290 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1291 // not allowing quotes in the URI proper syntax, nor in the fragment 1292 // identifier, we believe that it's OK to double escape quotes. 1293 else if (ch == '"') 1294 { 1295 // If the character is a '%' number number, try to avoid double-escaping. 1296 // There is a question if this is legal behavior. 1297 1298 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1299 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1300 1301 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1302 1303 // We are no longer escaping '%' 1304 1305 if (cleanLength > 0) 1306 { 1307 writer.write(chars, cleanStart, cleanLength); 1308 cleanLength = 0; 1309 } 1310 1311 1312 // Mike Kay encodes this as ", so he may know something I don't? 1313 if (doURLEscaping) 1314 writer.write("%22"); 1315 else 1316 writer.write("""); // we have to escape this, I guess. 1317 1318 // We have written out any clean characters, then the escaped '%' and now we 1319 // We now we reset the next possible clean character. 1320 cleanStart = i + 1; 1321 } 1322 else if (ch == '&') 1323 { 1324 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1325 // instead of "&" to avoid confusion with the beginning of a character 1326 // reference (entity reference open delimiter). 
1327 if (cleanLength > 0) 1328 { 1329 writer.write(chars, cleanStart, cleanLength); 1330 cleanLength = 0; 1331 } 1332 writer.write("&"); 1333 cleanStart = i + 1; 1334 } 1335 else 1336 { 1337 // no processing for this character, just count how 1338 // many characters in a row that we have that need no processing 1339 cleanLength++; 1340 } 1341 } 1342 1343 // are there any clean characters at the end of the array 1344 // that we haven't processed yet? 1345 if (cleanLength > 1) 1346 { 1347 // if the whole string can be written out as-is do so 1348 // otherwise write out the clean chars at the end of the 1349 // array 1350 if (cleanStart == 0) 1351 writer.write(string); 1352 else 1353 writer.write(chars, cleanStart, cleanLength); 1354 } 1355 else if (cleanLength == 1) 1356 { 1357 // a little optimization for 1 clean character 1358 // (we could have let the previous if(...) handle them all) 1359 writer.write(ch); 1360 } 1361 } 1362 1363 /** 1364 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1365 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1366 * 1367 * @param string String to convert to XML format. 1368 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1369 * 1370 * @throws org.xml.sax.SAXException 1371 */ 1372 public void writeAttrString( 1373 final java.io.Writer writer, String string, String encoding) 1374 throws IOException 1375 { 1376 final int end = string.length(); 1377 if (end > m_attrBuff.length) 1378 { 1379 m_attrBuff = new char[end * 2 + 1]; 1380 } 1381 string.getChars(0, end, m_attrBuff, 0); 1382 final char[] chars = m_attrBuff; 1383 1384 1385 1386 int cleanStart = 0; 1387 int cleanLength = 0; 1388 1389 char ch = 0; 1390 for (int i = 0; i < end; i++) 1391 { 1392 ch = chars[i]; 1393 1394 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1395 // System.out.println("ch: "+(int)ch); 1396 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1397 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1398 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) 1399 { 1400 cleanLength++; 1401 } 1402 else if ('<' == ch || '>' == ch) 1403 { 1404 cleanLength++; // no escaping in this case, as specified in 15.2 1405 } 1406 else if ( 1407 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1408 { 1409 cleanLength++; // no escaping in this case, as specified in 15.2 1410 } 1411 else 1412 { 1413 if (cleanLength > 0) 1414 { 1415 writer.write(chars,cleanStart,cleanLength); 1416 cleanLength = 0; 1417 } 1418 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1419 1420 if (i != pos) 1421 { 1422 i = pos - 1; 1423 } 1424 else 1425 { 1426 if (Encodings.isHighUTF16Surrogate(ch)) 1427 { 1428 1429 writeUTF16Surrogate(ch, chars, i, end); 1430 i++; // two input characters processed 1431 // this increments by one and the for() 1432 // loop itself increments by another one. 1433 } 1434 1435 // The next is kind of a hack to keep from escaping in the case 1436 // of Shift_JIS and the like. 
1437 1438 /* 1439 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1440 && (ch != 160)) 1441 { 1442 writer.write(ch); // no escaping in this case 1443 } 1444 else 1445 */ 1446 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1447 if (null != outputStringForChar) 1448 { 1449 writer.write(outputStringForChar); 1450 } 1451 else if (escapingNotNeeded(ch)) 1452 { 1453 writer.write(ch); // no escaping in this case 1454 } 1455 else 1456 { 1457 writer.write("&#"); 1458 writer.write(Integer.toString(ch)); 1459 writer.write(';'); 1460 } 1461 } 1462 cleanStart = i + 1; 1463 } 1464 } // end of for() 1465 1466 // are there any clean characters at the end of the array 1467 // that we haven't processed yet? 1468 if (cleanLength > 1) 1469 { 1470 // if the whole string can be written out as-is do so 1471 // otherwise write out the clean chars at the end of the 1472 // array 1473 if (cleanStart == 0) 1474 writer.write(string); 1475 else 1476 writer.write(chars, cleanStart, cleanLength); 1477 } 1478 else if (cleanLength == 1) 1479 { 1480 // a little optimization for 1 clean character 1481 // (we could have let the previous if(...) handle them all) 1482 writer.write(ch); 1483 } 1484 } 1485 1486 1487 1488 /** 1489 * Receive notification of character data. 1490 * 1491 * <p>The Parser will call this method to report each chunk of 1492 * character data. 
SAX parsers may return all contiguous character 1493 * data in a single chunk, or they may split it into several 1494 * chunks; however, all of the characters in any single event 1495 * must come from the same external entity, so that the Locator 1496 * provides useful information.</p> 1497 * 1498 * <p>The application must not attempt to read from the array 1499 * outside of the specified range.</p> 1500 * 1501 * <p>Note that some parsers will report whitespace using the 1502 * ignorableWhitespace() method rather than this one (validating 1503 * parsers must do so).</p> 1504 * 1505 * @param chars The characters from the XML document. 1506 * @param start The start position in the array. 1507 * @param length The number of characters to read from the array. 1508 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1509 * wrapping another exception. 1510 * @see #ignorableWhitespace 1511 * @see org.xml.sax.Locator 1512 * 1513 * @throws org.xml.sax.SAXException 1514 */ 1515 public final void characters(char chars[], int start, int length) 1516 throws org.xml.sax.SAXException 1517 { 1518 1519 if (m_elemContext.m_isRaw) 1520 { 1521 try 1522 { 1523 if (m_elemContext.m_startTagOpen) 1524 { 1525 closeStartTag(); 1526 m_elemContext.m_startTagOpen = false; 1527 } 1528 m_ispreserve = true; 1529 1530 // With m_ispreserve just set true it looks like shouldIndent() 1531 // will always return false, so drop any possible indentation. 
1532 // if (shouldIndent()) 1533 // indent(); 1534 1535 // writer.write("<![CDATA["); 1536 // writer.write(chars, start, length); 1537 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1538 m_isprevtext = true; 1539 // writer.write("]]>"); 1540 1541 // time to generate characters event 1542 if (m_tracer != null) 1543 super.fireCharEvent(chars, start, length); 1544 1545 return; 1546 } 1547 catch (IOException ioe) 1548 { 1549 throw new org.xml.sax.SAXException( 1550 Utils.messages.createMessage( 1551 MsgKey.ER_OIERROR, 1552 null), 1553 ioe); 1554 //"IO error", ioe); 1555 } 1556 } 1557 else 1558 { 1559 super.characters(chars, start, length); 1560 } 1561 } 1562 1563 /** 1564 * Receive notification of cdata. 1565 * 1566 * <p>The Parser will call this method to report each chunk of 1567 * character data. SAX parsers may return all contiguous character 1568 * data in a single chunk, or they may split it into several 1569 * chunks; however, all of the characters in any single event 1570 * must come from the same external entity, so that the Locator 1571 * provides useful information.</p> 1572 * 1573 * <p>The application must not attempt to read from the array 1574 * outside of the specified range.</p> 1575 * 1576 * <p>Note that some parsers will report whitespace using the 1577 * ignorableWhitespace() method rather than this one (validating 1578 * parsers must do so).</p> 1579 * 1580 * @param ch The characters from the XML document. 1581 * @param start The start position in the array. 1582 * @param length The number of characters to read from the array. 1583 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1584 * wrapping another exception. 
1585 * @see #ignorableWhitespace 1586 * @see org.xml.sax.Locator 1587 * 1588 * @throws org.xml.sax.SAXException 1589 */ 1590 public final void cdata(char ch[], int start, int length) 1591 throws org.xml.sax.SAXException 1592 { 1593 if ((null != m_elemContext.m_elementName) 1594 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1595 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1596 { 1597 try 1598 { 1599 if (m_elemContext.m_startTagOpen) 1600 { 1601 closeStartTag(); 1602 m_elemContext.m_startTagOpen = false; 1603 } 1604 1605 m_ispreserve = true; 1606 1607 if (shouldIndent()) 1608 indent(); 1609 1610 // writer.write(ch, start, length); 1611 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1612 } 1613 catch (IOException ioe) 1614 { 1615 throw new org.xml.sax.SAXException( 1616 Utils.messages.createMessage( 1617 MsgKey.ER_OIERROR, 1618 null), 1619 ioe); 1620 //"IO error", ioe); 1621 } 1622 } 1623 else 1624 { 1625 super.cdata(ch, start, length); 1626 } 1627 } 1628 1629 /** 1630 * Receive notification of a processing instruction. 1631 * 1632 * @param target The processing instruction target. 1633 * @param data The processing instruction data, or null if 1634 * none was supplied. 1635 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1636 * wrapping another exception. 1637 * 1638 * @throws org.xml.sax.SAXException 1639 */ 1640 public void processingInstruction(String target, String data) 1641 throws org.xml.sax.SAXException 1642 { 1643 m_childNodeNum++; 1644 flushCharactersBuffer(); 1645 // Process any pending starDocument and startElement first. 1646 flushPending(); 1647 1648 // Use a fairly nasty hack to tell if the next node is supposed to be 1649 // unescaped text. 
1650 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1651 { 1652 startNonEscaping(); 1653 } 1654 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1655 { 1656 endNonEscaping(); 1657 } 1658 else 1659 { 1660 try 1661 { 1662 if (m_elemContext.m_startTagOpen) 1663 { 1664 closeStartTag(); 1665 m_elemContext.m_startTagOpen = false; 1666 } 1667 else if (m_needToCallStartDocument) 1668 startDocumentInternal(); 1669 1670 if (shouldIndent()) 1671 indent(); 1672 1673 final java.io.Writer writer = m_writer; 1674 //writer.write("<?" + target); 1675 writer.write("<?"); 1676 writer.write(target); 1677 1678 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1679 writer.write(' '); 1680 1681 //writer.write(data + ">"); // different from XML 1682 writer.write(data); // different from XML 1683 writer.write('>'); // different from XML 1684 1685 // Always output a newline char if not inside of an 1686 // element. The whitespace is not significant in that 1687 // case. 1688 if (m_elemContext.m_currentElemDepth <= 0) 1689 outputLineSep(); 1690 1691 m_startNewLine = true; 1692 } 1693 catch(IOException e) 1694 { 1695 throw new SAXException(e); 1696 } 1697 } 1698 1699 // now generate the PI event 1700 if (m_tracer != null) 1701 super.fireEscapingEvent(target, data); 1702 } 1703 1704 /** 1705 * Receive notivication of a entityReference. 1706 * 1707 * @param name non-null reference to entity name string. 
1708 * 1709 * @throws org.xml.sax.SAXException 1710 */ 1711 public final void entityReference(String name) 1712 throws org.xml.sax.SAXException 1713 { 1714 try 1715 { 1716 1717 final java.io.Writer writer = m_writer; 1718 writer.write('&'); 1719 writer.write(name); 1720 writer.write(';'); 1721 1722 } catch(IOException e) 1723 { 1724 throw new SAXException(e); 1725 } 1726 } 1727 /** 1728 * @see ExtendedContentHandler#endElement(String) 1729 */ 1730 public final void endElement(String elemName) throws SAXException 1731 { 1732 endElement(null, null, elemName); 1733 } 1734 1735 /** 1736 * Process the attributes, which means to write out the currently 1737 * collected attributes to the writer. The attributes are not 1738 * cleared by this method 1739 * 1740 * @param writer the writer to write processed attributes to. 1741 * @param nAttrs the number of attributes in m_attributes 1742 * to be processed 1743 * 1744 * @throws org.xml.sax.SAXException 1745 */ 1746 public void processAttributes(java.io.Writer writer, int nAttrs) 1747 throws IOException,SAXException 1748 { 1749 /* 1750 * process the collected attributes 1751 */ 1752 for (int i = 0; i < nAttrs; i++) 1753 { 1754 processAttribute( 1755 writer, 1756 m_attributes.getQName(i), 1757 m_attributes.getValue(i), 1758 m_elemContext.m_elementDesc); 1759 } 1760 } 1761 1762 /** 1763 * For the enclosing elements starting tag write out out any attributes 1764 * followed by ">" 1765 * 1766 *@throws org.xml.sax.SAXException 1767 */ 1768 protected void closeStartTag() throws SAXException 1769 { 1770 try 1771 { 1772 1773 // finish processing attributes, time to fire off the start element event 1774 if (m_tracer != null) 1775 super.fireStartElem(m_elemContext.m_elementName); 1776 1777 int nAttrs = m_attributes.getLength(); 1778 if (nAttrs>0) 1779 { 1780 processAttributes(m_writer, nAttrs); 1781 // clear attributes object for re-use with next element 1782 m_attributes.clear(); 1783 } 1784 1785 m_writer.write('>'); 1786 1787 /* 
whether Xalan or XSLTC, we have the prefix mappings now, so 1788 * lets determine if the current element is specified in the cdata- 1789 * section-elements list. 1790 */ 1791 if (m_StringOfCDATASections != null) 1792 m_elemContext.m_isCdataSection = isCdataSection(); 1793 if (m_doIndent) 1794 { 1795 m_isprevtext = false; 1796 m_preserves.push(m_ispreserve); 1797 } 1798 1799 } 1800 catch(IOException e) 1801 { 1802 throw new SAXException(e); 1803 } 1804 } 1805 1806 /** 1807 * This method is used when a prefix/uri namespace mapping 1808 * is indicated after the element was started with a 1809 * startElement() and before and endElement(). 1810 * startPrefixMapping(prefix,uri) would be used before the 1811 * startElement() call. 1812 * @param uri the URI of the namespace 1813 * @param prefix the prefix associated with the given URI. 1814 * 1815 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1816 */ 1817 public void namespaceAfterStartElement(String prefix, String uri) 1818 throws SAXException 1819 { 1820 // hack for XSLTC with finding URI for default namespace 1821 if (m_elemContext.m_elementURI == null) 1822 { 1823 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1824 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1825 { 1826 // the elements URI is not known yet, and it 1827 // doesn't have a prefix, and we are currently 1828 // setting the uri for prefix "", so we have 1829 // the uri for the element... lets remember it 1830 m_elemContext.m_elementURI = uri; 1831 } 1832 } 1833 startPrefixMapping(prefix,uri,false); 1834 } 1835 1836 public void startDTD(String name, String publicId, String systemId) 1837 throws SAXException 1838 { 1839 m_inDTD = true; 1840 super.startDTD(name, publicId, systemId); 1841 } 1842 1843 /** 1844 * Report the end of DTD declarations. 1845 * @throws org.xml.sax.SAXException The application may raise an exception. 
* @see #startDTD
     */
    public void endDTD() throws org.xml.sax.SAXException
    {
        // DTD events are over; comment() serializes comments again from here on
        m_inDTD = false;
        /* for ToHTMLStream the DOCTYPE is entirely output in the
         * startDocumentInternal() method, so don't do anything here
         */
    }
    /**
     * This method does nothing: attribute declarations from the internal
     * DTD subset are not serialized.
     */
    public void attributeDecl(
        String eName,
        String aName,
        String type,
        String valueDefault,
        String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method does nothing: element declarations from the internal
     * DTD subset are not serialized.
     */
    public void elementDecl(String name, String model) throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
    /**
     * This method does nothing: internal entity declarations from the
     * internal DTD subset are not serialized.
     */
    public void internalEntityDecl(String name, String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
    /**
     * This method does nothing: external entity declarations from the
     * internal DTD subset are not serialized.
     */
    public void externalEntityDecl(
        String name,
        String publicId,
        String systemId)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }

    /**
     * This method is used to add an attribute to the currently open element.
     * The caller has guaranteed that this attribute is unique, which means that it
     * has not been seen before and will not be seen again.
     *
     * @param name the qualified name of the attribute
     * @param value the value of the attribute which can contain only
     * ASCII printable characters in the range 32 to 127 inclusive.
     * @param flags the bit values of this integer give optimization information.
1905 */ 1906 public void addUniqueAttribute(String name, String value, int flags) 1907 throws SAXException 1908 { 1909 try 1910 { 1911 final java.io.Writer writer = m_writer; 1912 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1913 { 1914 // "flags" has indicated that the characters 1915 // '>' '<' '&' and '"' are not in the value and 1916 // m_htmlcharInfo has recorded that there are no other 1917 // entities in the range 0 to 127 so we write out the 1918 // value directly 1919 writer.write(' '); 1920 writer.write(name); 1921 writer.write("=\""); 1922 writer.write(value); 1923 writer.write('"'); 1924 } 1925 else if ( 1926 (flags & HTML_ATTREMPTY) > 0 1927 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1928 { 1929 writer.write(' '); 1930 writer.write(name); 1931 } 1932 else 1933 { 1934 writer.write(' '); 1935 writer.write(name); 1936 writer.write("=\""); 1937 if ((flags & HTML_ATTRURL) > 0) 1938 { 1939 writeAttrURI(writer, value, m_specialEscapeURLs); 1940 } 1941 else 1942 { 1943 writeAttrString(writer, value, this.getEncoding()); 1944 } 1945 writer.write('"'); 1946 } 1947 } catch (IOException e) { 1948 throw new SAXException(e); 1949 } 1950 } 1951 1952 public void comment(char ch[], int start, int length) 1953 throws SAXException 1954 { 1955 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1956 if (m_inDTD) 1957 return; 1958 super.comment(ch, start, length); 1959 } 1960 1961 public boolean reset() 1962 { 1963 boolean ret = super.reset(); 1964 if (!ret) 1965 return false; 1966 initToHTMLStream(); 1967 return true; 1968 } 1969 1970 private void initToHTMLStream() 1971 { 1972 m_isprevblock = false; 1973 m_inDTD = false; 1974 m_omitMetaTag = false; 1975 m_specialEscapeURLs = true; 1976 } 1977 1978 static class Trie 1979 { 1980 /** 1981 * A digital search trie for 7-bit ASCII text 1982 * The API is a subset of java.util.Hashtable 1983 * The key must be a 7-bit ASCII string 1984 * The value may be any Java Object 
1985 * One can get an object stored in a trie from its key, 1986 * but the search is either case sensitive or case 1987 * insensitive to the characters in the key, and this 1988 * choice of sensitivity or insensitivity is made when 1989 * the Trie is created, before any objects are put in it. 1990 * 1991 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 1992 * It exists to cut the serializers dependancy on that package. 1993 * 1994 * @xsl.usage internal 1995 */ 1996 1997 /** Size of the m_nextChar array. */ 1998 public static final int ALPHA_SIZE = 128; 1999 2000 /** The root node of the tree. */ 2001 final Node m_Root; 2002 2003 /** helper buffer to convert Strings to char arrays */ 2004 private char[] m_charBuffer = new char[0]; 2005 2006 /** true if the search for an object is lower case only with the key */ 2007 private final boolean m_lowerCaseOnly; 2008 2009 /** 2010 * Construct the trie that has a case insensitive search. 2011 */ 2012 public Trie() 2013 { 2014 m_Root = new Node(); 2015 m_lowerCaseOnly = false; 2016 } 2017 2018 /** 2019 * Construct the trie given the desired case sensitivity with the key. 2020 * @param lowerCaseOnly true if the search keys are to be loser case only, 2021 * not case insensitive. 2022 */ 2023 public Trie(boolean lowerCaseOnly) 2024 { 2025 m_Root = new Node(); 2026 m_lowerCaseOnly = lowerCaseOnly; 2027 } 2028 2029 /** 2030 * Put an object into the trie for lookup. 2031 * 2032 * @param key must be a 7-bit ASCII string 2033 * @param value any java object. 2034 * 2035 * @return The old object that matched key, or null. 
2036 */ 2037 public Object put(String key, Object value) 2038 { 2039 2040 final int len = key.length(); 2041 if (len > m_charBuffer.length) 2042 { 2043 // make the biggest buffer ever needed in get(String) 2044 m_charBuffer = new char[len]; 2045 } 2046 2047 Node node = m_Root; 2048 2049 for (int i = 0; i < len; i++) 2050 { 2051 Node nextNode = 2052 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2053 2054 if (nextNode != null) 2055 { 2056 node = nextNode; 2057 } 2058 else 2059 { 2060 for (; i < len; i++) 2061 { 2062 Node newNode = new Node(); 2063 if (m_lowerCaseOnly) 2064 { 2065 // put this value into the tree only with a lower case key 2066 node.m_nextChar[Character.toLowerCase( 2067 key.charAt(i))] = 2068 newNode; 2069 } 2070 else 2071 { 2072 // put this value into the tree with a case insensitive key 2073 node.m_nextChar[Character.toUpperCase( 2074 key.charAt(i))] = 2075 newNode; 2076 node.m_nextChar[Character.toLowerCase( 2077 key.charAt(i))] = 2078 newNode; 2079 } 2080 node = newNode; 2081 } 2082 break; 2083 } 2084 } 2085 2086 Object ret = node.m_Value; 2087 2088 node.m_Value = value; 2089 2090 return ret; 2091 } 2092 2093 /** 2094 * Get an object that matches the key. 2095 * 2096 * @param key must be a 7-bit ASCII string 2097 * 2098 * @return The object that matches the key, or null. 2099 */ 2100 public Object get(final String key) 2101 { 2102 2103 final int len = key.length(); 2104 2105 /* If the name is too long, we won't find it, this also keeps us 2106 * from overflowing m_charBuffer 2107 */ 2108 if (m_charBuffer.length < len) 2109 return null; 2110 2111 Node node = m_Root; 2112 switch (len) // optimize the look up based on the number of chars 2113 { 2114 // case 0 looks silly, but the generated bytecode runs 2115 // faster for lookup of elements of length 2 with this in 2116 // and a fair bit faster. Don't know why. 
2117 case 0 : 2118 { 2119 return null; 2120 } 2121 2122 case 1 : 2123 { 2124 final char ch = key.charAt(0); 2125 if (ch < ALPHA_SIZE) 2126 { 2127 node = node.m_nextChar[ch]; 2128 if (node != null) 2129 return node.m_Value; 2130 } 2131 return null; 2132 } 2133 // comment out case 2 because the default is faster 2134 // case 2 : 2135 // { 2136 // final char ch0 = key.charAt(0); 2137 // final char ch1 = key.charAt(1); 2138 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2139 // { 2140 // node = node.m_nextChar[ch0]; 2141 // if (node != null) 2142 // { 2143 // 2144 // if (ch1 < ALPHA_SIZE) 2145 // { 2146 // node = node.m_nextChar[ch1]; 2147 // if (node != null) 2148 // return node.m_Value; 2149 // } 2150 // } 2151 // } 2152 // return null; 2153 // } 2154 default : 2155 { 2156 for (int i = 0; i < len; i++) 2157 { 2158 // A thread-safe way to loop over the characters 2159 final char ch = key.charAt(i); 2160 if (ALPHA_SIZE <= ch) 2161 { 2162 // the key is not 7-bit ASCII so we won't find it here 2163 return null; 2164 } 2165 2166 node = node.m_nextChar[ch]; 2167 if (node == null) 2168 return null; 2169 } 2170 2171 return node.m_Value; 2172 } 2173 } 2174 } 2175 2176 /** 2177 * The node representation for the trie. 2178 * @xsl.usage internal 2179 */ 2180 private class Node 2181 { 2182 2183 /** 2184 * Constructor, creates a Node[ALPHA_SIZE]. 2185 */ 2186 Node() 2187 { 2188 m_nextChar = new Node[ALPHA_SIZE]; 2189 m_Value = null; 2190 } 2191 2192 /** The next nodes. */ 2193 final Node m_nextChar[]; 2194 2195 /** The value. */ 2196 Object m_Value; 2197 } 2198 /** 2199 * Construct the trie from another Trie. 2200 * Both the existing Trie and this new one share the same table for 2201 * lookup, and it is assumed that the table is fully populated and 2202 * not changing anymore. 2203 * 2204 * @param existingTrie the Trie that this one is a copy of. 2205 */ 2206 public Trie(Trie existingTrie) 2207 { 2208 // copy some fields from the existing Trie into this one. 
2209 m_Root = existingTrie.m_Root; 2210 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2211 2212 // get a buffer just big enough to hold the longest key in the table. 2213 int max = existingTrie.getLongestKeyLength(); 2214 m_charBuffer = new char[max]; 2215 } 2216 2217 /** 2218 * Get an object that matches the key. 2219 * This method is faster than get(), but is not thread-safe. 2220 * 2221 * @param key must be a 7-bit ASCII string 2222 * 2223 * @return The object that matches the key, or null. 2224 */ 2225 public Object get2(final String key) 2226 { 2227 2228 final int len = key.length(); 2229 2230 /* If the name is too long, we won't find it, this also keeps us 2231 * from overflowing m_charBuffer 2232 */ 2233 if (m_charBuffer.length < len) 2234 return null; 2235 2236 Node node = m_Root; 2237 switch (len) // optimize the look up based on the number of chars 2238 { 2239 // case 0 looks silly, but the generated bytecode runs 2240 // faster for lookup of elements of length 2 with this in 2241 // and a fair bit faster. Don't know why. 2242 case 0 : 2243 { 2244 return null; 2245 } 2246 2247 case 1 : 2248 { 2249 final char ch = key.charAt(0); 2250 if (ch < ALPHA_SIZE) 2251 { 2252 node = node.m_nextChar[ch]; 2253 if (node != null) 2254 return node.m_Value; 2255 } 2256 return null; 2257 } 2258 default : 2259 { 2260 /* Copy string into array. This is not thread-safe because 2261 * it modifies the contents of m_charBuffer. If multiple 2262 * threads were to use this Trie they all would be 2263 * using this same array (not good). So this 2264 * method is not thread-safe, but it is faster because 2265 * converting to a char[] and looping over elements of 2266 * the array is faster than a String's charAt(i). 
2267 */ 2268 key.getChars(0, len, m_charBuffer, 0); 2269 2270 for (int i = 0; i < len; i++) 2271 { 2272 final char ch = m_charBuffer[i]; 2273 if (ALPHA_SIZE <= ch) 2274 { 2275 // the key is not 7-bit ASCII so we won't find it here 2276 return null; 2277 } 2278 2279 node = node.m_nextChar[ch]; 2280 if (node == null) 2281 return null; 2282 } 2283 2284 return node.m_Value; 2285 } 2286 } 2287 } 2288 2289 /** 2290 * Get the length of the longest key used in the table. 2291 */ 2292 public int getLongestKeyLength() 2293 { 2294 return m_charBuffer.length; 2295 } 2296 } 2297 }