1 /*
2 * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
3 */
4 /*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements. See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 package com.sun.org.apache.xml.internal.serializer;
22
23 import java.io.IOException;
24 import java.util.Properties;
25
26 import javax.xml.transform.Result;
27
28 import org.xml.sax.Attributes;
29 import org.xml.sax.SAXException;
30
31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
32 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
33
34 /**
35 * This serializer takes a series of SAX or
36 * SAX-like events and writes its output
37 * to the given stream.
38 *
39 * This class is not a public API; it is public
40 * because it is used from another package.
41 *
42 * @xsl.usage internal
43 */
44 public final class ToHTMLStream extends ToStream
45 {
46
47 /** This flag is set while receiving events from the DTD */
48 protected boolean m_inDTD = false;
49
50 /** True if the previous element is a block element. */
51 private boolean m_isprevblock = false;
52
53 /**
54 * Map that tells which XML characters should have special treatment, and it
55 * provides character to entity name lookup.
56 */
57 private static final CharInfo m_htmlcharInfo =
58 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
59 CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
60
61 /** A digital search trie for fast, case-insensitive lookup of ElemDesc objects. */
62 static final Trie m_elementFlags = new Trie();
1032 {
1033 throw new SAXException(e);
1034 }
1035 }
1036
1037 /**
1038 * Process an attribute.
1039 * @param writer The writer to write the processed output to.
1040 * @param name The name of the attribute.
1041 * @param value The value of the attribute.
1042 * @param elemDesc The description of the HTML element
1043 * that has this attribute.
1044 *
1045 * @throws IOException if an I/O error occurs while writing the attribute
1046 */
1047 protected void processAttribute(
1048 java.io.Writer writer,
1049 String name,
1050 String value,
1051 ElemDesc elemDesc)
1052 throws IOException
1053 {
1054 writer.write(' ');
1055
1056 if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
1057 && elemDesc != null
1058 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1059 {
1060 writer.write(name);
1061 }
1062 else
1063 {
1064 // %REVIEW% %OPT%
1065 // Two calls to single-char write may NOT
1066 // be more efficient than one to string-write...
1067 writer.write(name);
1068 writer.write("=\"");
1069 if ( elemDesc != null
1070 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1071 writeAttrURI(writer, value, m_specialEscapeURLs);
1072 else
1356 }
1357 else if (cleanLength == 1)
1358 {
1359 // a little optimization for 1 clean character
1360 // (we could have let the previous if(...) handle them all)
1361 writer.write(ch);
1362 }
1363 }
1364
1365 /**
1366 * Writes the specified <var>string</var>, replacing <VAR>specials</VAR>
1367 * and UTF-16 surrogates with character references <CODE>&amp;#xnn;</CODE>.
1368 *
1369 * @param string String to convert to XML format.
1370 * @param encoding CURRENTLY NOT IMPLEMENTED.
1371 *
1372 * @throws IOException if an I/O error occurs while writing to the writer
1373 */
1374 public void writeAttrString(
1375 final java.io.Writer writer, String string, String encoding)
1376 throws IOException
1377 {
1378 final int end = string.length();
1379 if (end > m_attrBuff.length)
1380 {
1381 m_attrBuff = new char[end * 2 + 1];
1382 }
1383 string.getChars(0, end, m_attrBuff, 0);
1384 final char[] chars = m_attrBuff;
1385
1386
1387
1388 int cleanStart = 0;
1389 int cleanLength = 0;
1390
1391 char ch = 0;
1392 for (int i = 0; i < end; i++)
1393 {
1394 ch = chars[i];
1395
1396 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1408 else if (
1409 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1410 {
1411 cleanLength++; // no escaping in this case, as specified in 15.2
1412 }
1413 else
1414 {
1415 if (cleanLength > 0)
1416 {
1417 writer.write(chars,cleanStart,cleanLength);
1418 cleanLength = 0;
1419 }
1420 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1421
1422 if (i != pos)
1423 {
1424 i = pos - 1;
1425 }
1426 else
1427 {
1428 if (Encodings.isHighUTF16Surrogate(ch))
1429 {
1430
1431 writeUTF16Surrogate(ch, chars, i, end);
1432 i++; // two input characters processed
1433 // this increments by one and the for()
1434 // loop itself increments by another one.
1435 }
1436
1437 // The next is kind of a hack to keep from escaping in the case
1438 // of Shift_JIS and the like.
1439
1440 /*
1441 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1442 && (ch != 160))
1443 {
1444 writer.write(ch); // no escaping in this case
1445 }
1446 else
1447 */
1448 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1449 if (null != outputStringForChar)
1450 {
1451 writer.write(outputStringForChar);
1452 }
1453 else if (escapingNotNeeded(ch))
1454 {
|
1 /*
2 * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
3 */
4 /*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements. See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 package com.sun.org.apache.xml.internal.serializer;
22
23 import java.io.IOException;
24 import java.util.Properties;
25
26 import javax.xml.transform.Result;
27
28 import org.xml.sax.Attributes;
29 import org.xml.sax.SAXException;
30
31 import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
32 import com.sun.org.apache.xml.internal.serializer.utils.Utils;
33
34 /**
35 * This serializer takes a series of SAX or
36 * SAX-like events and writes its output
37 * to the given stream.
38 *
39 * This class is not a public API; it is public
40 * because it is used from another package.
41 *
42 * @xsl.usage internal
43 * @LastModified: Sept 2018
44 */
45 public final class ToHTMLStream extends ToStream
46 {
47
48 /** This flag is set while receiving events from the DTD */
49 protected boolean m_inDTD = false;
50
51 /** True if the previous element is a block element. */
52 private boolean m_isprevblock = false;
53
54 /**
55 * Map that tells which XML characters should have special treatment, and it
56 * provides character to entity name lookup.
57 */
58 private static final CharInfo m_htmlcharInfo =
59 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
60 CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
61
62 /** A digital search trie for fast, case-insensitive lookup of ElemDesc objects. */
63 static final Trie m_elementFlags = new Trie();
1033 {
1034 throw new SAXException(e);
1035 }
1036 }
1037
1038 /**
1039 * Process an attribute.
1040 * @param writer The writer to write the processed output to.
1041 * @param name The name of the attribute.
1042 * @param value The value of the attribute.
1043 * @param elemDesc The description of the HTML element
1044 * that has this attribute.
1045 *
1046 * @throws org.xml.sax.SAXException
1047 */
1048 protected void processAttribute(
1049 java.io.Writer writer,
1050 String name,
1051 String value,
1052 ElemDesc elemDesc)
1053 throws IOException, SAXException
1054 {
1055 writer.write(' ');
1056
1057 if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
1058 && elemDesc != null
1059 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1060 {
1061 writer.write(name);
1062 }
1063 else
1064 {
1065 // %REVIEW% %OPT%
1066 // Two calls to single-char write may NOT
1067 // be more efficient than one to string-write...
1068 writer.write(name);
1069 writer.write("=\"");
1070 if ( elemDesc != null
1071 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1072 writeAttrURI(writer, value, m_specialEscapeURLs);
1073 else
1357 }
1358 else if (cleanLength == 1)
1359 {
1360 // a little optimization for 1 clean character
1361 // (we could have let the previous if(...) handle them all)
1362 writer.write(ch);
1363 }
1364 }
1365
1366 /**
1367 * Writes the specified <var>string</var>, replacing <VAR>specials</VAR>
1368 * and UTF-16 surrogates with character references <CODE>&amp;#xnn;</CODE>.
1369 *
1370 * @param string String to convert to XML format.
1371 * @param encoding CURRENTLY NOT IMPLEMENTED.
1372 *
1373 * @throws org.xml.sax.SAXException
1374 */
1375 public void writeAttrString(
1376 final java.io.Writer writer, String string, String encoding)
1377 throws IOException, SAXException
1378 {
1379 final int end = string.length();
1380 if (end > m_attrBuff.length)
1381 {
1382 m_attrBuff = new char[end * 2 + 1];
1383 }
1384 string.getChars(0, end, m_attrBuff, 0);
1385 final char[] chars = m_attrBuff;
1386
1387
1388
1389 int cleanStart = 0;
1390 int cleanLength = 0;
1391
1392 char ch = 0;
1393 for (int i = 0; i < end; i++)
1394 {
1395 ch = chars[i];
1396
1397 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1409 else if (
1410 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1411 {
1412 cleanLength++; // no escaping in this case, as specified in 15.2
1413 }
1414 else
1415 {
1416 if (cleanLength > 0)
1417 {
1418 writer.write(chars,cleanStart,cleanLength);
1419 cleanLength = 0;
1420 }
1421 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1422
1423 if (i != pos)
1424 {
1425 i = pos - 1;
1426 }
1427 else
1428 {
1429 if (Encodings.isHighUTF16Surrogate(ch) ||
1430 Encodings.isLowUTF16Surrogate(ch))
1431 {
1432 if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
1433 // move the index if the low surrogate is consumed
1434 // as writeUTF16Surrogate has written the pair
1435 if (Encodings.isHighUTF16Surrogate(ch)) {
1436 i++;
1437 }
1438 }
1439 }
1440
1441 // The next is kind of a hack to keep from escaping in the case
1442 // of Shift_JIS and the like.
1443
1444 /*
1445 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1446 && (ch != 160))
1447 {
1448 writer.write(ch); // no escaping in this case
1449 }
1450 else
1451 */
1452 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1453 if (null != outputStringForChar)
1454 {
1455 writer.write(outputStringForChar);
1456 }
1457 else if (escapingNotNeeded(ch))
1458 {
|