< prev index next >
src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
--- 1,7 ----
/*
! * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
*** 20,29 ****
--- 20,30 ----
package com.sun.org.apache.xerces.internal.impl ;
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
+ import com.sun.org.apache.xerces.internal.impl.io.UTF16Reader;
import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
import com.sun.org.apache.xerces.internal.util.*;
import com.sun.org.apache.xerces.internal.util.URI;
*** 87,97 ****
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author K.Venugopal SUN Microsystems
* @author Neeraj Bajaj SUN Microsystems
* @author Sunitha Reddy SUN Microsystems
! * @LastModified: Nov 2018
*/
public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
//
// Constants
--- 88,98 ----
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author K.Venugopal SUN Microsystems
* @author Neeraj Bajaj SUN Microsystems
* @author Sunitha Reddy SUN Microsystems
! * @LastModified: Apr 2019
*/
public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
//
// Constants
*** 410,422 ****
private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
/** Augmentations for entities. */
private final Augmentations fEntityAugs = new AugmentationsImpl();
- /** Pool of character buffers. */
- private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
-
/** indicate whether Catalog should be used for resolving external resources */
private boolean fUseCatalog = true;
CatalogFeatures fCatalogFeatures;
CatalogResolver fCatalogResolver;
--- 411,420 ----
*** 692,745 ****
}
}
}
// wrap this stream in RewindableInputStream
! stream = new RewindableInputStream(stream);
// perform auto-detect of encoding if necessary
if (encoding == null) {
// read first four bytes and determine encoding
final byte[] b4 = new byte[4];
int count = 0;
for (; count<4; count++ ) {
! b4[count] = (byte)stream.read();
}
if (count == 4) {
! Object [] encodingDesc = getEncodingName(b4, count);
! encoding = (String)(encodingDesc[0]);
! isBigEndian = (Boolean)(encodingDesc[1]);
!
stream.reset();
// Special case UTF-8 files with BOM created by Microsoft
// tools. It's more efficient to consume the BOM than make
// the reader perform extra checks. -Ac
! if (count > 2 && encoding.equals("UTF-8")) {
! int b0 = b4[0] & 0xFF;
! int b1 = b4[1] & 0xFF;
! int b2 = b4[2] & 0xFF;
! if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
! // ignore first three bytes...
stream.skip(3);
}
}
! reader = createReader(stream, encoding, isBigEndian);
} else {
reader = createReader(stream, encoding, isBigEndian);
}
}
// use specified encoding
else {
encoding = encoding.toUpperCase(Locale.ENGLISH);
// If encoding is UTF-8, consume BOM if one is present.
! if (encoding.equals("UTF-8")) {
final int[] b3 = new int[3];
int count = 0;
for (; count < 3; ++count) {
! b3[count] = stream.read();
if (b3[count] == -1)
break;
}
if (count == 3) {
if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
--- 690,747 ----
}
}
}
// wrap this stream in RewindableInputStream
! RewindableInputStream rewindableStream = new RewindableInputStream(stream);
! stream = rewindableStream;
// perform auto-detect of encoding if necessary
if (encoding == null) {
// read first four bytes and determine encoding
final byte[] b4 = new byte[4];
int count = 0;
for (; count<4; count++ ) {
! b4[count] = (byte)rewindableStream.readAndBuffer();
}
if (count == 4) {
! final EncodingInfo info = getEncodingInfo(b4, count);
! encoding = info.autoDetectedEncoding;
! final String readerEncoding = info.readerEncoding;
! isBigEndian = info.isBigEndian;
stream.reset();
+ if (info.hasBOM) {
// Special case UTF-8 files with BOM created by Microsoft
// tools. It's more efficient to consume the BOM than make
// the reader perform extra checks. -Ac
! if (EncodingInfo.STR_UTF8.equals(readerEncoding)) {
! // UTF-8 BOM: 0xEF 0xBB 0xBF
stream.skip(3);
}
+ // It's also more efficient to consume the UTF-16 BOM.
+ else if (EncodingInfo.STR_UTF16.equals(readerEncoding)) {
+ // UTF-16 BE BOM: 0xFE 0xFF
+ // UTF-16 LE BOM: 0xFF 0xFE
+ stream.skip(2);
}
! }
! reader = createReader(stream, readerEncoding, isBigEndian);
} else {
reader = createReader(stream, encoding, isBigEndian);
}
}
// use specified encoding
else {
encoding = encoding.toUpperCase(Locale.ENGLISH);
// If encoding is UTF-8, consume BOM if one is present.
! if (EncodingInfo.STR_UTF8.equals(encoding)) {
final int[] b3 = new int[3];
int count = 0;
for (; count < 3; ++count) {
! b3[count] = rewindableStream.readAndBuffer();
if (b3[count] == -1)
break;
}
if (count == 3) {
if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
*** 748,807 ****
}
} else {
stream.reset();
}
}
! // If encoding is UTF-16, we still need to read the first four bytes
! // in order to discover the byte order.
! else if (encoding.equals("UTF-16")) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = stream.read();
if (b4[count] == -1)
break;
}
stream.reset();
-
- String utf16Encoding = "UTF-16";
if (count >= 2) {
final int b0 = b4[0];
final int b1 = b4[1];
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
- utf16Encoding = "UTF-16BE";
isBigEndian = Boolean.TRUE;
}
else if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
- utf16Encoding = "UTF-16LE";
isBigEndian = Boolean.FALSE;
}
else if (count == 4) {
final int b2 = b4[2];
final int b3 = b4[3];
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
// UTF-16, big-endian, no BOM
- utf16Encoding = "UTF-16BE";
isBigEndian = Boolean.TRUE;
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
// UTF-16, little-endian, no BOM
- utf16Encoding = "UTF-16LE";
isBigEndian = Boolean.FALSE;
}
}
}
- reader = createReader(stream, utf16Encoding, isBigEndian);
}
// If encoding is UCS-4, we still need to read the first four bytes
// in order to discover the byte order.
! else if (encoding.equals("ISO-10646-UCS-4")) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = stream.read();
if (b4[count] == -1)
break;
}
stream.reset();
--- 750,804 ----
}
} else {
stream.reset();
}
}
! // If encoding is UTF-16, we still need to read the first
! // four bytes, in order to discover the byte order.
! else if (EncodingInfo.STR_UTF16.equals(encoding)) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = rewindableStream.readAndBuffer();
if (b4[count] == -1)
break;
}
stream.reset();
if (count >= 2) {
final int b0 = b4[0];
final int b1 = b4[1];
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
isBigEndian = Boolean.TRUE;
+ stream.skip(2);
}
else if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
isBigEndian = Boolean.FALSE;
+ stream.skip(2);
}
else if (count == 4) {
final int b2 = b4[2];
final int b3 = b4[3];
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
// UTF-16, big-endian, no BOM
isBigEndian = Boolean.TRUE;
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
// UTF-16, little-endian, no BOM
isBigEndian = Boolean.FALSE;
}
}
}
}
// If encoding is UCS-4, we still need to read the first four bytes
// in order to discover the byte order.
! else if (EncodingInfo.STR_UCS4.equals(encoding)) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = rewindableStream.readAndBuffer();
if (b4[count] == -1)
break;
}
stream.reset();
*** 817,831 ****
}
}
}
// If encoding is UCS-2, we still need to read the first four bytes
// in order to discover the byte order.
! else if (encoding.equals("ISO-10646-UCS-2")) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = stream.read();
if (b4[count] == -1)
break;
}
stream.reset();
--- 814,828 ----
}
}
}
// If encoding is UCS-2, we still need to read the first four bytes
// in order to discover the byte order.
! else if (EncodingInfo.STR_UCS2.equals(encoding)) {
final int[] b4 = new int[4];
int count = 0;
for (; count < 4; ++count) {
! b4[count] = rewindableStream.readAndBuffer();
if (b4[count] == -1)
break;
}
stream.reset();
*** 1796,1806 ****
Integer bufferSize = (Integer)value;
if (bufferSize != null &&
bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
fBufferSize = bufferSize.intValue();
fEntityScanner.setBufferSize(fBufferSize);
- fBufferPool.setExternalBufferSize(fBufferSize);
}
}
if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() &&
propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
fSecurityManager = (XMLSecurityManager)value;
--- 1793,1802 ----
*** 2423,2510 ****
* Returns the IANA encoding name that is auto-detected from
* the bytes specified, with the endian-ness of that encoding where appropriate.
*
* @param b4 The first four bytes of the input.
* @param count The number of bytes actually read.
! * @return a 2-element array: the first element, an IANA-encoding string,
! * the second element a Boolean which is true iff the document is big endian, false
! * if it's little-endian, and null if the distinction isn't relevant.
*/
! protected Object[] getEncodingName(byte[] b4, int count) {
if (count < 2) {
! return defaultEncoding;
}
// UTF-16, with BOM
int b0 = b4[0] & 0xFF;
int b1 = b4[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
! return new Object [] {"UTF-16BE", true};
}
if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
! return new Object [] {"UTF-16LE", false};
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
! return defaultEncoding;
}
// UTF-8 with a BOM
int b2 = b4[2] & 0xFF;
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
! return defaultEncoding;
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 4) {
! return defaultEncoding;
}
// other encodings
int b3 = b4[3] & 0xFF;
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
// UCS-4, big endian (1234)
! return new Object [] {"ISO-10646-UCS-4", true};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
// UCS-4, little endian (4321)
! return new Object [] {"ISO-10646-UCS-4", false};
}
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
// UCS-4, unusual octet order (2143)
// REVISIT: What should this be?
! return new Object [] {"ISO-10646-UCS-4", null};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
// UCS-4, unusual octet order (3412)
// REVISIT: What should this be?
! return new Object [] {"ISO-10646-UCS-4", null};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
// UTF-16, big-endian, no BOM
// (or could turn out to be UCS-2...
// REVISIT: What should this be?
! return new Object [] {"UTF-16BE", true};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
// UTF-16, little-endian, no BOM
// (or could turn out to be UCS-2...
! return new Object [] {"UTF-16LE", false};
}
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
// EBCDIC
// a la xerces1, return CP037 instead of EBCDIC here
! return new Object [] {"CP037", null};
}
! return defaultEncoding;
} // getEncodingName(byte[],int):Object[]
/**
* Creates a reader capable of reading the given input stream in
--- 2419,2505 ----
* Returns information about the encoding that is auto-detected from the
* bytes specified, including its endian-ness (where appropriate) and BOM presence.
*
* @param b4 The first four bytes of the input.
* @param count The number of bytes actually read.
! * @return an instance of EncodingInfo which represents the auto-detected encoding.
*/
! protected EncodingInfo getEncodingInfo(byte[] b4, int count) {
if (count < 2) {
! return EncodingInfo.UTF_8;
}
// UTF-16, with BOM
int b0 = b4[0] & 0xFF;
int b1 = b4[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
// UTF-16, big-endian
! return EncodingInfo.UTF_16_BIG_ENDIAN_WITH_BOM;
}
if (b0 == 0xFF && b1 == 0xFE) {
// UTF-16, little-endian
! return EncodingInfo.UTF_16_LITTLE_ENDIAN_WITH_BOM;
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
! return EncodingInfo.UTF_8;
}
// UTF-8 with a BOM
int b2 = b4[2] & 0xFF;
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
! return EncodingInfo.UTF_8_WITH_BOM;
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 4) {
! return EncodingInfo.UTF_8;
}
// other encodings
int b3 = b4[3] & 0xFF;
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
// UCS-4, big endian (1234)
! return EncodingInfo.UCS_4_BIG_ENDIAN;
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
// UCS-4, little endian (4321)
! return EncodingInfo.UCS_4_LITTLE_ENDIAN;
}
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
// UCS-4, unusual octet order (2143)
// REVISIT: What should this be?
! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
// UCS-4, unusual octet order (3412)
// REVISIT: What should this be?
! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
// UTF-16, big-endian, no BOM
// (or could turn out to be UCS-2...
// REVISIT: What should this be?
! return EncodingInfo.UTF_16_BIG_ENDIAN;
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
// UTF-16, little-endian, no BOM
// (or could turn out to be UCS-2...
! return EncodingInfo.UTF_16_LITTLE_ENDIAN;
}
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
// EBCDIC
// a la xerces1, return CP037 instead of EBCDIC here
! return EncodingInfo.EBCDIC;
}
! // default encoding
! return EncodingInfo.UTF_8;
} // getEncodingInfo(byte[],int):EncodingInfo
/**
* Creates a reader capable of reading the given input stream in
*** 2515,2587 ****
* encoded using. If the user has specified that
* Java encoding names are allowed, then the
* encoding name may be a Java encoding name;
* otherwise, it is an ianaEncoding name.
* @param isBigEndian For encodings (like UCS-4), whose names cannot
! * specify a byte order, this tells whether the order is bigEndian. null menas
! * unknown or not relevant.
*
* @return Returns a reader.
*/
protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
throws IOException {
! // normalize encoding name
! if (encoding == null) {
! encoding = "UTF-8";
}
!
! // try to use an optimized reader
! String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
! if (ENCODING.equals("UTF-8")) {
! if (DEBUG_ENCODINGS) {
! System.out.println("$$$ creating UTF8Reader");
! }
! return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
! }
! if (ENCODING.equals("US-ASCII")) {
! if (DEBUG_ENCODINGS) {
! System.out.println("$$$ creating ASCIIReader");
! }
! return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
! }
! if(ENCODING.equals("ISO-10646-UCS-4")) {
if(isBigEndian != null) {
! boolean isBE = isBigEndian.booleanValue();
! if(isBE) {
return new UCSReader(inputStream, UCSReader.UCS4BE);
} else {
return new UCSReader(inputStream, UCSReader.UCS4LE);
}
} else {
! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
"EncodingByteOrderUnsupported",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
! }
! if(ENCODING.equals("ISO-10646-UCS-2")) {
! if(isBigEndian != null) { // sould never happen with this encoding...
! boolean isBE = isBigEndian.booleanValue();
! if(isBE) {
return new UCSReader(inputStream, UCSReader.UCS2BE);
} else {
return new UCSReader(inputStream, UCSReader.UCS2LE);
}
} else {
! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
"EncodingByteOrderUnsupported",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
}
// check for valid name
boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
boolean validJava = XMLChar.isValidJavaEncoding(encoding);
if (!validIANA || (fAllowJavaEncodings && !validJava)) {
! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
"EncodingDeclInvalid",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1
// because every byte is a valid ISO Latin 1 character.
--- 2510,2581 ----
* encoded using. If the user has specified that
* Java encoding names are allowed, then the
* encoding name may be a Java encoding name;
* otherwise, it is an ianaEncoding name.
* @param isBigEndian For encodings (like UCS-4), whose names cannot
! * specify a byte order, this tells whether the order
! * is bigEndian. null if unknown or irrelevant.
*
* @return Returns a reader.
*/
protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
throws IOException {
! String enc = (encoding != null) ? encoding : EncodingInfo.STR_UTF8;
! enc = enc.toUpperCase(Locale.ENGLISH);
! MessageFormatter f = fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
! Locale l = fErrorReporter.getLocale();
! switch (enc) {
! case EncodingInfo.STR_UTF8:
! return new UTF8Reader(inputStream, fBufferSize, f, l);
! case EncodingInfo.STR_UTF16:
! if (isBigEndian != null) {
! return new UTF16Reader(inputStream, fBufferSize, isBigEndian, f, l);
}
! break;
! case EncodingInfo.STR_UTF16BE:
! return new UTF16Reader(inputStream, fBufferSize, true, f, l);
! case EncodingInfo.STR_UTF16LE:
! return new UTF16Reader(inputStream, fBufferSize, false, f, l);
! case EncodingInfo.STR_UCS4:
if(isBigEndian != null) {
! if(isBigEndian) {
return new UCSReader(inputStream, UCSReader.UCS4BE);
} else {
return new UCSReader(inputStream, UCSReader.UCS4LE);
}
} else {
! fErrorReporter.reportError(this.getEntityScanner(),
! XMLMessageFormatter.XML_DOMAIN,
"EncodingByteOrderUnsupported",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
! break;
! case EncodingInfo.STR_UCS2:
! if(isBigEndian != null) {
! if(isBigEndian) {
return new UCSReader(inputStream, UCSReader.UCS2BE);
} else {
return new UCSReader(inputStream, UCSReader.UCS2LE);
}
} else {
! fErrorReporter.reportError(this.getEntityScanner(),
! XMLMessageFormatter.XML_DOMAIN,
"EncodingByteOrderUnsupported",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
+ break;
}
// check for valid name
boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
boolean validJava = XMLChar.isValidJavaEncoding(encoding);
if (!validIANA || (fAllowJavaEncodings && !validJava)) {
! fErrorReporter.reportError(this.getEntityScanner(),
! XMLMessageFormatter.XML_DOMAIN,
"EncodingDeclInvalid",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1
// because every byte is a valid ISO Latin 1 character.
*** 2593,2608 ****
// on. -Ac
encoding = "ISO-8859-1";
}
// try to use a Java reader
! String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
if (javaEncoding == null) {
! if(fAllowJavaEncodings) {
javaEncoding = encoding;
} else {
! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
"EncodingDeclInvalid",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
// see comment above.
javaEncoding = "ISO8859_1";
--- 2587,2603 ----
// on. -Ac
encoding = "ISO-8859-1";
}
// try to use a Java reader
! String javaEncoding = EncodingMap.getIANA2JavaMapping(enc);
if (javaEncoding == null) {
! if (fAllowJavaEncodings) {
javaEncoding = encoding;
} else {
! fErrorReporter.reportError(this.getEntityScanner(),
! XMLMessageFormatter.XML_DOMAIN,
"EncodingDeclInvalid",
new Object[] { encoding },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
// see comment above.
javaEncoding = "ISO8859_1";
*** 2896,3007 ****
}
}
} // print()
/**
! * Buffer used in entity manager to reuse character arrays instead
! * of creating new ones every time.
! *
! * @xerces.internal
! *
! * @author Ankit Pasricha, IBM
! */
! private static class CharacterBuffer {
!
! /** character buffer */
! private char[] ch;
!
! /** whether the buffer is for an external or internal scanned entity */
! private boolean isExternal;
!
! public CharacterBuffer(boolean isExternal, int size) {
! this.isExternal = isExternal;
! ch = new char[size];
! }
! }
!
!
! /**
! * Stores a number of character buffers and provides it to the entity
! * manager to use when an entity is seen.
*
* @xerces.internal
*
! * @author Ankit Pasricha, IBM
*/
! private static class CharacterBufferPool {
!
! private static final int DEFAULT_POOL_SIZE = 3;
!
! private CharacterBuffer[] fInternalBufferPool;
! private CharacterBuffer[] fExternalBufferPool;
!
! private int fExternalBufferSize;
! private int fInternalBufferSize;
! private int poolSize;
!
! private int fInternalTop;
! private int fExternalTop;
!
! public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
! this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
! }
! public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
! fExternalBufferSize = externalBufferSize;
! fInternalBufferSize = internalBufferSize;
! this.poolSize = poolSize;
! init();
! }
!
! /** Initializes buffer pool. **/
! private void init() {
! fInternalBufferPool = new CharacterBuffer[poolSize];
! fExternalBufferPool = new CharacterBuffer[poolSize];
! fInternalTop = -1;
! fExternalTop = -1;
! }
!
! /** Retrieves buffer from pool. **/
! public CharacterBuffer getBuffer(boolean external) {
! if (external) {
! if (fExternalTop > -1) {
! return fExternalBufferPool[fExternalTop--];
! }
! else {
! return new CharacterBuffer(true, fExternalBufferSize);
! }
! }
! else {
! if (fInternalTop > -1) {
! return fInternalBufferPool[fInternalTop--];
! }
! else {
! return new CharacterBuffer(false, fInternalBufferSize);
! }
! }
! }
!
! /** Returns buffer to pool. **/
! public void returnToPool(CharacterBuffer buffer) {
! if (buffer.isExternal) {
! if (fExternalTop < fExternalBufferPool.length - 1) {
! fExternalBufferPool[++fExternalTop] = buffer;
! }
! }
! else if (fInternalTop < fInternalBufferPool.length - 1) {
! fInternalBufferPool[++fInternalTop] = buffer;
! }
! }
!
! /** Sets the size of external buffers and dumps the old pool. **/
! public void setExternalBufferSize(int bufferSize) {
! fExternalBufferSize = bufferSize;
! fExternalBufferPool = new CharacterBuffer[poolSize];
! fExternalTop = -1;
! }
! }
/**
* This class wraps the byte inputstreams we're presented with.
* We need it because java.io.InputStreams don't provide
* functionality to reread processed bytes, and they have a habit
--- 2891,2972 ----
}
}
} // print()
/**
! * Information about auto-detectable encodings.
*
* @xerces.internal
*
! * @author Michael Glavassevich, IBM
*/
! private static class EncodingInfo {
! public static final String STR_UTF8 = "UTF-8";
! public static final String STR_UTF16 = "UTF-16";
! public static final String STR_UTF16BE = "UTF-16BE";
! public static final String STR_UTF16LE = "UTF-16LE";
! public static final String STR_UCS4 = "ISO-10646-UCS-4";
! public static final String STR_UCS2 = "ISO-10646-UCS-2";
! public static final String STR_CP037 = "CP037";
!
! /** UTF-8 **/
! public static final EncodingInfo UTF_8 =
! new EncodingInfo(STR_UTF8, null, false);
!
! /** UTF-8, with BOM **/
! public static final EncodingInfo UTF_8_WITH_BOM =
! new EncodingInfo(STR_UTF8, null, true);
!
! /** UTF-16, big-endian **/
! public static final EncodingInfo UTF_16_BIG_ENDIAN =
! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, false);
!
! /** UTF-16, big-endian with BOM **/
! public static final EncodingInfo UTF_16_BIG_ENDIAN_WITH_BOM =
! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, true);
!
! /** UTF-16, little-endian **/
! public static final EncodingInfo UTF_16_LITTLE_ENDIAN =
! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, false);
!
! /** UTF-16, little-endian with BOM **/
! public static final EncodingInfo UTF_16_LITTLE_ENDIAN_WITH_BOM =
! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, true);
!
! /** UCS-4, big-endian **/
! public static final EncodingInfo UCS_4_BIG_ENDIAN =
! new EncodingInfo(STR_UCS4, Boolean.TRUE, false);
!
! /** UCS-4, little-endian **/
! public static final EncodingInfo UCS_4_LITTLE_ENDIAN =
! new EncodingInfo(STR_UCS4, Boolean.FALSE, false);
!
! /** UCS-4, unusual byte-order (2143) or (3412) **/
! public static final EncodingInfo UCS_4_UNUSUAL_BYTE_ORDER =
! new EncodingInfo(STR_UCS4, null, false);
!
! /** EBCDIC **/
! public static final EncodingInfo EBCDIC = new EncodingInfo(STR_CP037, null, false);
!
! public final String autoDetectedEncoding;
! public final String readerEncoding;
! public final Boolean isBigEndian;
! public final boolean hasBOM;
!
! private EncodingInfo(String autoDetectedEncoding, Boolean isBigEndian, boolean hasBOM) {
! this(autoDetectedEncoding, autoDetectedEncoding, isBigEndian, hasBOM);
! } // <init>(String,Boolean,boolean)
!
! private EncodingInfo(String autoDetectedEncoding, String readerEncoding,
! Boolean isBigEndian, boolean hasBOM) {
! this.autoDetectedEncoding = autoDetectedEncoding;
! this.readerEncoding = readerEncoding;
! this.isBigEndian = isBigEndian;
! this.hasBOM = hasBOM;
! } // <init>(String,String,Boolean,boolean)
! } // class EncodingInfo
/**
* This class wraps the byte inputstreams we're presented with.
* We need it because java.io.InputStreams don't provide
* functionality to reread processed bytes, and they have a habit
*** 3050,3112 ****
public void rewind() {
fOffset = fStartOffset;
}
! public int read() throws IOException {
! int b = 0;
! if (fOffset < fLength) {
! return fData[fOffset++] & 0xff;
! }
! if (fOffset == fEndOffset) {
! return -1;
! }
if (fOffset == fData.length) {
byte[] newData = new byte[fOffset << 1];
System.arraycopy(fData, 0, newData, 0, fOffset);
fData = newData;
}
! b = fInputStream.read();
if (b == -1) {
fEndOffset = fOffset;
return -1;
}
fData[fLength++] = (byte)b;
fOffset++;
return b & 0xff;
}
public int read(byte[] b, int off, int len) throws IOException {
! int bytesLeft = fLength - fOffset;
if (bytesLeft == 0) {
if (fOffset == fEndOffset) {
return -1;
}
! /**
! * //System.out.println("fCurrentEntitty = " + fCurrentEntity );
! * //System.out.println("fInputStream = " + fInputStream );
! * // better get some more for the voracious reader... */
!
if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
if (!fCurrentEntity.xmlDeclChunkRead)
{
fCurrentEntity.xmlDeclChunkRead = true;
len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
}
return fInputStream.read(b, off, len);
}
!
! int returnedVal = read();
! if(returnedVal == -1) {
fEndOffset = fOffset;
return -1;
}
b[off] = (byte)returnedVal;
return 1;
-
}
if (len < bytesLeft) {
if (len <= 0) {
return 0;
}
--- 3015,3077 ----
public void rewind() {
fOffset = fStartOffset;
}
! public int readAndBuffer() throws IOException {
if (fOffset == fData.length) {
byte[] newData = new byte[fOffset << 1];
System.arraycopy(fData, 0, newData, 0, fOffset);
fData = newData;
}
! final int b = fInputStream.read();
if (b == -1) {
fEndOffset = fOffset;
return -1;
}
fData[fLength++] = (byte)b;
fOffset++;
return b & 0xff;
}
+ public int read() throws IOException {
+ if (fOffset < fLength) {
+ return fData[fOffset++] & 0xff;
+ }
+ if (fOffset == fEndOffset) {
+ return -1;
+ }
+ if (fCurrentEntity.mayReadChunks) {
+ return fInputStream.read();
+ }
+ return readAndBuffer();
+ }
+
public int read(byte[] b, int off, int len) throws IOException {
! final int bytesLeft = fLength - fOffset;
if (bytesLeft == 0) {
if (fOffset == fEndOffset) {
return -1;
}
! // read a block of data as requested
if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
if (!fCurrentEntity.xmlDeclChunkRead)
{
fCurrentEntity.xmlDeclChunkRead = true;
len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
}
return fInputStream.read(b, off, len);
}
! int returnedVal = readAndBuffer();
! if (returnedVal == -1) {
fEndOffset = fOffset;
return -1;
}
b[off] = (byte)returnedVal;
return 1;
}
if (len < bytesLeft) {
if (len <= 0) {
return 0;
}
*** 3118,3129 ****
}
fOffset += len;
return len;
}
! public long skip(long n)
! throws IOException {
int bytesLeft;
if (n <= 0) {
return 0;
}
bytesLeft = fLength - fOffset;
--- 3083,3093 ----
}
fOffset += len;
return len;
}
! public long skip(long n) throws IOException {
int bytesLeft;
if (n <= 0) {
return 0;
}
bytesLeft = fLength - fOffset;
*** 3152,3162 ****
*/
return fInputStream.skip(n) + bytesLeft;
}
public int available() throws IOException {
! int bytesLeft = fLength - fOffset;
if (bytesLeft == 0) {
if (fOffset == fEndOffset) {
return -1;
}
return fCurrentEntity.mayReadChunks ? fInputStream.available()
--- 3116,3126 ----
*/
return fInputStream.skip(n) + bytesLeft;
}
public int available() throws IOException {
! final int bytesLeft = fLength - fOffset;
if (bytesLeft == 0) {
if (fOffset == fEndOffset) {
return -1;
}
return fCurrentEntity.mayReadChunks ? fInputStream.available()
*** 3169,3179 ****
fMark = fOffset;
}
public void reset() {
fOffset = fMark;
- //test();
}
public boolean markSupported() {
return true;
}
--- 3133,3142 ----
< prev index next >