< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. --- 1,7 ---- /* ! * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership.
*** 20,29 **** --- 20,30 ---- package com.sun.org.apache.xerces.internal.impl ; import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; import com.sun.org.apache.xerces.internal.impl.io.UCSReader; + import com.sun.org.apache.xerces.internal.impl.io.UTF16Reader; import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; import com.sun.org.apache.xerces.internal.util.*; import com.sun.org.apache.xerces.internal.util.URI;
*** 87,97 **** * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author K.Venugopal SUN Microsystems * @author Neeraj Bajaj SUN Microsystems * @author Sunitha Reddy SUN Microsystems ! * @LastModified: Nov 2018 */ public class XMLEntityManager implements XMLComponent, XMLEntityResolver { // // Constants --- 88,98 ---- * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author K.Venugopal SUN Microsystems * @author Neeraj Bajaj SUN Microsystems * @author Sunitha Reddy SUN Microsystems ! * @LastModified: Apr 2019 */ public class XMLEntityManager implements XMLComponent, XMLEntityResolver { // // Constants
*** 410,422 **** private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); /** Augmentations for entities. */ private final Augmentations fEntityAugs = new AugmentationsImpl(); - /** Pool of character buffers. */ - private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); - /** indicate whether Catalog should be used for resolving external resources */ private boolean fUseCatalog = true; CatalogFeatures fCatalogFeatures; CatalogResolver fCatalogResolver; --- 411,420 ----
*** 692,745 **** } } } // wrap this stream in RewindableInputStream ! stream = new RewindableInputStream(stream); // perform auto-detect of encoding if necessary if (encoding == null) { // read first four bytes and determine encoding final byte[] b4 = new byte[4]; int count = 0; for (; count<4; count++ ) { ! b4[count] = (byte)stream.read(); } if (count == 4) { ! Object [] encodingDesc = getEncodingName(b4, count); ! encoding = (String)(encodingDesc[0]); ! isBigEndian = (Boolean)(encodingDesc[1]); ! stream.reset(); // Special case UTF-8 files with BOM created by Microsoft // tools. It's more efficient to consume the BOM than make // the reader perform extra checks. -Ac ! if (count > 2 && encoding.equals("UTF-8")) { ! int b0 = b4[0] & 0xFF; ! int b1 = b4[1] & 0xFF; ! int b2 = b4[2] & 0xFF; ! if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! // ignore first three bytes... stream.skip(3); } } ! reader = createReader(stream, encoding, isBigEndian); } else { reader = createReader(stream, encoding, isBigEndian); } } // use specified encoding else { encoding = encoding.toUpperCase(Locale.ENGLISH); // If encoding is UTF-8, consume BOM if one is present. ! if (encoding.equals("UTF-8")) { final int[] b3 = new int[3]; int count = 0; for (; count < 3; ++count) { ! b3[count] = stream.read(); if (b3[count] == -1) break; } if (count == 3) { if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) { --- 690,747 ---- } } } // wrap this stream in RewindableInputStream ! RewindableInputStream rewindableStream = new RewindableInputStream(stream); ! stream = rewindableStream; // perform auto-detect of encoding if necessary if (encoding == null) { // read first four bytes and determine encoding final byte[] b4 = new byte[4]; int count = 0; for (; count<4; count++ ) { ! b4[count] = (byte)rewindableStream.readAndBuffer(); } if (count == 4) { ! final EncodingInfo info = getEncodingInfo(b4, count); ! encoding = info.autoDetectedEncoding; ! final String readerEncoding = info.readerEncoding; ! isBigEndian = info.isBigEndian; stream.reset(); + if (info.hasBOM) { // Special case UTF-8 files with BOM created by Microsoft // tools. It's more efficient to consume the BOM than make // the reader perform extra checks. -Ac ! if (EncodingInfo.STR_UTF8.equals(readerEncoding)) { ! // UTF-8 BOM: 0xEF 0xBB 0xBF stream.skip(3); } + // It's also more efficient to consume the UTF-16 BOM. + else if (EncodingInfo.STR_UTF16.equals(readerEncoding)) { + // UTF-16 BE BOM: 0xFE 0xFF + // UTF-16 LE BOM: 0xFF 0xFE + stream.skip(2); } ! } ! reader = createReader(stream, readerEncoding, isBigEndian); } else { reader = createReader(stream, encoding, isBigEndian); } } // use specified encoding else { encoding = encoding.toUpperCase(Locale.ENGLISH); // If encoding is UTF-8, consume BOM if one is present. ! if (EncodingInfo.STR_UTF8.equals(encoding)) { final int[] b3 = new int[3]; int count = 0; for (; count < 3; ++count) { ! b3[count] = rewindableStream.readAndBuffer(); if (b3[count] == -1) break; } if (count == 3) { if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
*** 748,807 **** } } else { stream.reset(); } } ! // If encoding is UTF-16, we still need to read the first four bytes ! // in order to discover the byte order. ! else if (encoding.equals("UTF-16")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset(); - - String utf16Encoding = "UTF-16"; if (count >= 2) { final int b0 = b4[0]; final int b1 = b4[1]; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian - utf16Encoding = "UTF-16BE"; isBigEndian = Boolean.TRUE; } else if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian - utf16Encoding = "UTF-16LE"; isBigEndian = Boolean.FALSE; } else if (count == 4) { final int b2 = b4[2]; final int b3 = b4[3]; if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM - utf16Encoding = "UTF-16BE"; isBigEndian = Boolean.TRUE; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM - utf16Encoding = "UTF-16LE"; isBigEndian = Boolean.FALSE; } } } - reader = createReader(stream, utf16Encoding, isBigEndian); } // If encoding is UCS-4, we still need to read the first four bytes // in order to discover the byte order. ! else if (encoding.equals("ISO-10646-UCS-4")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset(); --- 750,804 ---- } } else { stream.reset(); } } ! // If encoding is UTF-16, we still need to read the first ! // four bytes, in order to discover the byte order. ! else if (EncodingInfo.STR_UTF16.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset(); if (count >= 2) { final int b0 = b4[0]; final int b1 = b4[1]; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian isBigEndian = Boolean.TRUE; + stream.skip(2); } else if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian isBigEndian = Boolean.FALSE; + stream.skip(2); } else if (count == 4) { final int b2 = b4[2]; final int b3 = b4[3]; if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM isBigEndian = Boolean.TRUE; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM isBigEndian = Boolean.FALSE; } } } } // If encoding is UCS-4, we still need to read the first four bytes // in order to discover the byte order. ! else if (EncodingInfo.STR_UCS4.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset();
*** 817,831 **** } } } // If encoding is UCS-2, we still need to read the first four bytes // in order to discover the byte order. ! else if (encoding.equals("ISO-10646-UCS-2")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset(); --- 814,828 ---- } } } // If encoding is UCS-2, we still need to read the first four bytes // in order to discover the byte order. ! else if (EncodingInfo.STR_UCS2.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset();
*** 1796,1806 **** Integer bufferSize = (Integer)value; if (bufferSize != null && bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { fBufferSize = bufferSize.intValue(); fEntityScanner.setBufferSize(fBufferSize); - fBufferPool.setExternalBufferSize(fBufferSize); } } if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { fSecurityManager = (XMLSecurityManager)value; --- 1793,1802 ----
*** 2423,2510 **** * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. ! * @return a 2-element array: the first element, an IANA-encoding string, ! * the second element a Boolean which is true iff the document is big endian, false ! * if it's little-endian, and null if the distinction isn't relevant. */ ! protected Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { ! return defaultEncoding; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian ! return new Object [] {"UTF-16BE", true}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian ! return new Object [] {"UTF-16LE", false}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { ! return defaultEncoding; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! return defaultEncoding; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { ! return defaultEncoding; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) ! return new Object [] {"ISO-10646-UCS-4", true}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) ! return new Object [] {"ISO-10646-UCS-4", false}; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? ! return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? ! return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? ! return new Object [] {"UTF-16BE", true}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... ! return new Object [] {"UTF-16LE", false}; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here ! return new Object [] {"CP037", null}; } ! return defaultEncoding; } // getEncodingName(byte[],int):Object[] /** * Creates a reader capable of reading the given input stream in --- 2419,2505 ---- * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. ! * @return an instance of EncodingInfo which represents the auto-detected encoding. */ ! protected EncodingInfo getEncodingInfo(byte[] b4, int count) { if (count < 2) { ! return EncodingInfo.UTF_8; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian ! return EncodingInfo.UTF_16_BIG_ENDIAN_WITH_BOM; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian ! return EncodingInfo.UTF_16_LITTLE_ENDIAN_WITH_BOM; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { ! return EncodingInfo.UTF_8; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! return EncodingInfo.UTF_8_WITH_BOM; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { ! return EncodingInfo.UTF_8; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) ! return EncodingInfo.UCS_4_BIG_ENDIAN; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) ! return EncodingInfo.UCS_4_LITTLE_ENDIAN; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? ! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? ! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? ! return EncodingInfo.UTF_16_BIG_ENDIAN; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... ! return EncodingInfo.UTF_16_LITTLE_ENDIAN; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here ! return EncodingInfo.EBCDIC; } ! // default encoding ! return EncodingInfo.UTF_8; } // getEncodingName(byte[],int):Object[] /** * Creates a reader capable of reading the given input stream in
*** 2515,2587 **** * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot ! * specify a byte order, this tells whether the order is bigEndian. null menas ! * unknown or not relevant. * * @return Returns a reader. */ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) throws IOException { ! // normalize encoding name ! if (encoding == null) { ! encoding = "UTF-8"; } ! ! // try to use an optimized reader ! String ENCODING = encoding.toUpperCase(Locale.ENGLISH); ! if (ENCODING.equals("UTF-8")) { ! if (DEBUG_ENCODINGS) { ! System.out.println("$$$ creating UTF8Reader"); ! } ! return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); ! } ! if (ENCODING.equals("US-ASCII")) { ! if (DEBUG_ENCODINGS) { ! System.out.println("$$$ creating ASCIIReader"); ! } ! return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); ! } ! if(ENCODING.equals("ISO-10646-UCS-4")) { if(isBigEndian != null) { ! boolean isBE = isBigEndian.booleanValue(); ! if(isBE) { return new UCSReader(inputStream, UCSReader.UCS4BE); } else { return new UCSReader(inputStream, UCSReader.UCS4LE); } } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } ! } ! if(ENCODING.equals("ISO-10646-UCS-2")) { ! if(isBigEndian != null) { // sould never happen with this encoding... ! boolean isBE = isBigEndian.booleanValue(); ! if(isBE) { return new UCSReader(inputStream, UCSReader.UCS2BE); } else { return new UCSReader(inputStream, UCSReader.UCS2LE); } } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 // because every byte is a valid ISO Latin 1 character. --- 2510,2581 ---- * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot ! * specify a byte order, this tells whether the order ! * is bigEndian. null if unknown or irrelevant. * * @return Returns a reader. */ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) throws IOException { ! String enc = (encoding != null) ? encoding : EncodingInfo.STR_UTF8; ! enc = enc.toUpperCase(Locale.ENGLISH); ! MessageFormatter f = fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN); ! Locale l = fErrorReporter.getLocale(); ! switch (enc) { ! case EncodingInfo.STR_UTF8: ! return new UTF8Reader(inputStream, fBufferSize, f, l); ! case EncodingInfo.STR_UTF16: ! if (isBigEndian != null) { ! return new UTF16Reader(inputStream, fBufferSize, isBigEndian, f, l); } ! break; ! case EncodingInfo.STR_UTF16BE: ! return new UTF16Reader(inputStream, fBufferSize, true, f, l); ! case EncodingInfo.STR_UTF16LE: ! return new UTF16Reader(inputStream, fBufferSize, false, f, l); ! case EncodingInfo.STR_UCS4: if(isBigEndian != null) { ! if(isBigEndian) { return new UCSReader(inputStream, UCSReader.UCS4BE); } else { return new UCSReader(inputStream, UCSReader.UCS4LE); } } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } ! break; ! case EncodingInfo.STR_UCS2: ! if(isBigEndian != null) { ! if(isBigEndian) { return new UCSReader(inputStream, UCSReader.UCS2BE); } else { return new UCSReader(inputStream, UCSReader.UCS2LE); } } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } + break; } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 // because every byte is a valid ISO Latin 1 character.
*** 2593,2608 **** // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader ! String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); if (javaEncoding == null) { ! if(fAllowJavaEncodings) { javaEncoding = encoding; } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // see comment above. javaEncoding = "ISO8859_1"; --- 2587,2603 ---- // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader ! String javaEncoding = EncodingMap.getIANA2JavaMapping(enc); if (javaEncoding == null) { ! if (fAllowJavaEncodings) { javaEncoding = encoding; } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // see comment above. javaEncoding = "ISO8859_1";
*** 2896,3007 **** } } } // print() /** ! * Buffer used in entity manager to reuse character arrays instead ! * of creating new ones every time. ! * ! * @xerces.internal ! * ! * @author Ankit Pasricha, IBM ! */ ! private static class CharacterBuffer { ! ! /** character buffer */ ! private char[] ch; ! ! /** whether the buffer is for an external or internal scanned entity */ ! private boolean isExternal; ! ! public CharacterBuffer(boolean isExternal, int size) { ! this.isExternal = isExternal; ! ch = new char[size]; ! } ! } ! ! ! /** ! * Stores a number of character buffers and provides it to the entity ! * manager to use when an entity is seen. * * @xerces.internal * ! * @author Ankit Pasricha, IBM */ ! private static class CharacterBufferPool { ! ! private static final int DEFAULT_POOL_SIZE = 3; ! ! private CharacterBuffer[] fInternalBufferPool; ! private CharacterBuffer[] fExternalBufferPool; ! ! private int fExternalBufferSize; ! private int fInternalBufferSize; ! private int poolSize; ! ! private int fInternalTop; ! private int fExternalTop; ! ! public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { ! this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); ! } ! public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { ! fExternalBufferSize = externalBufferSize; ! fInternalBufferSize = internalBufferSize; ! this.poolSize = poolSize; ! init(); ! } ! ! /** Initializes buffer pool. **/ ! private void init() { ! fInternalBufferPool = new CharacterBuffer[poolSize]; ! fExternalBufferPool = new CharacterBuffer[poolSize]; ! fInternalTop = -1; ! fExternalTop = -1; ! } ! ! /** Retrieves buffer from pool. **/ ! public CharacterBuffer getBuffer(boolean external) { ! if (external) { ! if (fExternalTop > -1) { ! return fExternalBufferPool[fExternalTop--]; ! } ! else { ! return new CharacterBuffer(true, fExternalBufferSize); ! } ! } ! else { ! if (fInternalTop > -1) { ! return fInternalBufferPool[fInternalTop--]; ! } ! else { ! return new CharacterBuffer(false, fInternalBufferSize); ! } ! } ! } ! ! /** Returns buffer to pool. **/ ! public void returnToPool(CharacterBuffer buffer) { ! if (buffer.isExternal) { ! if (fExternalTop < fExternalBufferPool.length - 1) { ! fExternalBufferPool[++fExternalTop] = buffer; ! } ! } ! else if (fInternalTop < fInternalBufferPool.length - 1) { ! fInternalBufferPool[++fInternalTop] = buffer; ! } ! } ! ! /** Sets the size of external buffers and dumps the old pool. **/ ! public void setExternalBufferSize(int bufferSize) { ! fExternalBufferSize = bufferSize; ! fExternalBufferPool = new CharacterBuffer[poolSize]; ! fExternalTop = -1; ! } ! } /** * This class wraps the byte inputstreams we're presented with. * We need it because java.io.InputStreams don't provide * functionality to reread processed bytes, and they have a habit --- 2891,2972 ---- } } } // print() /** ! * Information about auto-detectable encodings. * * @xerces.internal * ! * @author Michael Glavassevich, IBM */ ! private static class EncodingInfo { ! public static final String STR_UTF8 = "UTF-8"; ! public static final String STR_UTF16 = "UTF-16"; ! public static final String STR_UTF16BE = "UTF-16BE"; ! public static final String STR_UTF16LE = "UTF-16LE"; ! public static final String STR_UCS4 = "ISO-10646-UCS-4"; ! public static final String STR_UCS2 = "ISO-10646-UCS-2"; ! public static final String STR_CP037 = "CP037"; ! ! /** UTF-8 **/ ! public static final EncodingInfo UTF_8 = ! new EncodingInfo(STR_UTF8, null, false); ! ! /** UTF-8, with BOM **/ ! public static final EncodingInfo UTF_8_WITH_BOM = ! new EncodingInfo(STR_UTF8, null, true); ! ! /** UTF-16, big-endian **/ ! public static final EncodingInfo UTF_16_BIG_ENDIAN = ! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, false); ! ! /** UTF-16, big-endian with BOM **/ ! public static final EncodingInfo UTF_16_BIG_ENDIAN_WITH_BOM = ! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, true); ! ! /** UTF-16, little-endian **/ ! public static final EncodingInfo UTF_16_LITTLE_ENDIAN = ! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, false); ! ! /** UTF-16, little-endian with BOM **/ ! public static final EncodingInfo UTF_16_LITTLE_ENDIAN_WITH_BOM = ! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, true); ! ! /** UCS-4, big-endian **/ ! public static final EncodingInfo UCS_4_BIG_ENDIAN = ! new EncodingInfo(STR_UCS4, Boolean.TRUE, false); ! ! /** UCS-4, little-endian **/ ! public static final EncodingInfo UCS_4_LITTLE_ENDIAN = ! new EncodingInfo(STR_UCS4, Boolean.FALSE, false); ! ! /** UCS-4, unusual byte-order (2143) or (3412) **/ ! public static final EncodingInfo UCS_4_UNUSUAL_BYTE_ORDER = ! new EncodingInfo(STR_UCS4, null, false); ! ! /** EBCDIC **/ ! public static final EncodingInfo EBCDIC = new EncodingInfo(STR_CP037, null, false); ! ! public final String autoDetectedEncoding; ! public final String readerEncoding; ! public final Boolean isBigEndian; ! public final boolean hasBOM; ! ! private EncodingInfo(String autoDetectedEncoding, Boolean isBigEndian, boolean hasBOM) { ! this(autoDetectedEncoding, autoDetectedEncoding, isBigEndian, hasBOM); ! } // <init>(String,Boolean,boolean) ! ! private EncodingInfo(String autoDetectedEncoding, String readerEncoding, ! Boolean isBigEndian, boolean hasBOM) { ! this.autoDetectedEncoding = autoDetectedEncoding; ! this.readerEncoding = readerEncoding; ! this.isBigEndian = isBigEndian; ! this.hasBOM = hasBOM; ! } // <init>(String,String,Boolean,boolean) ! } // class EncodingInfo /** * This class wraps the byte inputstreams we're presented with. * We need it because java.io.InputStreams don't provide * functionality to reread processed bytes, and they have a habit
*** 3050,3112 **** public void rewind() { fOffset = fStartOffset; } ! public int read() throws IOException { ! int b = 0; ! if (fOffset < fLength) { ! return fData[fOffset++] & 0xff; ! } ! if (fOffset == fEndOffset) { ! return -1; ! } if (fOffset == fData.length) { byte[] newData = new byte[fOffset << 1]; System.arraycopy(fData, 0, newData, 0, fOffset); fData = newData; } ! b = fInputStream.read(); if (b == -1) { fEndOffset = fOffset; return -1; } fData[fLength++] = (byte)b; fOffset++; return b & 0xff; } public int read(byte[] b, int off, int len) throws IOException { ! int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } ! /** ! * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); ! * //System.out.println("fInputStream = " + fInputStream ); ! * // better get some more for the voracious reader... */ ! if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { if (!fCurrentEntity.xmlDeclChunkRead) { fCurrentEntity.xmlDeclChunkRead = true; len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; } return fInputStream.read(b, off, len); } ! ! int returnedVal = read(); ! if(returnedVal == -1) { fEndOffset = fOffset; return -1; } b[off] = (byte)returnedVal; return 1; - } if (len < bytesLeft) { if (len <= 0) { return 0; } --- 3015,3077 ---- public void rewind() { fOffset = fStartOffset; } ! public int readAndBuffer() throws IOException { if (fOffset == fData.length) { byte[] newData = new byte[fOffset << 1]; System.arraycopy(fData, 0, newData, 0, fOffset); fData = newData; } ! final int b = fInputStream.read(); if (b == -1) { fEndOffset = fOffset; return -1; } fData[fLength++] = (byte)b; fOffset++; return b & 0xff; } + public int read() throws IOException { + if (fOffset < fLength) { + return fData[fOffset++] & 0xff; + } + if (fOffset == fEndOffset) { + return -1; + } + if (fCurrentEntity.mayReadChunks) { + return fInputStream.read(); + } + return readAndBuffer(); + } + public int read(byte[] b, int off, int len) throws IOException { ! final int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } ! // read a block of data as requested if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { if (!fCurrentEntity.xmlDeclChunkRead) { fCurrentEntity.xmlDeclChunkRead = true; len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; } return fInputStream.read(b, off, len); } ! int returnedVal = readAndBuffer(); ! if (returnedVal == -1) { fEndOffset = fOffset; return -1; } b[off] = (byte)returnedVal; return 1; } if (len < bytesLeft) { if (len <= 0) { return 0; }
*** 3118,3129 **** } fOffset += len; return len; } ! public long skip(long n) ! throws IOException { int bytesLeft; if (n <= 0) { return 0; } bytesLeft = fLength - fOffset; --- 3083,3093 ---- } fOffset += len; return len; } ! public long skip(long n) throws IOException { int bytesLeft; if (n <= 0) { return 0; } bytesLeft = fLength - fOffset;
*** 3152,3162 **** */ return fInputStream.skip(n) + bytesLeft; } public int available() throws IOException { ! int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fCurrentEntity.mayReadChunks ? fInputStream.available() --- 3116,3126 ---- */ return fInputStream.skip(n) + bytesLeft; } public int available() throws IOException { ! final int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fCurrentEntity.mayReadChunks ? fInputStream.available()
*** 3169,3179 **** fMark = fOffset; } public void reset() { fOffset = fMark; - //test(); } public boolean markSupported() { return true; } --- 3133,3142 ----
< prev index next >