open Cdiff src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java

src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java


*** 1,7 ****
  /*
!  * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
   */
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
--- 1,7 ----
  /*
!  * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
   */
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
*** 20,29 ****
--- 20,30 ----
  
  package com.sun.org.apache.xerces.internal.impl ;
  
  import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
  import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
+ import com.sun.org.apache.xerces.internal.impl.io.UTF16Reader;
  import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
  import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
  import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
  import com.sun.org.apache.xerces.internal.util.*;
  import com.sun.org.apache.xerces.internal.util.URI;
*** 87,97 ****
   * @author Andy Clark, IBM
   * @author Arnaud  Le Hors, IBM
   * @author K.Venugopal SUN Microsystems
   * @author Neeraj Bajaj SUN Microsystems
   * @author Sunitha Reddy SUN Microsystems
!  * @LastModified: Nov 2018
   */
  public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
  
      //
      // Constants
--- 88,98 ----
   * @author Andy Clark, IBM
   * @author Arnaud  Le Hors, IBM
   * @author K.Venugopal SUN Microsystems
   * @author Neeraj Bajaj SUN Microsystems
   * @author Sunitha Reddy SUN Microsystems
!  * @LastModified: Apr 2019
   */
  public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
  
      //
      // Constants
*** 410,422 ****
      private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
  
      /** Augmentations for entities. */
      private final Augmentations fEntityAugs = new AugmentationsImpl();
  
-     /** Pool of character buffers. */
-     private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
- 
      /** indicate whether Catalog should be used for resolving external resources */
      private boolean fUseCatalog = true;
      CatalogFeatures fCatalogFeatures;
      CatalogResolver fCatalogResolver;
  
--- 411,420 ----
*** 692,745 ****
                      }
                  }
              }
  
              // wrap this stream in RewindableInputStream
!             stream = new RewindableInputStream(stream);
  
              // perform auto-detect of encoding if necessary
              if (encoding == null) {
                  // read first four bytes and determine encoding
                  final byte[] b4 = new byte[4];
                  int count = 0;
                  for (; count<4; count++ ) {
!                     b4[count] = (byte)stream.read();
                  }
                  if (count == 4) {
!                     Object [] encodingDesc = getEncodingName(b4, count);
!                     encoding = (String)(encodingDesc[0]);
!                     isBigEndian = (Boolean)(encodingDesc[1]);
! 
                      stream.reset();
                      // Special case UTF-8 files with BOM created by Microsoft
                      // tools. It's more efficient to consume the BOM than make
                      // the reader perform extra checks. -Ac
!                     if (count > 2 && encoding.equals("UTF-8")) {
!                         int b0 = b4[0] & 0xFF;
!                         int b1 = b4[1] & 0xFF;
!                         int b2 = b4[2] & 0xFF;
!                         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
!                             // ignore first three bytes...
                              stream.skip(3);
                          }
                      }
!                     reader = createReader(stream, encoding, isBigEndian);
                  } else {
                      reader = createReader(stream, encoding, isBigEndian);
                  }
              }
  
              // use specified encoding
              else {
                  encoding = encoding.toUpperCase(Locale.ENGLISH);
  
                  // If encoding is UTF-8, consume BOM if one is present.
!                 if (encoding.equals("UTF-8")) {
                      final int[] b3 = new int[3];
                      int count = 0;
                      for (; count < 3; ++count) {
!                         b3[count] = stream.read();
                          if (b3[count] == -1)
                              break;
                      }
                      if (count == 3) {
                          if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
--- 690,747 ----
                      }
                  }
              }
  
              // wrap this stream in RewindableInputStream
!             RewindableInputStream rewindableStream = new RewindableInputStream(stream);
!             stream = rewindableStream;
  
              // perform auto-detect of encoding if necessary
              if (encoding == null) {
                  // read first four bytes and determine encoding
                  final byte[] b4 = new byte[4];
                  int count = 0;
                  for (; count<4; count++ ) {
!                     b4[count] = (byte)rewindableStream.readAndBuffer();
                  }
                  if (count == 4) {
!                     final EncodingInfo info = getEncodingInfo(b4, count);
!                     encoding = info.autoDetectedEncoding;
!                     final String readerEncoding = info.readerEncoding;
!                     isBigEndian = info.isBigEndian;
                      stream.reset();
+                     if (info.hasBOM) {
                          // Special case UTF-8 files with BOM created by Microsoft
                          // tools. It's more efficient to consume the BOM than make
                          // the reader perform extra checks. -Ac
!                         if (EncodingInfo.STR_UTF8.equals(readerEncoding)) {
!                             // UTF-8 BOM: 0xEF 0xBB 0xBF
                              stream.skip(3);
                          }
+                         // It's also more efficient to consume the UTF-16 BOM.
+                         else if (EncodingInfo.STR_UTF16.equals(readerEncoding)) {
+                             // UTF-16 BE BOM: 0xFE 0xFF
+                             // UTF-16 LE BOM: 0xFF 0xFE
+                             stream.skip(2);
                          }
!                     }
!                     reader = createReader(stream, readerEncoding, isBigEndian);
                  } else {
                      reader = createReader(stream, encoding, isBigEndian);
                  }
              }
  
              // use specified encoding
              else {
                  encoding = encoding.toUpperCase(Locale.ENGLISH);
  
                  // If encoding is UTF-8, consume BOM if one is present.
!                 if (EncodingInfo.STR_UTF8.equals(encoding)) {
                      final int[] b3 = new int[3];
                      int count = 0;
                      for (; count < 3; ++count) {
!                         b3[count] = rewindableStream.readAndBuffer();
                          if (b3[count] == -1)
                              break;
                      }
                      if (count == 3) {
                          if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
*** 748,807 ****
                          }
                      } else {
                          stream.reset();
                      }
                  }
!                 // If encoding is UTF-16, we still need to read the first four bytes
!                 // in order to discover the byte order.
!                 else if (encoding.equals("UTF-16")) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = stream.read();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
- 
-                     String utf16Encoding = "UTF-16";
                      if (count >= 2) {
                          final int b0 = b4[0];
                          final int b1 = b4[1];
                          if (b0 == 0xFE && b1 == 0xFF) {
                              // UTF-16, big-endian
-                             utf16Encoding = "UTF-16BE";
                              isBigEndian = Boolean.TRUE;
                          }
                          else if (b0 == 0xFF && b1 == 0xFE) {
                              // UTF-16, little-endian
-                             utf16Encoding = "UTF-16LE";
                              isBigEndian = Boolean.FALSE;
                          }
                          else if (count == 4) {
                              final int b2 = b4[2];
                              final int b3 = b4[3];
                              if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                  // UTF-16, big-endian, no BOM
-                                 utf16Encoding = "UTF-16BE";
                                  isBigEndian = Boolean.TRUE;
                              }
                              if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                  // UTF-16, little-endian, no BOM
-                                 utf16Encoding = "UTF-16LE";
                                  isBigEndian = Boolean.FALSE;
                              }
                          }
                      }
-                     reader = createReader(stream, utf16Encoding, isBigEndian);
                  }
                  // If encoding is UCS-4, we still need to read the first four bytes
                  // in order to discover the byte order.
!                 else if (encoding.equals("ISO-10646-UCS-4")) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = stream.read();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
  
--- 750,804 ----
                          }
                      } else {
                          stream.reset();
                      }
                  }
!                 // If encoding is UTF-16, we still need to read the first
!                 // four bytes, in order to discover the byte order.
!                 else if (EncodingInfo.STR_UTF16.equals(encoding)) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = rewindableStream.readAndBuffer();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
                      if (count >= 2) {
                          final int b0 = b4[0];
                          final int b1 = b4[1];
                          if (b0 == 0xFE && b1 == 0xFF) {
                              // UTF-16, big-endian
                              isBigEndian = Boolean.TRUE;
+                             stream.skip(2);
                          }
                          else if (b0 == 0xFF && b1 == 0xFE) {
                              // UTF-16, little-endian
                              isBigEndian = Boolean.FALSE;
+                             stream.skip(2);
                          }
                          else if (count == 4) {
                              final int b2 = b4[2];
                              final int b3 = b4[3];
                              if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                  // UTF-16, big-endian, no BOM
                                  isBigEndian = Boolean.TRUE;
                              }
                              if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                  // UTF-16, little-endian, no BOM
                                  isBigEndian = Boolean.FALSE;
                              }
                          }
                      }
                  }
                  // If encoding is UCS-4, we still need to read the first four bytes
                  // in order to discover the byte order.
!                 else if (EncodingInfo.STR_UCS4.equals(encoding)) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = rewindableStream.readAndBuffer();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
  
*** 817,831 ****
                          }
                      }
                  }
                  // If encoding is UCS-2, we still need to read the first four bytes
                  // in order to discover the byte order.
!                 else if (encoding.equals("ISO-10646-UCS-2")) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = stream.read();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
  
--- 814,828 ----
                          }
                      }
                  }
                  // If encoding is UCS-2, we still need to read the first four bytes
                  // in order to discover the byte order.
!                 else if (EncodingInfo.STR_UCS2.equals(encoding)) {
                      final int[] b4 = new int[4];
                      int count = 0;
                      for (; count < 4; ++count) {
!                         b4[count] = rewindableStream.readAndBuffer();
                          if (b4[count] == -1)
                              break;
                      }
                      stream.reset();
  
*** 1796,1806 ****
                  Integer bufferSize = (Integer)value;
                  if (bufferSize != null &&
                      bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
                      fBufferSize = bufferSize.intValue();
                      fEntityScanner.setBufferSize(fBufferSize);
-                     fBufferPool.setExternalBufferSize(fBufferSize);
                  }
              }
              if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() &&
                  propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
                  fSecurityManager = (XMLSecurityManager)value;
--- 1793,1802 ----
*** 2423,2510 ****
       * Returns the IANA encoding name that is auto-detected from
       * the bytes specified, with the endian-ness of that encoding where appropriate.
       *
       * @param b4    The first four bytes of the input.
       * @param count The number of bytes actually read.
!      * @return a 2-element array:  the first element, an IANA-encoding string,
!      *  the second element a Boolean which is true iff the document is big endian, false
!      *  if it's little-endian, and null if the distinction isn't relevant.
       */
!     protected Object[] getEncodingName(byte[] b4, int count) {
  
          if (count < 2) {
!             return defaultEncoding;
          }
  
          // UTF-16, with BOM
          int b0 = b4[0] & 0xFF;
          int b1 = b4[1] & 0xFF;
          if (b0 == 0xFE && b1 == 0xFF) {
              // UTF-16, big-endian
!             return new Object [] {"UTF-16BE", true};
          }
          if (b0 == 0xFF && b1 == 0xFE) {
              // UTF-16, little-endian
!             return new Object [] {"UTF-16LE", false};
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 3) {
!             return defaultEncoding;
          }
  
          // UTF-8 with a BOM
          int b2 = b4[2] & 0xFF;
          if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
!             return defaultEncoding;
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 4) {
!             return defaultEncoding;
          }
  
          // other encodings
          int b3 = b4[3] & 0xFF;
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
              // UCS-4, big endian (1234)
!             return new Object [] {"ISO-10646-UCS-4", true};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, little endian (4321)
!             return new Object [] {"ISO-10646-UCS-4", false};
          }
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
              // UCS-4, unusual octet order (2143)
              // REVISIT: What should this be?
!             return new Object [] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, unusual octect order (3412)
              // REVISIT: What should this be?
!             return new Object [] {"ISO-10646-UCS-4", null};
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
              // UTF-16, big-endian, no BOM
              // (or could turn out to be UCS-2...
              // REVISIT: What should this be?
!             return new Object [] {"UTF-16BE", true};
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
              // UTF-16, little-endian, no BOM
              // (or could turn out to be UCS-2...
!             return new Object [] {"UTF-16LE", false};
          }
          if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
              // EBCDIC
              // a la xerces1, return CP037 instead of EBCDIC here
!             return new Object [] {"CP037", null};
          }
  
!         return defaultEncoding;
  
      } // getEncodingName(byte[],int):Object[]
  
      /**
       * Creates a reader capable of reading the given input stream in
--- 2419,2505 ----
       * Returns the IANA encoding name that is auto-detected from
       * the bytes specified, with the endian-ness of that encoding where appropriate.
       *
       * @param b4    The first four bytes of the input.
       * @param count The number of bytes actually read.
!      * @return an instance of EncodingInfo which represents the auto-detected encoding.
       */
!     protected EncodingInfo getEncodingInfo(byte[] b4, int count) {
  
          if (count < 2) {
!             return EncodingInfo.UTF_8;
          }
  
          // UTF-16, with BOM
          int b0 = b4[0] & 0xFF;
          int b1 = b4[1] & 0xFF;
          if (b0 == 0xFE && b1 == 0xFF) {
              // UTF-16, big-endian
!             return EncodingInfo.UTF_16_BIG_ENDIAN_WITH_BOM;
          }
          if (b0 == 0xFF && b1 == 0xFE) {
              // UTF-16, little-endian
!             return EncodingInfo.UTF_16_LITTLE_ENDIAN_WITH_BOM;
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 3) {
!             return EncodingInfo.UTF_8;
          }
  
          // UTF-8 with a BOM
          int b2 = b4[2] & 0xFF;
          if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
!             return EncodingInfo.UTF_8_WITH_BOM;
          }
  
          // default to UTF-8 if we don't have enough bytes to make a
          // good determination of the encoding
          if (count < 4) {
!             return EncodingInfo.UTF_8;
          }
  
          // other encodings
          int b3 = b4[3] & 0xFF;
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
              // UCS-4, big endian (1234)
!             return EncodingInfo.UCS_4_BIG_ENDIAN;
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, little endian (4321)
!             return EncodingInfo.UCS_4_LITTLE_ENDIAN;
          }
          if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
              // UCS-4, unusual octet order (2143)
              // REVISIT: What should this be?
!             return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
              // UCS-4, unusual octect order (3412)
              // REVISIT: What should this be?
!             return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
          }
          if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
              // UTF-16, big-endian, no BOM
              // (or could turn out to be UCS-2...
              // REVISIT: What should this be?
!             return EncodingInfo.UTF_16_BIG_ENDIAN;
          }
          if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
              // UTF-16, little-endian, no BOM
              // (or could turn out to be UCS-2...
!             return EncodingInfo.UTF_16_LITTLE_ENDIAN;
          }
          if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
              // EBCDIC
              // a la xerces1, return CP037 instead of EBCDIC here
!             return EncodingInfo.EBCDIC;
          }
  
!         // default encoding
!         return EncodingInfo.UTF_8;
  
      } // getEncodingName(byte[],int):Object[]
  
      /**
       * Creates a reader capable of reading the given input stream in
*** 2515,2587 ****
       *                     encoded using. If the user has specified that
       *                     Java encoding names are allowed, then the
       *                     encoding name may be a Java encoding name;
       *                     otherwise, it is an ianaEncoding name.
       * @param isBigEndian   For encodings (like uCS-4), whose names cannot
!      *                      specify a byte order, this tells whether the order is bigEndian.  null menas
!      *                      unknown or not relevant.
       *
       * @return Returns a reader.
       */
      protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
      throws IOException {
  
!         // normalize encoding name
!         if (encoding == null) {
!             encoding = "UTF-8";
          }
! 
!         // try to use an optimized reader
!         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
!         if (ENCODING.equals("UTF-8")) {
!             if (DEBUG_ENCODINGS) {
!                 System.out.println("$$$ creating UTF8Reader");
!             }
!             return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
!         }
!         if (ENCODING.equals("US-ASCII")) {
!             if (DEBUG_ENCODINGS) {
!                 System.out.println("$$$ creating ASCIIReader");
!             }
!             return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
!         }
!         if(ENCODING.equals("ISO-10646-UCS-4")) {
              if(isBigEndian != null) {
!                 boolean isBE = isBigEndian.booleanValue();
!                 if(isBE) {
                      return new UCSReader(inputStream, UCSReader.UCS4BE);
                  } else {
                      return new UCSReader(inputStream, UCSReader.UCS4LE);
                  }
              } else {
!                 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                          "EncodingByteOrderUnsupported",
                          new Object[] { encoding },
                          XMLErrorReporter.SEVERITY_FATAL_ERROR);
              }
!         }
!         if(ENCODING.equals("ISO-10646-UCS-2")) {
!             if(isBigEndian != null) { // sould never happen with this encoding...
!                 boolean isBE = isBigEndian.booleanValue();
!                 if(isBE) {
                      return new UCSReader(inputStream, UCSReader.UCS2BE);
                  } else {
                      return new UCSReader(inputStream, UCSReader.UCS2LE);
                  }
              } else {
!                 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                          "EncodingByteOrderUnsupported",
                          new Object[] { encoding },
                          XMLErrorReporter.SEVERITY_FATAL_ERROR);
              }
          }
  
          // check for valid name
          boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
          boolean validJava = XMLChar.isValidJavaEncoding(encoding);
          if (!validIANA || (fAllowJavaEncodings && !validJava)) {
!             fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                      "EncodingDeclInvalid",
                      new Object[] { encoding },
                      XMLErrorReporter.SEVERITY_FATAL_ERROR);
                      // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
                      //       because every byte is a valid ISO Latin 1 character.
--- 2510,2581 ----
       *                     encoded using. If the user has specified that
       *                     Java encoding names are allowed, then the
       *                     encoding name may be a Java encoding name;
       *                     otherwise, it is an ianaEncoding name.
       * @param isBigEndian   For encodings (like uCS-4), whose names cannot
!      *                      specify a byte order, this tells whether the order
!      *                      is bigEndian.  null if unknown or irrelevant.
       *
       * @return Returns a reader.
       */
      protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
          throws IOException {
  
!         String enc = (encoding != null) ? encoding : EncodingInfo.STR_UTF8;
!         enc = enc.toUpperCase(Locale.ENGLISH);
!         MessageFormatter f = fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
!         Locale l = fErrorReporter.getLocale();
!         switch (enc) {
!             case EncodingInfo.STR_UTF8:
!                 return new UTF8Reader(inputStream, fBufferSize, f, l);
!             case EncodingInfo.STR_UTF16:
!                 if (isBigEndian != null) {
!                     return new UTF16Reader(inputStream, fBufferSize, isBigEndian, f, l);
                  }
!                 break;
!             case EncodingInfo.STR_UTF16BE:
!                 return new UTF16Reader(inputStream, fBufferSize, true, f, l);
!             case EncodingInfo.STR_UTF16LE:
!                 return new UTF16Reader(inputStream, fBufferSize, false, f, l);
!             case EncodingInfo.STR_UCS4:
                  if(isBigEndian != null) {
!                     if(isBigEndian) {
                          return new UCSReader(inputStream, UCSReader.UCS4BE);
                      } else {
                          return new UCSReader(inputStream, UCSReader.UCS4LE);
                      }
                  } else {
!                     fErrorReporter.reportError(this.getEntityScanner(),
!                             XMLMessageFormatter.XML_DOMAIN,
                              "EncodingByteOrderUnsupported",
                              new Object[] { encoding },
                              XMLErrorReporter.SEVERITY_FATAL_ERROR);
                  }
!                 break;
!             case EncodingInfo.STR_UCS2:
!                 if(isBigEndian != null) {
!                     if(isBigEndian) {
                          return new UCSReader(inputStream, UCSReader.UCS2BE);
                      } else {
                          return new UCSReader(inputStream, UCSReader.UCS2LE);
                      }
                  } else {
!                     fErrorReporter.reportError(this.getEntityScanner(),
!                             XMLMessageFormatter.XML_DOMAIN,
                              "EncodingByteOrderUnsupported",
                              new Object[] { encoding },
                              XMLErrorReporter.SEVERITY_FATAL_ERROR);
                  }
+                 break;
          }
  
          // check for valid name
          boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
          boolean validJava = XMLChar.isValidJavaEncoding(encoding);
          if (!validIANA || (fAllowJavaEncodings && !validJava)) {
!             fErrorReporter.reportError(this.getEntityScanner(),
!                     XMLMessageFormatter.XML_DOMAIN,
                      "EncodingDeclInvalid",
                      new Object[] { encoding },
                      XMLErrorReporter.SEVERITY_FATAL_ERROR);
              // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
              //       because every byte is a valid ISO Latin 1 character.
*** 2593,2608 ****
                      //       on. -Ac
                      encoding = "ISO-8859-1";
          }
  
          // try to use a Java reader
!         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
          if (javaEncoding == null) {
!             if(fAllowJavaEncodings) {
                  javaEncoding = encoding;
              } else {
!                 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                          "EncodingDeclInvalid",
                          new Object[] { encoding },
                          XMLErrorReporter.SEVERITY_FATAL_ERROR);
                          // see comment above.
                          javaEncoding = "ISO8859_1";
--- 2587,2603 ----
              //       on. -Ac
                      encoding = "ISO-8859-1";
          }
  
          // try to use a Java reader
!         String javaEncoding = EncodingMap.getIANA2JavaMapping(enc);
          if (javaEncoding == null) {
!             if (fAllowJavaEncodings) {
                  javaEncoding = encoding;
              } else {
!                 fErrorReporter.reportError(this.getEntityScanner(),
!                         XMLMessageFormatter.XML_DOMAIN,
                          "EncodingDeclInvalid",
                          new Object[] { encoding },
                          XMLErrorReporter.SEVERITY_FATAL_ERROR);
                  // see comment above.
                  javaEncoding = "ISO8859_1";
*** 2896,3007 ****
              }
          }
      } // print()
  
      /**
!      * Buffer used in entity manager to reuse character arrays instead
!      * of creating new ones every time.
!      *
!      * @xerces.internal
!      *
!      * @author Ankit Pasricha, IBM
!      */
!     private static class CharacterBuffer {
! 
!         /** character buffer */
!         private char[] ch;
! 
!         /** whether the buffer is for an external or internal scanned entity */
!         private boolean isExternal;
! 
!         public CharacterBuffer(boolean isExternal, int size) {
!             this.isExternal = isExternal;
!             ch = new char[size];
!         }
!     }
! 
! 
!      /**
!      * Stores a number of character buffers and provides it to the entity
!      * manager to use when an entity is seen.
       *
       * @xerces.internal
       *
!      * @author Ankit Pasricha, IBM
       */
!     private static class CharacterBufferPool {
! 
!         private static final int DEFAULT_POOL_SIZE = 3;
! 
!         private CharacterBuffer[] fInternalBufferPool;
!         private CharacterBuffer[] fExternalBufferPool;
! 
!         private int fExternalBufferSize;
!         private int fInternalBufferSize;
!         private int poolSize;
! 
!         private int fInternalTop;
!         private int fExternalTop;
! 
!         public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
!             this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
!         }
  
!         public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
!             fExternalBufferSize = externalBufferSize;
!             fInternalBufferSize = internalBufferSize;
!             this.poolSize = poolSize;
!             init();
!         }
! 
!         /** Initializes buffer pool. **/
!         private void init() {
!             fInternalBufferPool = new CharacterBuffer[poolSize];
!             fExternalBufferPool = new CharacterBuffer[poolSize];
!             fInternalTop = -1;
!             fExternalTop = -1;
!         }
! 
!         /** Retrieves buffer from pool. **/
!         public CharacterBuffer getBuffer(boolean external) {
!             if (external) {
!                 if (fExternalTop > -1) {
!                     return fExternalBufferPool[fExternalTop--];
!                 }
!                 else {
!                     return new CharacterBuffer(true, fExternalBufferSize);
!                 }
!             }
!             else {
!                 if (fInternalTop > -1) {
!                     return fInternalBufferPool[fInternalTop--];
!                 }
!                 else {
!                     return new CharacterBuffer(false, fInternalBufferSize);
!                 }
!             }
!         }
! 
!         /** Returns buffer to pool. **/
!         public void returnToPool(CharacterBuffer buffer) {
!             if (buffer.isExternal) {
!                 if (fExternalTop < fExternalBufferPool.length - 1) {
!                     fExternalBufferPool[++fExternalTop] = buffer;
!                 }
!             }
!             else if (fInternalTop < fInternalBufferPool.length - 1) {
!                 fInternalBufferPool[++fInternalTop] = buffer;
!             }
!         }
! 
!         /** Sets the size of external buffers and dumps the old pool. **/
!         public void setExternalBufferSize(int bufferSize) {
!             fExternalBufferSize = bufferSize;
!             fExternalBufferPool = new CharacterBuffer[poolSize];
!             fExternalTop = -1;
!         }
!     }
  
      /**
      * This class wraps the byte inputstreams we're presented with.
      * We need it because java.io.InputStreams don't provide
      * functionality to reread processed bytes, and they have a habit
--- 2891,2972 ----
              }
          }
      } // print()
  
      /**
!      * Information about auto-detectable encodings.
       *
       * @xerces.internal
       *
!      * @author Michael Glavassevich, IBM
       */
!     private static class EncodingInfo {
!         public static final String STR_UTF8 = "UTF-8";
!         public static final String STR_UTF16 = "UTF-16";
!         public static final String STR_UTF16BE = "UTF-16BE";
!         public static final String STR_UTF16LE = "UTF-16LE";
!         public static final String STR_UCS4 = "ISO-10646-UCS-4";
!         public static final String STR_UCS2 = "ISO-10646-UCS-2";
!         public static final String STR_CP037 = "CP037";
! 
!         /** UTF-8 **/
!         public static final EncodingInfo UTF_8 =
!                 new EncodingInfo(STR_UTF8, null, false);
! 
!         /** UTF-8, with BOM **/
!         public static final EncodingInfo UTF_8_WITH_BOM =
!                 new EncodingInfo(STR_UTF8, null, true);
! 
!         /** UTF-16, big-endian **/
!         public static final EncodingInfo UTF_16_BIG_ENDIAN =
!                 new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, false);
! 
!         /** UTF-16, big-endian with BOM **/
!         public static final EncodingInfo UTF_16_BIG_ENDIAN_WITH_BOM =
!                 new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, true);
! 
!         /** UTF-16, little-endian **/
!         public static final EncodingInfo UTF_16_LITTLE_ENDIAN =
!                 new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, false);
! 
!         /** UTF-16, little-endian with BOM **/
!         public static final EncodingInfo UTF_16_LITTLE_ENDIAN_WITH_BOM =
!                 new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, true);
! 
!         /** UCS-4, big-endian **/
!         public static final EncodingInfo UCS_4_BIG_ENDIAN =
!                 new EncodingInfo(STR_UCS4, Boolean.TRUE, false);
! 
!         /** UCS-4, little-endian **/
!         public static final EncodingInfo UCS_4_LITTLE_ENDIAN =
!                 new EncodingInfo(STR_UCS4, Boolean.FALSE, false);
! 
!         /** UCS-4, unusual byte-order (2143) or (3412) **/
!         public static final EncodingInfo UCS_4_UNUSUAL_BYTE_ORDER =
!                 new EncodingInfo(STR_UCS4, null, false);
! 
!         /** EBCDIC **/
!         public static final EncodingInfo EBCDIC = new EncodingInfo(STR_CP037, null, false);
! 
!         public final String autoDetectedEncoding;
!         public final String readerEncoding;
!         public final Boolean isBigEndian;
!         public final boolean hasBOM;
! 
!         private EncodingInfo(String autoDetectedEncoding, Boolean isBigEndian, boolean hasBOM) {
!             this(autoDetectedEncoding, autoDetectedEncoding, isBigEndian, hasBOM);
!         } // <init>(String,Boolean,boolean)
! 
!         private EncodingInfo(String autoDetectedEncoding, String readerEncoding,
!                 Boolean isBigEndian, boolean hasBOM) {
!             this.autoDetectedEncoding = autoDetectedEncoding;
!             this.readerEncoding = readerEncoding;
!             this.isBigEndian = isBigEndian;
!             this.hasBOM = hasBOM;
!         } // <init>(String,String,Boolean,boolean)
  
!     } // class EncodingInfo
  
      /**
      * This class wraps the byte inputstreams we're presented with.
      * We need it because java.io.InputStreams don't provide
      * functionality to reread processed bytes, and they have a habit
*** 3050,3112 ****
  
          public void rewind() {
              fOffset = fStartOffset;
          }
  
!         public int read() throws IOException {
!             int b = 0;
!             if (fOffset < fLength) {
!                 return fData[fOffset++] & 0xff;
!             }
!             if (fOffset == fEndOffset) {
!                 return -1;
!             }
              if (fOffset == fData.length) {
                  byte[] newData = new byte[fOffset << 1];
                  System.arraycopy(fData, 0, newData, 0, fOffset);
                  fData = newData;
              }
!             b = fInputStream.read();
              if (b == -1) {
                  fEndOffset = fOffset;
                  return -1;
              }
              fData[fLength++] = (byte)b;
              fOffset++;
              return b & 0xff;
          }
  
          public int read(byte[] b, int off, int len) throws IOException {
!             int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
  
!                 /**
!                  * //System.out.println("fCurrentEntitty = " + fCurrentEntity );
!                  * //System.out.println("fInputStream = " + fInputStream );
!                  * // better get some more for the voracious reader... */
! 
                  if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
  
                      if (!fCurrentEntity.xmlDeclChunkRead)
                      {
                          fCurrentEntity.xmlDeclChunkRead = true;
                          len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
                      }
                      return fInputStream.read(b, off, len);
                  }
! 
!                 int returnedVal = read();
!                 if(returnedVal == -1) {
                    fEndOffset = fOffset;
                    return -1;
                  }
                  b[off] = (byte)returnedVal;
                  return 1;
- 
              }
              if (len < bytesLeft) {
                  if (len <= 0) {
                      return 0;
                  }
--- 3015,3077 ----
  
          public void rewind() {
              fOffset = fStartOffset;
          }
  
!         public int readAndBuffer() throws IOException {
              if (fOffset == fData.length) {
                  byte[] newData = new byte[fOffset << 1];
                  System.arraycopy(fData, 0, newData, 0, fOffset);
                  fData = newData;
              }
!             final int b = fInputStream.read();
              if (b == -1) {
                  fEndOffset = fOffset;
                  return -1;
              }
              fData[fLength++] = (byte)b;
              fOffset++;
              return b & 0xff;
          }
  
+         public int read() throws IOException {
+             if (fOffset < fLength) {
+                 return fData[fOffset++] & 0xff;
+             }
+             if (fOffset == fEndOffset) {
+                 return -1;
+             }
+             if (fCurrentEntity.mayReadChunks) {
+                 return fInputStream.read();
+             }
+             return readAndBuffer();
+         }
+ 
          public int read(byte[] b, int off, int len) throws IOException {
!             final int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
  
!                 // read a block of data as requested
                  if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
  
                      if (!fCurrentEntity.xmlDeclChunkRead)
                      {
                          fCurrentEntity.xmlDeclChunkRead = true;
                          len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
                      }
                      return fInputStream.read(b, off, len);
                  }
!                 int returnedVal = readAndBuffer();
!                 if (returnedVal == -1) {
                      fEndOffset = fOffset;
                      return -1;
                  }
                  b[off] = (byte)returnedVal;
                  return 1;
              }
              if (len < bytesLeft) {
                  if (len <= 0) {
                      return 0;
                  }
*** 3118,3129 ****
              }
              fOffset += len;
              return len;
          }
  
!         public long skip(long n)
!         throws IOException {
              int bytesLeft;
              if (n <= 0) {
                  return 0;
              }
              bytesLeft = fLength - fOffset;
--- 3083,3093 ----
              }
              fOffset += len;
              return len;
          }
  
!         public long skip(long n) throws IOException {
              int bytesLeft;
              if (n <= 0) {
                  return 0;
              }
              bytesLeft = fLength - fOffset;
*** 3152,3162 ****
              */
              return fInputStream.skip(n) + bytesLeft;
          }
  
          public int available() throws IOException {
!             int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
                  return fCurrentEntity.mayReadChunks ? fInputStream.available()
--- 3116,3126 ----
              */
              return fInputStream.skip(n) + bytesLeft;
          }
  
          public int available() throws IOException {
!             final int bytesLeft = fLength - fOffset;
              if (bytesLeft == 0) {
                  if (fOffset == fEndOffset) {
                      return -1;
                  }
                  return fCurrentEntity.mayReadChunks ? fInputStream.available()
*** 3169,3179 ****
              fMark = fOffset;
          }
  
          public void reset() {
              fOffset = fMark;
-             //test();
          }
  
          public boolean markSupported() {
              return true;
          }
--- 3133,3142 ----
< prev index next >