jdk Cdiff src/share/classes/jdk/internal/util/xml/impl/Parser.java

src/share/classes/jdk/internal/util/xml/impl/Parser.java


*** 2858,2875 ****
--- 2858,2886 ----
                  }
                  xml(reader);
              } else {
                  //              Get encoding from BOM or the xml text decl.
                  reader = bom(is.getByteStream(), ' ');
+                 /**
+                  * [#4.3.3] requires a BOM for UTF-16; however, it is not uncommon
+                  * for the BOM to be missing. A mature technique exists in Xerces
+                  * to further check for a possible UTF-16 encoding.
+                  */
                  if (reader == null) {
+                     reader = utf16(is.getByteStream());
+                 }
+ 
+                 if (reader == null) {
                      //          Encoding is defined by the xml text decl.
                      reader = enc("UTF-8", is.getByteStream());
                      expenc = xml(reader);
+                     if (!expenc.equals("UTF-8")) {
                          if (expenc.startsWith("UTF-16")) {
                              panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
                          }
                          reader = enc(expenc, is.getByteStream());
+                     }
                  } else {
                      //          Encoding is defined by the BOM.
                      xml(reader);
                  }
              }
*** 2954,2964 ****
--- 2965,3018 ----
                  }
                  return null;
          }
      }
  
+ 
      /**
+      * Using a mature technique from Xerces, this method checks further after
+      * the bom method to see if the entity starts with "<?" encoded as UTF-16.
+      *
+      * @param is A byte stream of the entity.
+      * @return a UTF-16 reader if the first four bytes match, otherwise null
+      * @exception Exception declared by the signature; panic is not called here.
+      * @exception IOException if reading from the byte stream fails
+      */
+     private Reader utf16(InputStream is)
+             throws Exception {
+         if (mChIdx != 0) {
+             // The bom method has read ONE byte into the buffer (mChars[0]).
+             byte b0 = (byte)mChars[0];
+             if (b0 == 0x00 || b0 == 0x3C) {
+                 int b1 = is.read();
+                 int b2 = is.read();
+                 int b3 = is.read();
+                 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+                     // UTF-16, big-endian, no BOM: 00 3C 00 3F is "<?"; buffer it as two chars
+                     mChars[0] = (char)(b1);
+                     mChars[mChIdx++] = (char)(b3);
+                     return new ReaderUTF16(is, 'b');
+                 } else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+                     // UTF-16, little-endian, no BOM: 3C 00 3F 00 is "<?"; buffer it as two chars
+                     mChars[0] = (char)(b0);
+                     mChars[mChIdx++] = (char)(b2);
+                     return new ReaderUTF16(is, 'l');
+                 } else {
+                     /* Not every InputStream supports reset, so the four values already
+                      * read are kept in mChars, one char per raw value, for further parsing.
+                      */
+                     mChars[0] = (char)(b0);
+                     mChars[mChIdx++] = (char)(b1);
+                     mChars[mChIdx++] = (char)(b2);
+                     mChars[mChIdx++] = (char)(b3);
+                 }
+                 
+             }
+         }
+         return null;
+     }
+     /**
       * Parses the xml text declaration.
       *
       * This method gets encoding from the xml text declaration [#4.3.1] if any.
       * The method assumes the buffer (mChars) is big enough to accommodate whole
       * xml text declaration.
*** 2972,2992 ****
              throws Exception {
          String str = null;
          String enc = "UTF-8";
          char ch;
          int val;
!         short st;
!         //              Read the xml text declaration into the buffer
!         if (mChIdx != 0) {
!             //          The bom method have read ONE char into the buffer.
!             st = (short) ((mChars[0] == '<') ? 1 : -1);
!         } else {
!             st = 0;
!         }
          while (st >= 0 && mChIdx < mChars.length) {
              ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
              mChars[mChIdx++] = ch;
              switch (st) {
                  case 0:     // read '<' of xml declaration
                      switch (ch) {
                          case '<':
                              st = 1;
--- 3026,3046 ----
              throws Exception {
          String str = null;
          String enc = "UTF-8";
          char ch;
          int val;
!         short st = 0;
!         int byteRead =  mChIdx; //number of bytes read prior to entering this method
! 
          while (st >= 0 && mChIdx < mChars.length) {
+             if (st < byteRead) {
+                 ch = mChars[st];
+             } else {
                  ch = ((val = reader.read()) >= 0) ? (char) val : EOS;            
                  mChars[mChIdx++] = ch;
+             }
+ 
              switch (st) {
                  case 0:     // read '<' of xml declaration
                      switch (ch) {
                          case '<':
                              st = 1;