src/java.base/share/classes/java/util/zip/ZipCoder.java

Print this page




  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.zip;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.StandardCharsets;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 import java.util.Arrays;
  37 import sun.nio.cs.ArrayDecoder;
  38 import sun.nio.cs.ArrayEncoder;
  39 
  40 /**
  41  * Utility class for zipfile name and comment decoding and encoding
  42  */
  43 
  44 final class ZipCoder {
  45 
  46     private static boolean isASCII(byte[] ba, int off, int len) {
  47         for (int i = off; i < off + len; i++) {
  48             if (ba[i] < 0)
  49                 return false;



  50         }



  51         return true;
  52     }
  53 
  54     private static boolean hasReplaceChar(byte[] ba) {
  55         for (int i = 0; i < ba.length; i++) {
  56             if (ba[i] == (byte)'?')
  57                 return true;
  58         }
  59         return false;













  60     }
  61 
  62     String toString(byte[] ba, int off, int length) {


  63 
  64         // fastpath for UTF-8 cs and ascii only name, leverage the
  65         // compact string impl to avoid the unnecessary char[] copy/
  66         // paste. A temporary workaround before we have better approach,
  67         // such as a String constructor that throws exception for
  68         // malformed and/or unmappable characters, instead of silently
  69         // replacing with repl char
  70         if (isUTF8 && isASCII(ba, off, length)) {
  71             return new String(ba, off, length, cs);
  72         }
  73 
  74         CharsetDecoder cd = decoder().reset();
  75         int len = (int)(length * cd.maxCharsPerByte());
  76         char[] ca = new char[len];
  77         if (len == 0)
  78             return new String(ca);
  79         // UTF-8 only for now. Other ArrayDeocder only handles
  80         // CodingErrorAction.REPLACE mode. ZipCoder uses
  81         // REPORT mode.
  82         if (isUTF8 && cd instanceof ArrayDecoder) {
  83             int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
  84             if (clen == -1)    // malformed
  85                 throw new IllegalArgumentException("MALFORMED");
  86             return new String(ca, 0, clen);
  87         }
  88         ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
  89         CharBuffer cb = CharBuffer.wrap(ca);
  90         CoderResult cr = cd.decode(bb, cb, true);
  91         if (!cr.isUnderflow())
  92             throw new IllegalArgumentException(cr.toString());
  93         cr = cd.flush(cb);
  94         if (!cr.isUnderflow())
  95             throw new IllegalArgumentException(cr.toString());
  96         return new String(ca, 0, cb.position());
  97     }
  98 
  99     String toString(byte[] ba, int length) {
 100         return toString(ba, 0, length);
 101     }
 102 
 103     String toString(byte[] ba) {
 104         return toString(ba, 0, ba.length);
 105     }
 106 
 107     byte[] getBytes(String s) {
 108         if (isUTF8) {
 109             // fastpath for UTF8. should only occur when the string
 110             // has malformed surrogates. A postscan should still be
 111             // faster and use less memory.
 112             byte[] ba = s.getBytes(cs);
 113             if (!hasReplaceChar(ba)) {
 114                 return ba;
 115             }
 116         }
 117         CharsetEncoder ce = encoder().reset();
 118         char[] ca = s.toCharArray();
 119         int len = (int)(ca.length * ce.maxBytesPerChar());
 120         byte[] ba = new byte[len];
 121         if (len == 0)
 122             return ba;
 123         // UTF-8 only for now. Other ArrayDeocder only handles
 124         // CodingErrorAction.REPLACE mode.
 125         if (isUTF8 && ce instanceof ArrayEncoder) {
 126             int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
 127             if (blen == -1)    // malformed
 128                 throw new IllegalArgumentException("MALFORMED");
 129             return Arrays.copyOf(ba, blen);
 130         }
 131         ByteBuffer bb = ByteBuffer.wrap(ba);
 132         CharBuffer cb = CharBuffer.wrap(ca);
 133         CoderResult cr = ce.encode(cb, bb, true);
 134         if (!cr.isUnderflow())
 135             throw new IllegalArgumentException(cr.toString());
 136         cr = ce.flush(bb);
 137         if (!cr.isUnderflow())
 138             throw new IllegalArgumentException(cr.toString());
 139         if (bb.position() == ba.length)  // defensive copy?
 140             return ba;
 141         else
 142             return Arrays.copyOf(ba, bb.position());
 143     }
 144 
 145     // assume invoked only if "this" is not utf8
 146     byte[] getBytesUTF8(String s) {
 147         if (isUTF8)
 148             return getBytes(s);
 149         if (utf8 == null)
 150             utf8 = new ZipCoder(StandardCharsets.UTF_8);
 151         return utf8.getBytes(s);
 152     }
 153 
 154     String toStringUTF8(byte[] ba, int len) {
 155         return toStringUTF8(ba, 0, len);
 156     }
 157 
 158     String toStringUTF8(byte[] ba, int off, int len) {
 159         if (isUTF8)
 160             return toString(ba, off, len);
 161         if (utf8 == null)
 162             utf8 = new ZipCoder(StandardCharsets.UTF_8);
 163         return utf8.toString(ba, off, len);
 164     }
 165 
 166     boolean isUTF8() {
 167         return isUTF8;
 168     }
 169 
 170     private Charset cs;
 171     private CharsetDecoder dec;
 172     private CharsetEncoder enc;
 173     private boolean isUTF8;
 174     private ZipCoder utf8;
 175 
 176     private ZipCoder(Charset cs) {
 177         this.cs = cs;
 178         this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
 179     }
 180 
 181     static ZipCoder get(Charset charset) {
 182         return new ZipCoder(charset);
 183     }
 184 
 185     private CharsetDecoder decoder() {
 186         if (dec == null) {
 187             dec = cs.newDecoder()
 188               .onMalformedInput(CodingErrorAction.REPORT)
 189               .onUnmappableCharacter(CodingErrorAction.REPORT);
 190         }
 191         return dec;
 192     }
 193 
 194     private CharsetEncoder encoder() {
 195         if (enc == null) {
 196             enc = cs.newEncoder()
 197               .onMalformedInput(CodingErrorAction.REPORT)
 198               .onUnmappableCharacter(CodingErrorAction.REPORT);
 199         }
 200         return enc;
 201     }
 202 }


  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.zip;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;

  31 import java.nio.charset.CharsetDecoder;
  32 import java.nio.charset.CharsetEncoder;
  33 import java.nio.charset.CharacterCodingException;
  34 import java.nio.charset.CodingErrorAction;
  35 
  36 import static java.nio.charset.StandardCharsets.UTF_8;

  37 
  38 /**
  39  * Utility class for zipfile name and comment decoding and encoding
  40  */
  41 
  42 class ZipCoder {
  43 
  44     private static final jdk.internal.misc.JavaLangAccess JLA =
  45         jdk.internal.misc.SharedSecrets.getJavaLangAccess();
  46 
  47     static final class UTF8 extends ZipCoder {
  48 
  49         UTF8(Charset utf8) {
  50             super(utf8);
  51         }
  52 
  53         @Override
  54         boolean isUTF8() {
  55             return true;
  56         }
  57 
  58         @Override
  59         String toString(byte[] ba, int off, int length) {
  60             return JLA.newStringUTF8NoRepl(ba, off, length);

  61         }
  62 
  63         @Override
  64         byte[] getBytes(String s) {
  65             return JLA.getBytesUTF8NoRepl(s);
  66         }
  67     }
  68 
  69     // UTF_8.ArrayEn/Decoder is stateless, so make it singleton.
  70     private static ZipCoder utf8 = new UTF8(UTF_8);
  71 
  72     public static ZipCoder get(Charset charset) {
  73         if (charset == UTF_8)
  74             return utf8;
  75         return new ZipCoder(charset);
  76     }
  77 
  78     String toString(byte[] ba, int off, int length) {
  79         try {
  80               return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
  81 
  82         } catch (CharacterCodingException x) {
  83             throw new IllegalArgumentException(x);
  84         }






























  85     }
  86 
  87     String toString(byte[] ba, int length) {
  88         return toString(ba, 0, length);
  89     }
  90 
  91     String toString(byte[] ba) {
  92         return toString(ba, 0, ba.length);
  93     }
  94 
  95     byte[] getBytes(String s) {
  96         try {
  97             ByteBuffer bb = encoder().encode(CharBuffer.wrap(s));
  98             int pos = bb.position();
  99             int limit = bb.limit();
 100             if (bb.hasArray() && pos == 0 && limit == bb.capacity()) {
 101                 return bb.array();
 102             }
 103             byte[] bytes = new byte[bb.limit() - bb.position()];
 104             bb.get(bytes);
 105             return bytes;
 106         } catch (CharacterCodingException x) {
 107             throw new IllegalArgumentException(x);
 108         }






















 109     }
 110 
 111     // assume invoked only if "this" is not utf8
 112     byte[] getBytesUTF8(String s) {




 113         return utf8.getBytes(s);
 114     }
 115 
 116     String toStringUTF8(byte[] ba, int len) {
 117         return utf8.toString(ba, 0, len);
 118     }
 119 
 120     String toStringUTF8(byte[] ba, int off, int len) {




 121         return utf8.toString(ba, off, len);
 122     }
 123 
 124     boolean isUTF8() {
 125         return false;
 126     }
 127 
 128     private Charset cs;
 129     private CharsetDecoder dec;
 130     private CharsetEncoder enc;


 131 
 132     private ZipCoder(Charset cs) {
 133         this.cs = cs;





 134     }
 135 
 136     protected CharsetDecoder decoder() {
 137         if (dec == null) {
 138             dec = cs.newDecoder()
 139               .onMalformedInput(CodingErrorAction.REPORT)
 140               .onUnmappableCharacter(CodingErrorAction.REPORT);
 141         }
 142         return dec;
 143     }
 144 
 145     protected CharsetEncoder encoder() {
 146         if (enc == null) {
 147             enc = cs.newEncoder()
 148               .onMalformedInput(CodingErrorAction.REPORT)
 149               .onUnmappableCharacter(CodingErrorAction.REPORT);
 150         }
 151         return enc;
 152     }
 153 }