--- old/src/java.base/share/classes/java/util/zip/ZipCoder.java 2017-12-13 06:58:24.635639921 -0800 +++ new/src/java.base/share/classes/java/util/zip/ZipCoder.java 2017-12-13 06:58:24.344613652 -0800 @@ -28,72 +28,60 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; +import java.nio.charset.CharacterCodingException; import java.nio.charset.CodingErrorAction; -import java.util.Arrays; -import sun.nio.cs.ArrayDecoder; -import sun.nio.cs.ArrayEncoder; + +import static java.nio.charset.StandardCharsets.UTF_8; /** * Utility class for zipfile name and comment decoding and encoding */ -final class ZipCoder { +class ZipCoder { + + private static final jdk.internal.misc.JavaLangAccess JLA = + jdk.internal.misc.SharedSecrets.getJavaLangAccess(); + + static final class UTF8 extends ZipCoder { - private static boolean isASCII(byte[] ba, int off, int len) { - for (int i = off; i < off + len; i++) { - if (ba[i] < 0) - return false; + UTF8(Charset utf8) { + super(utf8); } - return true; - } - private static boolean hasReplaceChar(byte[] ba) { - for (int i = 0; i < ba.length; i++) { - if (ba[i] == (byte)'?') - return true; + @Override + boolean isUTF8() { + return true; + } + + @Override + String toString(byte[] ba, int off, int length) { + return JLA.newStringUTF8NoRepl(ba, off, length); + } + + @Override + byte[] getBytes(String s) { + return JLA.getBytesUTF8NoRepl(s); } - return false; + } + + // UTF_8.ArrayEn/Decoder is stateless, so make it singleton. + private static ZipCoder utf8 = new UTF8(UTF_8); + + public static ZipCoder get(Charset charset) { + if (charset == UTF_8) + return utf8; + return new ZipCoder(charset); } String toString(byte[] ba, int off, int length) { + try { + return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString(); - // fastpath for UTF-8 cs and ascii only name, leverage the - // compact string impl to avoid the unnecessary char[] copy/ - // paste. A temporary workaround before we have better approach, - // such as a String constructor that throws exception for - // malformed and/or unmappable characters, instead of silently - // replacing with repl char - if (isUTF8 && isASCII(ba, off, length)) { - return new String(ba, off, length, cs); - } - - CharsetDecoder cd = decoder().reset(); - int len = (int)(length * cd.maxCharsPerByte()); - char[] ca = new char[len]; - if (len == 0) - return new String(ca); - // UTF-8 only for now. Other ArrayDeocder only handles - // CodingErrorAction.REPLACE mode. ZipCoder uses - // REPORT mode. - if (isUTF8 && cd instanceof ArrayDecoder) { - int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca); - if (clen == -1) // malformed - throw new IllegalArgumentException("MALFORMED"); - return new String(ca, 0, clen); - } - ByteBuffer bb = ByteBuffer.wrap(ba, off, length); - CharBuffer cb = CharBuffer.wrap(ca); - CoderResult cr = cd.decode(bb, cb, true); - if (!cr.isUnderflow()) - throw new IllegalArgumentException(cr.toString()); - cr = cd.flush(cb); - if (!cr.isUnderflow()) - throw new IllegalArgumentException(cr.toString()); - return new String(ca, 0, cb.position()); + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); + } } String toString(byte[] ba, int length) { @@ -105,84 +93,47 @@ } byte[] getBytes(String s) { - if (isUTF8) { - // fastpath for UTF8. should only occur when the string - // has malformed surrogates. A postscan should still be - // faster and use less memory. - byte[] ba = s.getBytes(cs); - if (!hasReplaceChar(ba)) { - return ba; + try { + ByteBuffer bb = encoder().encode(CharBuffer.wrap(s)); + int pos = bb.position(); + int limit = bb.limit(); + if (bb.hasArray() && pos == 0 && limit == bb.capacity()) { + return bb.array(); } + byte[] bytes = new byte[bb.limit() - bb.position()]; + bb.get(bytes); + return bytes; + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); } - CharsetEncoder ce = encoder().reset(); - char[] ca = s.toCharArray(); - int len = (int)(ca.length * ce.maxBytesPerChar()); - byte[] ba = new byte[len]; - if (len == 0) - return ba; - // UTF-8 only for now. Other ArrayDeocder only handles - // CodingErrorAction.REPLACE mode. - if (isUTF8 && ce instanceof ArrayEncoder) { - int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); - if (blen == -1) // malformed - throw new IllegalArgumentException("MALFORMED"); - return Arrays.copyOf(ba, blen); - } - ByteBuffer bb = ByteBuffer.wrap(ba); - CharBuffer cb = CharBuffer.wrap(ca); - CoderResult cr = ce.encode(cb, bb, true); - if (!cr.isUnderflow()) - throw new IllegalArgumentException(cr.toString()); - cr = ce.flush(bb); - if (!cr.isUnderflow()) - throw new IllegalArgumentException(cr.toString()); - if (bb.position() == ba.length) // defensive copy? - return ba; - else - return Arrays.copyOf(ba, bb.position()); } // assume invoked only if "this" is not utf8 byte[] getBytesUTF8(String s) { - if (isUTF8) - return getBytes(s); - if (utf8 == null) - utf8 = new ZipCoder(StandardCharsets.UTF_8); return utf8.getBytes(s); } String toStringUTF8(byte[] ba, int len) { - return toStringUTF8(ba, 0, len); + return utf8.toString(ba, 0, len); } String toStringUTF8(byte[] ba, int off, int len) { - if (isUTF8) - return toString(ba, off, len); - if (utf8 == null) - utf8 = new ZipCoder(StandardCharsets.UTF_8); return utf8.toString(ba, off, len); } boolean isUTF8() { - return isUTF8; + return false; } private Charset cs; private CharsetDecoder dec; private CharsetEncoder enc; - private boolean isUTF8; - private ZipCoder utf8; private ZipCoder(Charset cs) { this.cs = cs; - this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name()); - } - - static ZipCoder get(Charset charset) { - return new ZipCoder(charset); } - private CharsetDecoder decoder() { + protected CharsetDecoder decoder() { if (dec == null) { dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) @@ -191,7 +142,7 @@ return dec; } - private CharsetEncoder encoder() { + protected CharsetEncoder encoder() { if (enc == null) { enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPORT)