11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.zip; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.StandardCharsets; 32 import java.nio.charset.CharsetDecoder; 33 import java.nio.charset.CharsetEncoder; 34 import java.nio.charset.CoderResult; 35 import java.nio.charset.CodingErrorAction; 36 import java.util.Arrays; 37 import sun.nio.cs.ArrayDecoder; 38 import sun.nio.cs.ArrayEncoder; 39 40 /** 41 * Utility class for zipfile name and comment decoding and encoding. Decoders and encoders are created in CodingErrorAction.REPORT mode, and coding failures are raised as IllegalArgumentException rather than being replaced silently. 42 */ 43 44 final class ZipCoder { 45 46 private static boolean isASCII(byte[] ba, int off, int len) { 47 for (int i = off; i < off + len; i++) { 48 if (ba[i] < 0) 49 return false; 50 } 51 return true; 52 } 53 54 private static boolean hasReplaceChar(byte[] ba) { 55 for (int i = 0; i < ba.length; i++) { 56 if (ba[i] == (byte)'?') 57 return true; 58 } 59 return false; 60 } 61 62 String toString(byte[] ba, int off, int length) { 63 64 // fastpath for UTF-8 cs and ascii only name, leverage the 65 // compact string impl to avoid the unnecessary char[] copy/ 66 // paste. 
A temporary workaround before we have better approach, 67 // such as a String constructor that throws exception for 68 // malformed and/or unmappable characters, instead of silently 69 // replacing with repl char 70 if (isUTF8 && isASCII(ba, off, length)) { 71 return new String(ba, off, length, cs); 72 } 73 74 CharsetDecoder cd = decoder().reset(); 75 int len = (int)(length * cd.maxCharsPerByte()); 76 char[] ca = new char[len]; 77 if (len == 0) 78 return new String(ca); 79 // UTF-8 only for now. Other ArrayDecoder only handles 80 // CodingErrorAction.REPLACE mode. ZipCoder uses 81 // REPORT mode. 82 if (isUTF8 && cd instanceof ArrayDecoder) { 83 int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca); 84 if (clen == -1) // malformed 85 throw new IllegalArgumentException("MALFORMED"); 86 return new String(ca, 0, clen); 87 } 88 ByteBuffer bb = ByteBuffer.wrap(ba, off, length); 89 CharBuffer cb = CharBuffer.wrap(ca); 90 CoderResult cr = cd.decode(bb, cb, true); 91 if (!cr.isUnderflow()) 92 throw new IllegalArgumentException(cr.toString()); 93 cr = cd.flush(cb); 94 if (!cr.isUnderflow()) 95 throw new IllegalArgumentException(cr.toString()); 96 return new String(ca, 0, cb.position()); 97 } 98 99 String toString(byte[] ba, int length) { 100 return toString(ba, 0, length); 101 } 102 103 String toString(byte[] ba) { 104 return toString(ba, 0, ba.length); 105 } 106 107 byte[] getBytes(String s) { 108 if (isUTF8) { 109 // fastpath for UTF8. should only occur when the string 110 // has malformed surrogates. A postscan should still be 111 // faster and use less memory. 112 byte[] ba = s.getBytes(cs); 113 if (!hasReplaceChar(ba)) { 114 return ba; 115 } 116 } 117 CharsetEncoder ce = encoder().reset(); 118 char[] ca = s.toCharArray(); 119 int len = (int)(ca.length * ce.maxBytesPerChar()); 120 byte[] ba = new byte[len]; 121 if (len == 0) 122 return ba; 123 // UTF-8 only for now. Other ArrayEncoder only handles 124 // CodingErrorAction.REPORT mode. 
125 if (isUTF8 && ce instanceof ArrayEncoder) { 126 int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); 127 if (blen == -1) // malformed 128 throw new IllegalArgumentException("MALFORMED"); 129 return Arrays.copyOf(ba, blen); 130 } 131 ByteBuffer bb = ByteBuffer.wrap(ba); 132 CharBuffer cb = CharBuffer.wrap(ca); 133 CoderResult cr = ce.encode(cb, bb, true); 134 if (!cr.isUnderflow()) 135 throw new IllegalArgumentException(cr.toString()); 136 cr = ce.flush(bb); 137 if (!cr.isUnderflow()) 138 throw new IllegalArgumentException(cr.toString()); 139 if (bb.position() == ba.length) // defensive copy? 140 return ba; 141 else 142 return Arrays.copyOf(ba, bb.position()); 143 } 144 145 // assume invoked only if "this" is not utf8 146 byte[] getBytesUTF8(String s) { 147 if (isUTF8) 148 return getBytes(s); 149 if (utf8 == null) 150 utf8 = new ZipCoder(StandardCharsets.UTF_8); 151 return utf8.getBytes(s); 152 } 153 154 String toStringUTF8(byte[] ba, int len) { 155 return toStringUTF8(ba, 0, len); 156 } 157 158 String toStringUTF8(byte[] ba, int off, int len) { 159 if (isUTF8) 160 return toString(ba, off, len); 161 if (utf8 == null) 162 utf8 = new ZipCoder(StandardCharsets.UTF_8); 163 return utf8.toString(ba, off, len); 164 } 165 166 boolean isUTF8() { 167 return isUTF8; 168 } 169 170 private Charset cs; 171 private CharsetDecoder dec; 172 private CharsetEncoder enc; 173 private boolean isUTF8; 174 private ZipCoder utf8; 175 176 private ZipCoder(Charset cs) { 177 this.cs = cs; 178 this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name()); 179 } 180 181 static ZipCoder get(Charset charset) { 182 return new ZipCoder(charset); 183 } 184 185 private CharsetDecoder decoder() { 186 if (dec == null) { 187 dec = cs.newDecoder() 188 .onMalformedInput(CodingErrorAction.REPORT) 189 .onUnmappableCharacter(CodingErrorAction.REPORT); 190 } 191 return dec; 192 } 193 194 private CharsetEncoder encoder() { 195 if (enc == null) { 196 enc = cs.newEncoder() 197 
.onMalformedInput(CodingErrorAction.REPORT) 198 .onUnmappableCharacter(CodingErrorAction.REPORT); 199 } 200 return enc; 201 } 202 } | 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.zip; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.CharsetDecoder; 32 import java.nio.charset.CharsetEncoder; 33 import java.nio.charset.CharacterCodingException; 34 import java.nio.charset.CodingErrorAction; 35 36 import static java.nio.charset.StandardCharsets.UTF_8; 37 38 /** 39 * Utility class for zipfile name and comment decoding and encoding. The generic (non-UTF8) path uses coders created in CodingErrorAction.REPORT mode and rethrows CharacterCodingException as IllegalArgumentException; the nested UTF8 subclass delegates to JavaLangAccess instead. 40 */ 41 42 class ZipCoder { 43 44 private static final jdk.internal.misc.JavaLangAccess JLA = 45 jdk.internal.misc.SharedSecrets.getJavaLangAccess(); 46 47 static final class UTF8 extends ZipCoder { 48 49 UTF8(Charset utf8) { 50 super(utf8); 51 } 52 53 @Override 54 boolean isUTF8() { 55 return true; 56 } 57 58 @Override 59 String toString(byte[] ba, int off, int length) { 60 return JLA.newStringUTF8NoRepl(ba, off, length); 61 } 62 63 @Override 64 byte[] getBytes(String s) { 65 return JLA.getBytesUTF8NoRepl(s); 66 } 67 } 68 69 // UTF_8.ArrayEn/Decoder is stateless, so make it singleton. 
70 private static ZipCoder utf8 = new UTF8(UTF_8); 71 72 public static ZipCoder get(Charset charset) { 73 if (charset == UTF_8) 74 return utf8; 75 return new ZipCoder(charset); 76 } 77 78 String toString(byte[] ba, int off, int length) { 79 try { 80 return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString(); 81 82 } catch (CharacterCodingException x) { 83 throw new IllegalArgumentException(x); 84 } 85 } 86 87 String toString(byte[] ba, int length) { 88 return toString(ba, 0, length); 89 } 90 91 String toString(byte[] ba) { 92 return toString(ba, 0, ba.length); 93 } 94 95 byte[] getBytes(String s) { 96 try { 97 ByteBuffer bb = encoder().encode(CharBuffer.wrap(s)); 98 int pos = bb.position(); 99 int limit = bb.limit(); 100 if (bb.hasArray() && pos == 0 && limit == bb.capacity()) { 101 return bb.array(); 102 } 103 byte[] bytes = new byte[bb.limit() - bb.position()]; 104 bb.get(bytes); 105 return bytes; 106 } catch (CharacterCodingException x) { 107 throw new IllegalArgumentException(x); 108 } 109 } 110 111 // assume invoked only if "this" is not utf8 112 byte[] getBytesUTF8(String s) { 113 return utf8.getBytes(s); 114 } 115 116 String toStringUTF8(byte[] ba, int len) { 117 return utf8.toString(ba, 0, len); 118 } 119 120 String toStringUTF8(byte[] ba, int off, int len) { 121 return utf8.toString(ba, off, len); 122 } 123 124 boolean isUTF8() { 125 return false; 126 } 127 128 private Charset cs; 129 private CharsetDecoder dec; 130 private CharsetEncoder enc; 131 132 private ZipCoder(Charset cs) { 133 this.cs = cs; 134 } 135 136 protected CharsetDecoder decoder() { 137 if (dec == null) { 138 dec = cs.newDecoder() 139 .onMalformedInput(CodingErrorAction.REPORT) 140 .onUnmappableCharacter(CodingErrorAction.REPORT); 141 } 142 return dec; 143 } 144 145 protected CharsetEncoder encoder() { 146 if (enc == null) { 147 enc = cs.newEncoder() 148 .onMalformedInput(CodingErrorAction.REPORT) 149 .onUnmappableCharacter(CodingErrorAction.REPORT); 150 } 151 return enc; 152 } 
153 } |
// NOTE(review): the text above carries two revisions of ZipCoder side by side, separated by '|', each with the original file's line numbers embedded in the content; strip the numbering and keep a single revision before compiling.