1 /* 2 * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.zip; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.StandardCharsets; 32 import java.nio.charset.CharsetDecoder; 33 import java.nio.charset.CharsetEncoder; 34 import java.nio.charset.CoderResult; 35 import java.nio.charset.CodingErrorAction; 36 import java.util.Arrays; 37 import sun.nio.cs.ArrayDecoder; 38 import sun.nio.cs.ArrayEncoder; 39 40 /** 41 * Utility class for zipfile name and comment decoding and encoding 42 */ 43 44 final class ZipCoder { 45 46 private static boolean isASCII(byte[] ba, int off, int len) { 47 for (int i = off; i < off + len; i++) { 48 if (ba[i] < 0) 49 return false; 50 } 51 return true; 52 } 53 54 private static boolean hasReplaceChar(byte[] ba) { 55 for (int i = 0; i < ba.length; i++) { 56 if (ba[i] == (byte)'?') 57 return true; 58 } 59 return false; 60 } 61 62 String toString(byte[] ba, int off, int length) { 63 64 // fastpath for UTF-8 cs and ascii only name, leverage the 65 // compact string impl to avoid the unnecessary char[] copy/ 66 // paste. A temporary workaround before we have better approach, 67 // such as a String constructor that throws exception for 68 // malformed and/or unmappable characters, instead of silently 69 // replacing with repl char 70 if (isUTF8 && isASCII(ba, off, length)) { 71 return new String(ba, off, length, cs); 72 } 73 74 CharsetDecoder cd = decoder().reset(); 75 int len = (int)(length * cd.maxCharsPerByte()); 76 char[] ca = new char[len]; 77 if (len == 0) 78 return new String(ca); 79 // UTF-8 only for now. Other ArrayDeocder only handles 80 // CodingErrorAction.REPLACE mode. ZipCoder uses 81 // REPORT mode. 82 if (isUTF8 && cd instanceof ArrayDecoder) { 83 int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca); 84 if (clen == -1) // malformed 85 throw new IllegalArgumentException("MALFORMED"); 86 return new String(ca, 0, clen); 87 } 88 ByteBuffer bb = ByteBuffer.wrap(ba, off, length); 89 CharBuffer cb = CharBuffer.wrap(ca); 90 CoderResult cr = cd.decode(bb, cb, true); 91 if (!cr.isUnderflow()) 92 throw new IllegalArgumentException(cr.toString()); 93 cr = cd.flush(cb); 94 if (!cr.isUnderflow()) 95 throw new IllegalArgumentException(cr.toString()); 96 return new String(ca, 0, cb.position()); 97 } 98 99 String toString(byte[] ba, int length) { 100 return toString(ba, 0, length); 101 } 102 103 String toString(byte[] ba) { 104 return toString(ba, 0, ba.length); 105 } 106 107 byte[] getBytes(String s) { 108 if (isUTF8) { 109 // fastpath for UTF8. should only occur when the string 110 // has malformed surrogates. A postscan should still be 111 // faster and use less memory. 112 byte[] ba = s.getBytes(cs); 113 if (!hasReplaceChar(ba)) { 114 return ba; 115 } 116 } 117 CharsetEncoder ce = encoder().reset(); 118 char[] ca = s.toCharArray(); 119 int len = (int)(ca.length * ce.maxBytesPerChar()); 120 byte[] ba = new byte[len]; 121 if (len == 0) 122 return ba; 123 // UTF-8 only for now. Other ArrayDeocder only handles 124 // CodingErrorAction.REPLACE mode. 125 if (isUTF8 && ce instanceof ArrayEncoder) { 126 int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); 127 if (blen == -1) // malformed 128 throw new IllegalArgumentException("MALFORMED"); 129 return Arrays.copyOf(ba, blen); 130 } 131 ByteBuffer bb = ByteBuffer.wrap(ba); 132 CharBuffer cb = CharBuffer.wrap(ca); 133 CoderResult cr = ce.encode(cb, bb, true); 134 if (!cr.isUnderflow()) 135 throw new IllegalArgumentException(cr.toString()); 136 cr = ce.flush(bb); 137 if (!cr.isUnderflow()) 138 throw new IllegalArgumentException(cr.toString()); 139 if (bb.position() == ba.length) // defensive copy? 140 return ba; 141 else 142 return Arrays.copyOf(ba, bb.position()); 143 } 144 145 // assume invoked only if "this" is not utf8 146 byte[] getBytesUTF8(String s) { 147 if (isUTF8) 148 return getBytes(s); 149 if (utf8 == null) 150 utf8 = new ZipCoder(StandardCharsets.UTF_8); 151 return utf8.getBytes(s); 152 } 153 154 String toStringUTF8(byte[] ba, int len) { 155 return toStringUTF8(ba, 0, len); 156 } 157 158 String toStringUTF8(byte[] ba, int off, int len) { 159 if (isUTF8) 160 return toString(ba, off, len); 161 if (utf8 == null) 162 utf8 = new ZipCoder(StandardCharsets.UTF_8); 163 return utf8.toString(ba, off, len); 164 } 165 166 boolean isUTF8() { 167 return isUTF8; 168 } 169 170 private Charset cs; 171 private CharsetDecoder dec; 172 private CharsetEncoder enc; 173 private boolean isUTF8; 174 private ZipCoder utf8; 175 176 private ZipCoder(Charset cs) { 177 this.cs = cs; 178 this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name()); 179 } 180 181 static ZipCoder get(Charset charset) { 182 return new ZipCoder(charset); 183 } 184 185 private CharsetDecoder decoder() { 186 if (dec == null) { 187 dec = cs.newDecoder() 188 .onMalformedInput(CodingErrorAction.REPORT) 189 .onUnmappableCharacter(CodingErrorAction.REPORT); 190 } 191 return dec; 192 } 193 194 private CharsetEncoder encoder() { 195 if (enc == null) { 196 enc = cs.newEncoder() 197 .onMalformedInput(CodingErrorAction.REPORT) 198 .onUnmappableCharacter(CodingErrorAction.REPORT); 199 } 200 return enc; 201 } 202 }