1 /*
   2  * Copyright (c) 2009, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.zip;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.StandardCharsets;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 import java.util.Arrays;
  37 import sun.nio.cs.ArrayDecoder;
  38 import sun.nio.cs.ArrayEncoder;
  39 
  40 /**
  41  * Utility class for zipfile name and comment decoding and encoding
  42  */
  43 
  44 final class ZipCoder {
  45 
  46     private static boolean isASCII(byte[] ba, int off, int len) {
  47         for (int i = off; i < off + len; i++) {
  48             if (ba[i] < 0)
  49                 return false;
  50         }
  51         return true;
  52     }
  53 
  54     private static boolean hasReplaceChar(byte[] ba) {
  55         for (int i = 0; i < ba.length; i++) {
  56             if (ba[i] == (byte)'?')
  57                 return true;
  58         }
  59         return false;
  60     }
  61 
  62     String toString(byte[] ba, int off, int length) {
  63 
  64         // fastpath for UTF-8 cs and ascii only name, leverage the
  65         // compact string impl to avoid the unnecessary char[] copy/
  66         // paste. A temporary workaround before we have better approach,
  67         // such as a String constructor that throws exception for
  68         // malformed and/or unmappable characters, instead of silently
  69         // replacing with repl char
  70         if (isUTF8 && isASCII(ba, off, length)) {
  71             return new String(ba, off, length, cs);
  72         }
  73 
  74         CharsetDecoder cd = decoder().reset();
  75         int len = (int)(length * cd.maxCharsPerByte());
  76         char[] ca = new char[len];
  77         if (len == 0)
  78             return new String(ca);
  79         // UTF-8 only for now. Other ArrayDeocder only handles
  80         // CodingErrorAction.REPLACE mode. ZipCoder uses
  81         // REPORT mode.
  82         if (isUTF8 && cd instanceof ArrayDecoder) {
  83             int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
  84             if (clen == -1)    // malformed
  85                 throw new IllegalArgumentException("MALFORMED");
  86             return new String(ca, 0, clen);
  87         }
  88         ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
  89         CharBuffer cb = CharBuffer.wrap(ca);
  90         CoderResult cr = cd.decode(bb, cb, true);
  91         if (!cr.isUnderflow())
  92             throw new IllegalArgumentException(cr.toString());
  93         cr = cd.flush(cb);
  94         if (!cr.isUnderflow())
  95             throw new IllegalArgumentException(cr.toString());
  96         return new String(ca, 0, cb.position());
  97     }
  98 
  99     String toString(byte[] ba, int length) {
 100         return toString(ba, 0, length);
 101     }
 102 
 103     String toString(byte[] ba) {
 104         return toString(ba, 0, ba.length);
 105     }
 106 
 107     byte[] getBytes(String s) {
 108         if (isUTF8) {
 109             // fastpath for UTF8. should only occur when the string
 110             // has malformed surrogates. A postscan should still be
 111             // faster and use less memory.
 112             byte[] ba = s.getBytes(cs);
 113             if (!hasReplaceChar(ba)) {
 114                 return ba;
 115             }
 116         }
 117         CharsetEncoder ce = encoder().reset();
 118         char[] ca = s.toCharArray();
 119         int len = (int)(ca.length * ce.maxBytesPerChar());
 120         byte[] ba = new byte[len];
 121         if (len == 0)
 122             return ba;
 123         // UTF-8 only for now. Other ArrayDeocder only handles
 124         // CodingErrorAction.REPLACE mode.
 125         if (isUTF8 && ce instanceof ArrayEncoder) {
 126             int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
 127             if (blen == -1)    // malformed
 128                 throw new IllegalArgumentException("MALFORMED");
 129             return Arrays.copyOf(ba, blen);
 130         }
 131         ByteBuffer bb = ByteBuffer.wrap(ba);
 132         CharBuffer cb = CharBuffer.wrap(ca);
 133         CoderResult cr = ce.encode(cb, bb, true);
 134         if (!cr.isUnderflow())
 135             throw new IllegalArgumentException(cr.toString());
 136         cr = ce.flush(bb);
 137         if (!cr.isUnderflow())
 138             throw new IllegalArgumentException(cr.toString());
 139         if (bb.position() == ba.length)  // defensive copy?
 140             return ba;
 141         else
 142             return Arrays.copyOf(ba, bb.position());
 143     }
 144 
 145     // assume invoked only if "this" is not utf8
 146     byte[] getBytesUTF8(String s) {
 147         if (isUTF8)
 148             return getBytes(s);
 149         if (utf8 == null)
 150             utf8 = new ZipCoder(StandardCharsets.UTF_8);
 151         return utf8.getBytes(s);
 152     }
 153 
 154     String toStringUTF8(byte[] ba, int len) {
 155         return toStringUTF8(ba, 0, len);
 156     }
 157 
 158     String toStringUTF8(byte[] ba, int off, int len) {
 159         if (isUTF8)
 160             return toString(ba, off, len);
 161         if (utf8 == null)
 162             utf8 = new ZipCoder(StandardCharsets.UTF_8);
 163         return utf8.toString(ba, off, len);
 164     }
 165 
 166     boolean isUTF8() {
 167         return isUTF8;
 168     }
 169 
 170     private Charset cs;
 171     private CharsetDecoder dec;
 172     private CharsetEncoder enc;
 173     private boolean isUTF8;
 174     private ZipCoder utf8;
 175 
 176     private ZipCoder(Charset cs) {
 177         this.cs = cs;
 178         this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
 179     }
 180 
 181     static ZipCoder get(Charset charset) {
 182         return new ZipCoder(charset);
 183     }
 184 
 185     private CharsetDecoder decoder() {
 186         if (dec == null) {
 187             dec = cs.newDecoder()
 188               .onMalformedInput(CodingErrorAction.REPORT)
 189               .onUnmappableCharacter(CodingErrorAction.REPORT);
 190         }
 191         return dec;
 192     }
 193 
 194     private CharsetEncoder encoder() {
 195         if (enc == null) {
 196             enc = cs.newEncoder()
 197               .onMalformedInput(CodingErrorAction.REPORT)
 198               .onUnmappableCharacter(CodingErrorAction.REPORT);
 199         }
 200         return enc;
 201     }
 202 }