1 /*
   2  * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.*;
  29 import java.nio.charset.Charset;
  30 import java.nio.charset.IllegalCharsetNameException;
  31 import java.nio.charset.UnsupportedCharsetException;
  32 import java.util.Objects;
  33 
  34 /**
  35  * Utility class for HTML form decoding. This class contains static methods
  36  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  37  * MIME format.
  38  * <p>
  39  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  40  * that all characters in the encoded string are one of the following:
  41  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
  42  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
  43  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
  44  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
  45  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
  46  * character &quot;{@code %}&quot; is allowed but is interpreted
  47  * as the start of a special escaped sequence.
  48  * <p>
  49  * The following rules are applied in the conversion:
  50  *
  51  * <ul>
  52  * <li>The alphanumeric characters &quot;{@code a}&quot; through
  53  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
  54  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
  55  *     through &quot;{@code 9}&quot; remain the same.
  56  * <li>The special characters &quot;{@code .}&quot;,
  57  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
  58  *     &quot;{@code _}&quot; remain the same.
  59  * <li>The plus sign &quot;{@code +}&quot; is converted into a
  60  *     space character &quot; &nbsp; &quot; .
  61  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
  62  *     treated as representing a byte where <i>xy</i> is the two-digit
  63  *     hexadecimal representation of the 8 bits. Then, all substrings
  64  *     that contain one or more of these byte sequences consecutively
  65  *     will be replaced by the character(s) whose encoding would result
  66  *     in those consecutive bytes.
  67  *     The encoding scheme used to decode these characters may be specified,
  68  *     or if unspecified, the default encoding of the platform will be used.
  69  * </ul>
  70  * <p>
  71  * There are two possible ways in which this decoder could deal with
  72  * illegal strings.  It could either leave illegal characters alone or
  73  * it could throw an {@link java.lang.IllegalArgumentException}.
  74  * Which approach the decoder takes is left to the
  75  * implementation.
  76  *
  77  * @author  Mark Chamness
  78  * @author  Michael McCloskey
  79  * @since   1.2
  80  */
  81 
  82 public class URLDecoder {
  83 
  84     // The platform default encoding
  85     static String dfltEncName = URLEncoder.dfltEncName;
  86 
  87     /**
  88      * Decodes a {@code x-www-form-urlencoded} string.
  89      * The platform's default encoding is used to determine what characters
  90      * are represented by any consecutive sequences of the form
  91      * "<i>{@code %xy}</i>".
  92      * @param s the {@code String} to decode
  93      * @deprecated The resulting string may vary depending on the platform's
  94      *          default encoding. Instead, use the decode(String,String) method
  95      *          to specify the encoding.
  96      * @return the newly decoded {@code String}
  97      */
  98     @Deprecated
  99     public static String decode(String s) {
 100 
 101         String str = null;
 102 
 103         try {
 104             str = decode(s, dfltEncName);
 105         } catch (UnsupportedEncodingException e) {
 106             // The system should always have the platform default
 107         }
 108 
 109         return str;
 110     }
 111 
 112     /**
 113      * Decodes an {@code application/x-www-form-urlencoded} string using
 114      * a specific encoding scheme.
 115      *
 116      * <p>
 117      * This method behaves the same as {@linkplain decode(String s, Charset charset)}
 118      * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
 119      * using the given encoding name.
 120      *
 121      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 122      * when illegal strings are encountered.
 123      *
 124      * @param s the {@code String} to decode
 125      * @param enc   The name of a supported
 126      *    <a href="../lang/package-summary.html#charenc">character
 127      *    encoding</a>.
 128      * @return the newly decoded {@code String}
 129      * @throws UnsupportedEncodingException
 130      *             If character encoding needs to be consulted, but
 131      *             named character encoding is not supported
 132      * @see URLEncoder#encode(java.lang.String, java.lang.String)
 133      * @since 1.4
 134      */
 135     public static String decode(String s, String enc) throws UnsupportedEncodingException {
 136         if (enc.length() == 0) {
 137             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
 138         }
 139 
 140         try {
 141             Charset charset = Charset.forName(enc);
 142             return decode(s, charset);
 143         } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
 144             throw new UnsupportedEncodingException(enc);
 145         }
 146     }
 147 
 148     /**
 149      * Decodes an {@code application/x-www-form-urlencoded} string using
 150      * a specific {@linkplain java.nio.charset.Charset Charset}.
 151      * The supplied charset is used to determine
 152      * what characters are represented by any consecutive sequences of the
 153      * form "<i>{@code %xy}</i>".
 154      * <p>
 155      * <em><strong>Note:</strong> The <a href=
 156      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
 157      * World Wide Web Consortium Recommendation</a> states that
 158      * UTF-8 should be used. Not doing so may introduce
 159      * incompatibilities.</em>
 160      *
 161      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 162      * when illegal strings are encountered.
 163      *
 164      * @param s the {@code String} to decode
 165      * @param charset the given charset
 166      * @return the newly decoded {@code String}
 167      * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
 168      * @throws IllegalArgumentException if the implementation encounters illegal
 169      * characters
 170      * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
 171      * @since 10
 172      */
 173     public static String decode(String s, Charset charset) {
 174         Objects.requireNonNull(charset, "Charset");
 175         boolean needToChange = false;
 176         int numChars = s.length();
 177         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
 178         int i = 0;
 179 
 180         char c;
 181         byte[] bytes = null;
 182         while (i < numChars) {
 183             c = s.charAt(i);
 184             switch (c) {
 185             case '+':
 186                 sb.append(' ');
 187                 i++;
 188                 needToChange = true;
 189                 break;
 190             case '%':
 191                 /*
 192                  * Starting with this instance of %, process all
 193                  * consecutive substrings of the form %xy. Each
 194                  * substring %xy will yield a byte. Convert all
 195                  * consecutive  bytes obtained this way to whatever
 196                  * character(s) they represent in the provided
 197                  * encoding.
 198                  */
 199 
 200                 try {
 201 
 202                     // (numChars-i)/3 is an upper bound for the number
 203                     // of remaining bytes
 204                     if (bytes == null)
 205                         bytes = new byte[(numChars-i)/3];
 206                     int pos = 0;
 207 
 208                     while ( ((i+2) < numChars) &&
 209                             (c=='%')) {
 210                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
 211                         if (v < 0)
 212                             throw new IllegalArgumentException(
 213                                     "URLDecoder: Illegal hex characters in escape "
 214                                             + "(%) pattern - negative value");
 215                         bytes[pos++] = (byte) v;
 216                         i+= 3;
 217                         if (i < numChars)
 218                             c = s.charAt(i);
 219                     }
 220 
 221                     // A trailing, incomplete byte encoding such as
 222                     // "%x" will cause an exception to be thrown
 223 
 224                     if ((i < numChars) && (c=='%'))
 225                         throw new IllegalArgumentException(
 226                          "URLDecoder: Incomplete trailing escape (%) pattern");
 227 
 228                     sb.append(new String(bytes, 0, pos, charset));
 229                 } catch (NumberFormatException e) {
 230                     throw new IllegalArgumentException(
 231                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
 232                     + e.getMessage());
 233                 }
 234                 needToChange = true;
 235                 break;
 236             default:
 237                 sb.append(c);
 238                 i++;
 239                 break;
 240             }
 241         }
 242 
 243         return (needToChange? sb.toString() : s);
 244     }
 245 }