New src/java.base/share/classes/java/net/URLDecoder.java

   1 /*
   2  * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.*;
  29 import java.nio.charset.Charset;
  30 import java.nio.charset.IllegalCharsetNameException;
  31 import java.nio.charset.UnsupportedCharsetException;
  32 import java.util.Objects;
  33 
  34 /**
  35  * Utility class for HTML form decoding. This class contains static methods
  36  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  37  * MIME format.
  38  * <p>
  39  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  40  * that all characters in the encoded string are one of the following:
  41  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
  42  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
  43  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
  44  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
  45  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
  46  * character &quot;{@code %}&quot; is allowed but is interpreted
  47  * as the start of a special escaped sequence.
  48  * <p>
  49  * The following rules are applied in the conversion:
  50  *
  51  * <ul>
  52  * <li>The alphanumeric characters &quot;{@code a}&quot; through
  53  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
  54  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
  55  *     through &quot;{@code 9}&quot; remain the same.
  56  * <li>The special characters &quot;{@code .}&quot;,
  57  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
  58  *     &quot;{@code _}&quot; remain the same.
  59  * <li>The plus sign &quot;{@code +}&quot; is converted into a
  60  *     space character &quot; &nbsp; &quot; .
  61  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
  62  *     treated as representing a byte where <i>xy</i> is the two-digit
  63  *     hexadecimal representation of the 8 bits. Then, all substrings
  64  *     that contain one or more of these byte sequences consecutively
  65  *     will be replaced by the character(s) whose encoding would result
  66  *     in those consecutive bytes.
  67  *     The encoding scheme used to decode these characters may be specified,
  68  *     or if unspecified, the default encoding of the platform will be used.
  69  * </ul>
  70  * <p>
  71  * There are two possible ways in which this decoder could deal with
  72  * illegal strings.  It could either leave illegal characters alone or
  73  * it could throw an {@link java.lang.IllegalArgumentException}.
  74  * Which approach the decoder takes is left to the
  75  * implementation.
  76  *
  77  * @author  Mark Chamness
  78  * @author  Michael McCloskey
  79  * @since   1.2
  80  */
  81 
  82 public class URLDecoder {
  83 
  84     /**
  85      * Do not call.
  86      */
  87     @Deprecated(since="16", forRemoval=true)
  88     public URLDecoder() {}
  89 
  90     // The platform default encoding
  91     static String dfltEncName = URLEncoder.dfltEncName;
  92 
  93     /**
  94      * Decodes a {@code x-www-form-urlencoded} string.
  95      * The platform's default encoding is used to determine what characters
  96      * are represented by any consecutive sequences of the form
  97      * "<i>{@code %xy}</i>".
  98      * @param s the {@code String} to decode
  99      * @deprecated The resulting string may vary depending on the platform's
 100      *          default encoding. Instead, use the decode(String,String) method
 101      *          to specify the encoding.
 102      * @return the newly decoded {@code String}
 103      */
 104     @Deprecated
 105     public static String decode(String s) {
 106 
 107         String str = null;
 108 
 109         try {
 110             str = decode(s, dfltEncName);
 111         } catch (UnsupportedEncodingException e) {
 112             // The system should always have the platform default
 113         }
 114 
 115         return str;
 116     }
 117 
 118     /**
 119      * Decodes an {@code application/x-www-form-urlencoded} string using
 120      * a specific encoding scheme.
 121      *
 122      * <p>
 123      * This method behaves the same as {@linkplain decode(String s, Charset charset)}
 124      * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
 125      * using the given encoding name.
 126      *
 127      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 128      * when illegal strings are encountered.
 129      *
 130      * @param s the {@code String} to decode
 131      * @param enc   The name of a supported
 132      *    <a href="../lang/package-summary.html#charenc">character
 133      *    encoding</a>.
 134      * @return the newly decoded {@code String}
 135      * @throws UnsupportedEncodingException
 136      *             If character encoding needs to be consulted, but
 137      *             named character encoding is not supported
 138      * @see URLEncoder#encode(java.lang.String, java.lang.String)
 139      * @since 1.4
 140      */
 141     public static String decode(String s, String enc) throws UnsupportedEncodingException {
 142         if (enc.isEmpty()) {
 143             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
 144         }
 145 
 146         try {
 147             Charset charset = Charset.forName(enc);
 148             return decode(s, charset);
 149         } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
 150             throw new UnsupportedEncodingException(enc);
 151         }
 152     }
 153 
 154     /**
 155      * Decodes an {@code application/x-www-form-urlencoded} string using
 156      * a specific {@linkplain java.nio.charset.Charset Charset}.
 157      * The supplied charset is used to determine
 158      * what characters are represented by any consecutive sequences of the
 159      * form "<i>{@code %xy}</i>".
 160      * <p>
 161      * <em><strong>Note:</strong> The <a href=
 162      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
 163      * World Wide Web Consortium Recommendation</a> states that
 164      * UTF-8 should be used. Not doing so may introduce
 165      * incompatibilities.</em>
 166      *
 167      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 168      * when illegal strings are encountered.
 169      *
 170      * @param s the {@code String} to decode
 171      * @param charset the given charset
 172      * @return the newly decoded {@code String}
 173      * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
 174      * @throws IllegalArgumentException if the implementation encounters illegal
 175      * characters
 176      * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
 177      * @since 10
 178      */
 179     public static String decode(String s, Charset charset) {
 180         Objects.requireNonNull(charset, "Charset");
 181         boolean needToChange = false;
 182         int numChars = s.length();
 183         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
 184         int i = 0;
 185 
 186         char c;
 187         byte[] bytes = null;
 188         while (i < numChars) {
 189             c = s.charAt(i);
 190             switch (c) {
 191             case '+':
 192                 sb.append(' ');
 193                 i++;
 194                 needToChange = true;
 195                 break;
 196             case '%':
 197                 /*
 198                  * Starting with this instance of %, process all
 199                  * consecutive substrings of the form %xy. Each
 200                  * substring %xy will yield a byte. Convert all
 201                  * consecutive  bytes obtained this way to whatever
 202                  * character(s) they represent in the provided
 203                  * encoding.
 204                  */
 205 
 206                 try {
 207 
 208                     // (numChars-i)/3 is an upper bound for the number
 209                     // of remaining bytes
 210                     if (bytes == null)
 211                         bytes = new byte[(numChars-i)/3];
 212                     int pos = 0;
 213 
 214                     while ( ((i+2) < numChars) &&
 215                             (c=='%')) {
 216                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
 217                         if (v < 0)
 218                             throw new IllegalArgumentException(
 219                                     "URLDecoder: Illegal hex characters in escape "
 220                                             + "(%) pattern - negative value");
 221                         bytes[pos++] = (byte) v;
 222                         i+= 3;
 223                         if (i < numChars)
 224                             c = s.charAt(i);
 225                     }
 226 
 227                     // A trailing, incomplete byte encoding such as
 228                     // "%x" will cause an exception to be thrown
 229 
 230                     if ((i < numChars) && (c=='%'))
 231                         throw new IllegalArgumentException(
 232                          "URLDecoder: Incomplete trailing escape (%) pattern");
 233 
 234                     sb.append(new String(bytes, 0, pos, charset));
 235                 } catch (NumberFormatException e) {
 236                     throw new IllegalArgumentException(
 237                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
 238                     + e.getMessage());
 239                 }
 240                 needToChange = true;
 241                 break;
 242             default:
 243                 sb.append(c);
 244                 i++;
 245                 break;
 246             }
 247         }
 248 
 249         return (needToChange? sb.toString() : s);
 250     }
 251 }