< prev index next >

src/java.base/share/classes/java/net/URLDecoder.java

Print this page


   1 /*
   2  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.*;




  29 
  30 /**
  31  * Utility class for HTML form decoding. This class contains static methods
  32  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  33  * MIME format.
  34  * <p>
  35  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  36  * that all characters in the encoded string are one of the following:
  37  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
  38  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
  39  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
  40  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
  41  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
  42  * character &quot;{@code %}&quot; is allowed but is interpreted
  43  * as the start of a special escaped sequence.
  44  * <p>
  45  * The following rules are applied in the conversion:
  46  *
  47  * <ul>
  48  * <li>The alphanumeric characters &quot;{@code a}&quot; through


  91      *          to specify the encoding.
  92      * @return the newly decoded {@code String}
  93      */
  94     @Deprecated
  95     public static String decode(String s) {
  96 
  97         String str = null;
  98 
  99         try {
 100             str = decode(s, dfltEncName);
 101         } catch (UnsupportedEncodingException e) {
 102             // The system should always have the platform default
 103         }
 104 
 105         return str;
 106     }
 107 
 108     /**
 109      * Decodes an {@code application/x-www-form-urlencoded} string using
 110      * a specific encoding scheme.
 111      * The supplied encoding is used to determine
 112      * what characters are represented by any consecutive sequences of the
 113      * form "<i>{@code %xy}</i>".
 114      * <p>
 115      * <em><strong>Note:</strong> The <a href=
 116      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
 117      * World Wide Web Consortium Recommendation</a> states that
 118      * UTF-8 should be used. Not doing so may introduce
 119      * incompatibilities.</em>

 120      *
 121      * @param s the {@code String} to decode
 122      * @param enc   The name of a supported
 123      *    <a href="../lang/package-summary.html#charenc">character
 124      *    encoding</a>.
 125      * @return the newly decoded {@code String}
 126      * @exception  UnsupportedEncodingException
 127      *             If character encoding needs to be consulted, but
 128      *             named character encoding is not supported
 129      * @see URLEncoder#encode(java.lang.String, java.lang.String)
 130      * @since 1.4
 131      */
 132     public static String decode(String s, String enc)
 133         throws UnsupportedEncodingException{


 134 



































 135         boolean needToChange = false;
 136         int numChars = s.length();
 137         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
 138         int i = 0;
 139 
 140         if (enc.length() == 0) {
 141             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
 142         }
 143 
 144         char c;
 145         byte[] bytes = null;
 146         while (i < numChars) {
 147             c = s.charAt(i);
 148             switch (c) {
 149             case '+':
 150                 sb.append(' ');
 151                 i++;
 152                 needToChange = true;
 153                 break;
 154             case '%':
 155                 /*
 156                  * Starting with this instance of %, process all
 157                  * consecutive substrings of the form %xy. Each
 158                  * substring %xy will yield a byte. Convert all
 159                  * consecutive  bytes obtained this way to whatever
 160                  * character(s) they represent in the provided
 161                  * encoding.
 162                  */
 163 
 164                 try {
 165 
 166                     // (numChars-i)/3 is an upper bound for the number
 167                     // of remaining bytes
 168                     if (bytes == null)
 169                         bytes = new byte[(numChars-i)/3];
 170                     int pos = 0;
 171 
 172                     while ( ((i+2) < numChars) &&
 173                             (c=='%')) {
 174                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
 175                         if (v < 0)
 176                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");


 177                         bytes[pos++] = (byte) v;
 178                         i+= 3;
 179                         if (i < numChars)
 180                             c = s.charAt(i);
 181                     }
 182 
 183                     // A trailing, incomplete byte encoding such as
 184                     // "%x" will cause an exception to be thrown
 185 
 186                     if ((i < numChars) && (c=='%'))
 187                         throw new IllegalArgumentException(
 188                          "URLDecoder: Incomplete trailing escape (%) pattern");
 189 
 190                     sb.append(new String(bytes, 0, pos, enc));
 191                 } catch (NumberFormatException e) {
 192                     throw new IllegalArgumentException(
 193                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
 194                     + e.getMessage());
 195                 }
 196                 needToChange = true;
 197                 break;
 198             default:
 199                 sb.append(c);
 200                 i++;
 201                 break;
 202             }
 203         }
 204 
 205         return (needToChange? sb.toString() : s);
 206     }
 207 }
   1 /*
   2  * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.*;
  29 import java.nio.charset.Charset;
  30 import java.nio.charset.IllegalCharsetNameException;
  31 import java.nio.charset.UnsupportedCharsetException;
  32 import java.util.Objects;
  33 
  34 /**
  35  * Utility class for HTML form decoding. This class contains static methods
  36  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  37  * MIME format.
  38  * <p>
  39  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  40  * that all characters in the encoded string are one of the following:
  41  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
  42  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
  43  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
  44  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
  45  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
  46  * character &quot;{@code %}&quot; is allowed but is interpreted
  47  * as the start of a special escaped sequence.
  48  * <p>
  49  * The following rules are applied in the conversion:
  50  *
  51  * <ul>
  52  * <li>The alphanumeric characters &quot;{@code a}&quot; through


  95      *          to specify the encoding.
  96      * @return the newly decoded {@code String}
  97      */
  98     @Deprecated
  99     public static String decode(String s) {
 100 
 101         String str = null;
 102 
 103         try {
 104             str = decode(s, dfltEncName);
 105         } catch (UnsupportedEncodingException e) {
 106             // The system should always have the platform default
 107         }
 108 
 109         return str;
 110     }
 111 
 112     /**
 113      * Decodes an {@code application/x-www-form-urlencoded} string using
 114      * a specific encoding scheme.
 115      *


 116      * <p>
 117      * This method behaves the same as {@linkplain decode(String s, Charset charset)}
 118      * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
 119      * using the given encoding name.
 120      *
 121      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 122      * when illegal strings are encountered.
 123      *
 124      * @param s the {@code String} to decode
 125      * @param enc   The name of a supported
 126      *    <a href="../lang/package-summary.html#charenc">character
 127      *    encoding</a>.
 128      * @return the newly decoded {@code String}
 129      * @throws UnsupportedEncodingException
 130      *             If character encoding needs to be consulted, but
 131      *             named character encoding is not supported
 132      * @see URLEncoder#encode(java.lang.String, java.lang.String)
 133      * @since 1.4
 134      */
 135     public static String decode(String s, String enc) throws UnsupportedEncodingException {
 136         if (enc.length() == 0) {
 137             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
 138         }
 139 
 140         try {
 141             Charset charset = Charset.forName(enc);
 142             return decode(s, charset);
 143         } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
 144             throw new UnsupportedEncodingException(enc);
 145         }
 146     }
 147 
 148     /**
 149      * Decodes an {@code application/x-www-form-urlencoded} string using
 150      * a specific {@linkplain java.nio.charset.Charset Charset}.
 151      * The supplied charset is used to determine
 152      * what characters are represented by any consecutive sequences of the
 153      * form "<i>{@code %xy}</i>".
 154      * <p>
 155      * <em><strong>Note:</strong> The <a href=
 156      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
 157      * World Wide Web Consortium Recommendation</a> states that
 158      * UTF-8 should be used. Not doing so may introduce
 159      * incompatibilities.</em>
 160      *
 161      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
 162      * when illegal strings are encountered.
 163      *
 164      * @param s the {@code String} to decode
 165      * @param charset the given charset
 166      * @return the newly decoded {@code String}
 167      * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
 168      * @throws IllegalArgumentException if the implementation encounters illegal
 169      * characters
 170      * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
 171      * @since 10
 172      */
 173     public static String decode(String s, Charset charset) {
 174         Objects.requireNonNull(charset, "Charset");
 175         boolean needToChange = false;
 176         int numChars = s.length();
 177         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
 178         int i = 0;
 179 




 180         char c;
 181         byte[] bytes = null;
 182         while (i < numChars) {
 183             c = s.charAt(i);
 184             switch (c) {
 185             case '+':
 186                 sb.append(' ');
 187                 i++;
 188                 needToChange = true;
 189                 break;
 190             case '%':
 191                 /*
 192                  * Starting with this instance of %, process all
 193                  * consecutive substrings of the form %xy. Each
 194                  * substring %xy will yield a byte. Convert all
 195                  * consecutive  bytes obtained this way to whatever
 196                  * character(s) they represent in the provided
 197                  * encoding.
 198                  */
 199 
 200                 try {
 201 
 202                     // (numChars-i)/3 is an upper bound for the number
 203                     // of remaining bytes
 204                     if (bytes == null)
 205                         bytes = new byte[(numChars-i)/3];
 206                     int pos = 0;
 207 
 208                     while ( ((i+2) < numChars) &&
 209                             (c=='%')) {
 210                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
 211                         if (v < 0)
 212                             throw new IllegalArgumentException(
 213                                     "URLDecoder: Illegal hex characters in escape "
 214                                             + "(%) pattern - negative value");
 215                         bytes[pos++] = (byte) v;
 216                         i+= 3;
 217                         if (i < numChars)
 218                             c = s.charAt(i);
 219                     }
 220 
 221                     // A trailing, incomplete byte encoding such as
 222                     // "%x" will cause an exception to be thrown
 223 
 224                     if ((i < numChars) && (c=='%'))
 225                         throw new IllegalArgumentException(
 226                          "URLDecoder: Incomplete trailing escape (%) pattern");
 227 
 228                     sb.append(new String(bytes, 0, pos, charset));
 229                 } catch (NumberFormatException e) {
 230                     throw new IllegalArgumentException(
 231                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
 232                     + e.getMessage());
 233                 }
 234                 needToChange = true;
 235                 break;
 236             default:
 237                 sb.append(c);
 238                 i++;
 239                 break;
 240             }
 241         }
 242 
 243         return (needToChange? sb.toString() : s);
 244     }
 245 }
< prev index next >