1 /* 2 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.net; 27 28 import java.io.*; 29 import java.nio.charset.Charset; 30 import java.nio.charset.IllegalCharsetNameException; 31 import java.nio.charset.UnsupportedCharsetException; 32 import java.util.Objects; 33 34 /** 35 * Utility class for HTML form decoding. This class contains static methods 36 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> 37 * MIME format. 38 * <p> 39 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed 40 * that all characters in the encoded string are one of the following: 41 * "{@code a}" through "{@code z}", 42 * "{@code A}" through "{@code Z}", 43 * "{@code 0}" through "{@code 9}", and 44 * "{@code -}", "{@code _}", 45 * "{@code .}", and "{@code *}". The 46 * character "{@code %}" is allowed but is interpreted 47 * as the start of a special escaped sequence. 48 * <p> 49 * The following rules are applied in the conversion: 50 * 51 * <ul> 52 * <li>The alphanumeric characters "{@code a}" through 53 * "{@code z}", "{@code A}" through 54 * "{@code Z}" and "{@code 0}" 55 * through "{@code 9}" remain the same. 56 * <li>The special characters "{@code .}", 57 * "{@code -}", "{@code *}", and 58 * "{@code _}" remain the same. 59 * <li>The plus sign "{@code +}" is converted into a 60 * space character " " . 61 * <li>A sequence of the form "<i>{@code %xy}</i>" will be 62 * treated as representing a byte where <i>xy</i> is the two-digit 63 * hexadecimal representation of the 8 bits. Then, all substrings 64 * that contain one or more of these byte sequences consecutively 65 * will be replaced by the character(s) whose encoding would result 66 * in those consecutive bytes. 67 * The encoding scheme used to decode these characters may be specified, 68 * or if unspecified, the default encoding of the platform will be used. 69 * </ul> 70 * <p> 71 * There are two possible ways in which this decoder could deal with 72 * illegal strings. It could either leave illegal characters alone or 73 * it could throw an {@link java.lang.IllegalArgumentException}. 74 * Which approach the decoder takes is left to the 75 * implementation. 76 * 77 * @author Mark Chamness 78 * @author Michael McCloskey 79 * @since 1.2 80 */ 81 82 public class URLDecoder { 83 84 /** 85 * Do not call. 86 */ 87 @Deprecated(since="16", forRemoval=true) 88 public URLDecoder() {} 89 90 // The platform default encoding 91 static String dfltEncName = URLEncoder.dfltEncName; 92 93 /** 94 * Decodes a {@code x-www-form-urlencoded} string. 95 * The platform's default encoding is used to determine what characters 96 * are represented by any consecutive sequences of the form 97 * "<i>{@code %xy}</i>". 98 * @param s the {@code String} to decode 99 * @deprecated The resulting string may vary depending on the platform's 100 * default encoding. Instead, use the decode(String,String) method 101 * to specify the encoding. 102 * @return the newly decoded {@code String} 103 */ 104 @Deprecated 105 public static String decode(String s) { 106 107 String str = null; 108 109 try { 110 str = decode(s, dfltEncName); 111 } catch (UnsupportedEncodingException e) { 112 // The system should always have the platform default 113 } 114 115 return str; 116 } 117 118 /** 119 * Decodes an {@code application/x-www-form-urlencoded} string using 120 * a specific encoding scheme. 121 * 122 * <p> 123 * This method behaves the same as {@linkplain decode(String s, Charset charset)} 124 * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset} 125 * using the given encoding name. 126 * 127 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException} 128 * when illegal strings are encountered. 129 * 130 * @param s the {@code String} to decode 131 * @param enc The name of a supported 132 * <a href="../lang/package-summary.html#charenc">character 133 * encoding</a>. 134 * @return the newly decoded {@code String} 135 * @throws UnsupportedEncodingException 136 * If character encoding needs to be consulted, but 137 * named character encoding is not supported 138 * @see URLEncoder#encode(java.lang.String, java.lang.String) 139 * @since 1.4 140 */ 141 public static String decode(String s, String enc) throws UnsupportedEncodingException { 142 if (enc.isEmpty()) { 143 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); 144 } 145 146 try { 147 Charset charset = Charset.forName(enc); 148 return decode(s, charset); 149 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 150 throw new UnsupportedEncodingException(enc); 151 } 152 } 153 154 /** 155 * Decodes an {@code application/x-www-form-urlencoded} string using 156 * a specific {@linkplain java.nio.charset.Charset Charset}. 157 * The supplied charset is used to determine 158 * what characters are represented by any consecutive sequences of the 159 * form "<i>{@code %xy}</i>". 160 * <p> 161 * <em><strong>Note:</strong> The <a href= 162 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> 163 * World Wide Web Consortium Recommendation</a> states that 164 * UTF-8 should be used. Not doing so may introduce 165 * incompatibilities.</em> 166 * 167 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException} 168 * when illegal strings are encountered. 169 * 170 * @param s the {@code String} to decode 171 * @param charset the given charset 172 * @return the newly decoded {@code String} 173 * @throws NullPointerException if {@code s} or {@code charset} is {@code null} 174 * @throws IllegalArgumentException if the implementation encounters illegal 175 * characters 176 * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset) 177 * @since 10 178 */ 179 public static String decode(String s, Charset charset) { 180 Objects.requireNonNull(charset, "Charset"); 181 boolean needToChange = false; 182 int numChars = s.length(); 183 StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars); 184 int i = 0; 185 186 char c; 187 byte[] bytes = null; 188 while (i < numChars) { 189 c = s.charAt(i); 190 switch (c) { 191 case '+': 192 sb.append(' '); 193 i++; 194 needToChange = true; 195 break; 196 case '%': 197 /* 198 * Starting with this instance of %, process all 199 * consecutive substrings of the form %xy. Each 200 * substring %xy will yield a byte. Convert all 201 * consecutive bytes obtained this way to whatever 202 * character(s) they represent in the provided 203 * encoding. 204 */ 205 206 try { 207 208 // (numChars-i)/3 is an upper bound for the number 209 // of remaining bytes 210 if (bytes == null) 211 bytes = new byte[(numChars-i)/3]; 212 int pos = 0; 213 214 while ( ((i+2) < numChars) && 215 (c=='%')) { 216 int v = Integer.parseInt(s, i + 1, i + 3, 16); 217 if (v < 0) 218 throw new IllegalArgumentException( 219 "URLDecoder: Illegal hex characters in escape " 220 + "(%) pattern - negative value"); 221 bytes[pos++] = (byte) v; 222 i+= 3; 223 if (i < numChars) 224 c = s.charAt(i); 225 } 226 227 // A trailing, incomplete byte encoding such as 228 // "%x" will cause an exception to be thrown 229 230 if ((i < numChars) && (c=='%')) 231 throw new IllegalArgumentException( 232 "URLDecoder: Incomplete trailing escape (%) pattern"); 233 234 sb.append(new String(bytes, 0, pos, charset)); 235 } catch (NumberFormatException e) { 236 throw new IllegalArgumentException( 237 "URLDecoder: Illegal hex characters in escape (%) pattern - " 238 + e.getMessage()); 239 } 240 needToChange = true; 241 break; 242 default: 243 sb.append(c); 244 i++; 245 break; 246 } 247 } 248 249 return (needToChange? sb.toString() : s); 250 } 251 }