1 /*
2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.net;
27
28 import java.io.*;
29
30 /**
31 * Utility class for HTML form decoding. This class contains static methods
32 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
33 * MIME format.
34 * <p>
35 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
36 * that all characters in the encoded string are one of the following:
37 * "{@code a}" through "{@code z}",
38 * "{@code A}" through "{@code Z}",
39 * "{@code 0}" through "{@code 9}", and
40 * "{@code -}", "{@code _}",
41 * "{@code .}", and "{@code *}". The
42 * character "{@code %}" is allowed but is interpreted
43 * as the start of a special escaped sequence.
44 * <p>
45 * The following rules are applied in the conversion:
46 *
47 * <ul>
48 * <li>The alphanumeric characters "{@code a}" through
91 * to specify the encoding.
92 * @return the newly decoded {@code String}
93 */
94 @Deprecated
95 public static String decode(String s) {
96
97 String str = null;
98
99 try {
100 str = decode(s, dfltEncName);
101 } catch (UnsupportedEncodingException e) {
102 // The system should always have the platform default
103 }
104
105 return str;
106 }
107
108 /**
109 * Decodes an {@code application/x-www-form-urlencoded} string using
110 * a specific encoding scheme.
111 * The supplied encoding is used to determine
112 * what characters are represented by any consecutive sequences of the
113 * form "<i>{@code %xy}</i>".
114 * <p>
115 * <em><strong>Note:</strong> The <a href=
116 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
117 * World Wide Web Consortium Recommendation</a> states that
118 * UTF-8 should be used. Not doing so may introduce
119 * incompatibilities.</em>
120 *
121 * @param s the {@code String} to decode
122 * @param enc The name of a supported
123 * <a href="../lang/package-summary.html#charenc">character
124 * encoding</a>.
125 * @return the newly decoded {@code String}
126 * @exception UnsupportedEncodingException
127 * If character encoding needs to be consulted, but
128 * named character encoding is not supported
129 * @see URLEncoder#encode(java.lang.String, java.lang.String)
130 * @since 1.4
131 */
132 public static String decode(String s, String enc)
133 throws UnsupportedEncodingException{
134
135 boolean needToChange = false;
136 int numChars = s.length();
137 StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
138 int i = 0;
139
140 if (enc.length() == 0) {
141 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
142 }
143
144 char c;
145 byte[] bytes = null;
146 while (i < numChars) {
147 c = s.charAt(i);
148 switch (c) {
149 case '+':
150 sb.append(' ');
151 i++;
152 needToChange = true;
153 break;
154 case '%':
155 /*
156 * Starting with this instance of %, process all
157 * consecutive substrings of the form %xy. Each
158 * substring %xy will yield a byte. Convert all
159 * consecutive bytes obtained this way to whatever
160 * character(s) they represent in the provided
161 * encoding.
162 */
163
164 try {
165
166 // (numChars-i)/3 is an upper bound for the number
167 // of remaining bytes
168 if (bytes == null)
169 bytes = new byte[(numChars-i)/3];
170 int pos = 0;
171
172 while ( ((i+2) < numChars) &&
173 (c=='%')) {
174 int v = Integer.parseInt(s, i + 1, i + 3, 16);
175 if (v < 0)
176 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
177 bytes[pos++] = (byte) v;
178 i+= 3;
179 if (i < numChars)
180 c = s.charAt(i);
181 }
182
183 // A trailing, incomplete byte encoding such as
184 // "%x" will cause an exception to be thrown
185
186 if ((i < numChars) && (c=='%'))
187 throw new IllegalArgumentException(
188 "URLDecoder: Incomplete trailing escape (%) pattern");
189
190 sb.append(new String(bytes, 0, pos, enc));
191 } catch (NumberFormatException e) {
192 throw new IllegalArgumentException(
193 "URLDecoder: Illegal hex characters in escape (%) pattern - "
194 + e.getMessage());
195 }
196 needToChange = true;
197 break;
198 default:
199 sb.append(c);
200 i++;
201 break;
202 }
203 }
204
205 return (needToChange? sb.toString() : s);
206 }
207 }
|
1 /*
2 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.net;
27
28 import java.io.*;
29 import java.nio.charset.Charset;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.UnsupportedCharsetException;
32 import java.util.Objects;
33
34 /**
35 * Utility class for HTML form decoding. This class contains static methods
36 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
37 * MIME format.
38 * <p>
39 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
40 * that all characters in the encoded string are one of the following:
41 * "{@code a}" through "{@code z}",
42 * "{@code A}" through "{@code Z}",
43 * "{@code 0}" through "{@code 9}", and
44 * "{@code -}", "{@code _}",
45 * "{@code .}", and "{@code *}". The
46 * character "{@code %}" is allowed but is interpreted
47 * as the start of a special escaped sequence.
48 * <p>
49 * The following rules are applied in the conversion:
50 *
51 * <ul>
52 * <li>The alphanumeric characters "{@code a}" through
95 * to specify the encoding.
96 * @return the newly decoded {@code String}
97 */
98 @Deprecated
99 public static String decode(String s) {
100
101 String str = null;
102
103 try {
104 str = decode(s, dfltEncName);
105 } catch (UnsupportedEncodingException e) {
106 // The system should always have the platform default
107 }
108
109 return str;
110 }
111
112 /**
113 * Decodes an {@code application/x-www-form-urlencoded} string using
114 * a specific encoding scheme.
115 *
116 * <p>
117 * This method behaves the same as {@linkplain decode(String s, Charset charset)}
118 * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
119 * using the given encoding name.
120 *
121 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
122 * when illegal strings are encountered.
123 *
124 * @param s the {@code String} to decode
125 * @param enc The name of a supported
126 * <a href="../lang/package-summary.html#charenc">character
127 * encoding</a>.
128 * @return the newly decoded {@code String}
129 * @throws UnsupportedEncodingException
130 * If character encoding needs to be consulted, but
131 * named character encoding is not supported
132 * @see URLEncoder#encode(java.lang.String, java.lang.String)
133 * @since 1.4
134 */
135 public static String decode(String s, String enc) throws UnsupportedEncodingException {
136 if (enc.length() == 0) {
137 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
138 }
139
140 try {
141 Charset charset = Charset.forName(enc);
142 return decode(s, charset);
143 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
144 throw new UnsupportedEncodingException(enc);
145 }
146 }
147
148 /**
149 * Decodes an {@code application/x-www-form-urlencoded} string using
150 * a specific {@linkplain java.nio.charset.Charset Charset}.
151 * The supplied charset is used to determine
152 * what characters are represented by any consecutive sequences of the
153 * form "<i>{@code %xy}</i>".
154 * <p>
155 * <em><strong>Note:</strong> The <a href=
156 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
157 * World Wide Web Consortium Recommendation</a> states that
158 * UTF-8 should be used. Not doing so may introduce
159 * incompatibilities.</em>
160 *
161 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
162 * when illegal strings are encountered.
163 *
164 * @param s the {@code String} to decode
165 * @param charset the given charset
166 * @return the newly decoded {@code String}
167 * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
168 * @throws IllegalArgumentException if the implementation encounters illegal
169 * characters
170 * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
171 * @since 10
172 */
173 public static String decode(String s, Charset charset) {
174 Objects.requireNonNull(charset, "Charset");
175 boolean needToChange = false;
176 int numChars = s.length();
177 StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
178 int i = 0;
179
180 char c;
181 byte[] bytes = null;
182 while (i < numChars) {
183 c = s.charAt(i);
184 switch (c) {
185 case '+':
186 sb.append(' ');
187 i++;
188 needToChange = true;
189 break;
190 case '%':
191 /*
192 * Starting with this instance of %, process all
193 * consecutive substrings of the form %xy. Each
194 * substring %xy will yield a byte. Convert all
195 * consecutive bytes obtained this way to whatever
196 * character(s) they represent in the provided
197 * encoding.
198 */
199
200 try {
201
202 // (numChars-i)/3 is an upper bound for the number
203 // of remaining bytes
204 if (bytes == null)
205 bytes = new byte[(numChars-i)/3];
206 int pos = 0;
207
208 while ( ((i+2) < numChars) &&
209 (c=='%')) {
210 int v = Integer.parseInt(s, i + 1, i + 3, 16);
211 if (v < 0)
212 throw new IllegalArgumentException(
213 "URLDecoder: Illegal hex characters in escape "
214 + "(%) pattern - negative value");
215 bytes[pos++] = (byte) v;
216 i+= 3;
217 if (i < numChars)
218 c = s.charAt(i);
219 }
220
221 // A trailing, incomplete byte encoding such as
222 // "%x" will cause an exception to be thrown
223
224 if ((i < numChars) && (c=='%'))
225 throw new IllegalArgumentException(
226 "URLDecoder: Incomplete trailing escape (%) pattern");
227
228 sb.append(new String(bytes, 0, pos, charset));
229 } catch (NumberFormatException e) {
230 throw new IllegalArgumentException(
231 "URLDecoder: Illegal hex characters in escape (%) pattern - "
232 + e.getMessage());
233 }
234 needToChange = true;
235 break;
236 default:
237 sb.append(c);
238 i++;
239 break;
240 }
241 }
242
243 return (needToChange? sb.toString() : s);
244 }
245 }
|