6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /*
25 * @test
26 * @bug 4486841
27 * @summary Test UTF-8 charset
28 */
29
30 import java.nio.charset.*;
31 import java.nio.*;
32 import java.util.*;
33
34 public class TestUTF8 {
35 static char[] decode(byte[] bb, String csn, boolean testDirect)
36 throws Exception {
37 CharsetDecoder dec = Charset.forName(csn).newDecoder();
38 ByteBuffer bbf;
39 CharBuffer cbf;
40 if (testDirect) {
41 bbf = ByteBuffer.allocateDirect(bb.length);
42 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
43 bbf.put(bb).flip();
44 } else {
45 bbf = ByteBuffer.wrap(bb);
46 cbf = CharBuffer.allocate(bb.length);
53 return cc;
54
55 }
56
57 static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
58 throws Exception {
59 CharsetDecoder dec = Charset.forName(csn).newDecoder();
60 ByteBuffer bbf;
61 CharBuffer cbf;
62 if (testDirect) {
63 bbf = ByteBuffer.allocateDirect(bb.length);
64 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
65 bbf.put(bb).flip();
66 } else {
67 bbf = ByteBuffer.wrap(bb);
68 cbf = CharBuffer.allocate(bb.length);
69 }
70 return dec.decode(bbf, cbf, true);
71 }
72
73 static byte[] encode(char[] cc, String csn, boolean testDirect)
74 throws Exception {
75 ByteBuffer bbf;
76 CharBuffer cbf;
77 CharsetEncoder enc = Charset.forName(csn).newEncoder();
78 if (testDirect) {
79 bbf = ByteBuffer.allocateDirect(cc.length * 4);
80 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
81 cbf.put(cc).flip();
82 } else {
83 bbf = ByteBuffer.allocate(cc.length * 4);
84 cbf = CharBuffer.wrap(cc);
85 }
86
87 CoderResult cr = enc.encode(cbf, bbf, true);
88 if (cr != CoderResult.UNDERFLOW)
89 throw new RuntimeException("Encoding err: " + csn);
90 byte[] bb = new byte[bbf.position()];
91 bbf.flip(); bbf.get(bb);
92 return bb;
125
126 static int to3ByteUTF8(char c, byte[] bb, int pos) {
127 bb[pos++] = (byte)(0xe0 | ((c >> 12)));
128 bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
129 bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
130 return 3;
131 }
132
133 static void checkRoundtrip(String csn) throws Exception {
134 System.out.printf(" Check roundtrip <%s>...", csn);
135 char[] cc = getUTFChars();
136 byte[] bb = encode(cc, csn, false);
137 char[] ccO = decode(bb, csn, false);
138
139 if (!Arrays.equals(cc, ccO)) {
140 System.out.printf(" non-direct failed");
141 }
142 bb = encode(cc, csn, true);
143 ccO = decode(bb, csn, true);
144 if (!Arrays.equals(cc, ccO)) {
145 System.out.printf(" (direct) failed");
146 }
147 System.out.println();
148 }
149
150 static void check6ByteSurrs(String csn) throws Exception {
151 System.out.printf(" Check 6-byte Surrogates <%s>...%n", csn);
152 byte[] bb = new byte[(0x110000 - 0x10000) * 6];
153 char[] cc = new char[(0x110000 - 0x10000) * 2];
154 int bpos = 0;
155 int cpos = 0;
156 for (int i = 0x10000; i < 0x110000; i++) {
157 Character.toChars(i, cc, cpos);
158 bpos += to3ByteUTF8(cc[cpos], bb, bpos);
159 bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
160 cpos += 2;
161 }
162
163 char[] ccO = decode(bb, csn, false);
164 if (!Arrays.equals(cc, ccO)) {
165 System.out.printf(" decoding failed%n");
166 }
167 ccO = decode(bb, csn, true);
168 if (!Arrays.equals(cc, ccO)) {
169 System.out.printf(" decoding(direct) failed%n");
170 }
171 }
172
173 static void compare(String csn1, String csn2) throws Exception {
174 System.out.printf(" Diff <%s> <%s>...%n", csn1, csn2);
175 char[] cc = getUTFChars();
176
177 byte[] bb1 = encode(cc, csn1, false);
178 byte[] bb2 = encode(cc, csn2, false);
179 if (!Arrays.equals(bb1, bb2))
180 System.out.printf(" encoding failed%n");
181 char[] cc1 = decode(bb1, csn1, false);
182 char[] cc2 = decode(bb1, csn2, false);
183 if (!Arrays.equals(cc1, cc2)) {
184 System.out.printf(" decoding failed%n");
185 }
186
187 bb1 = encode(cc, csn1, true);
188 bb2 = encode(cc, csn2, true);
189 if (!Arrays.equals(bb1, bb2))
190 System.out.printf(" encoding (direct) failed%n");
257 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
258 {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
259 {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
260 {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
261
262 // Six-byte sequences
263 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
264 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
265 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
266 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
267 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
268 {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
269 {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
270 {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
271 {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
272 };
273
274 static void checkMalformed(String csn) throws Exception {
275 boolean failed = false;
276 System.out.printf(" Check malformed <%s>...%n", csn);
277 for (boolean direct: new boolean[] {false, true}) {
278 for (byte[] bins : malformed) {
279 int mlen = bins[0];
280 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
281 CoderResult cr = decodeCR(bin, csn, direct);
282 String ashex = "";
283 for (int i = 0; i < bin.length; i++) {
284 if (i > 0) ashex += " ";
285 ashex += Integer.toBinaryString((int)bin[i] & 0xff);
286 }
287 if (!cr.isMalformed()) {
288 System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
289 failed = true;
290 } else if (cr.length() != mlen) {
291 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
292 failed = true;
293 }
294 }
295 }
296 if (failed)
297 throw new RuntimeException("Check malformed failed " + csn);
298 }
299
300 static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
301 int inPos = flow[0];
302 int inLen = flow[1];
303 int outPos = flow[2];
304 int outLen = flow[3];
305 int expedInPos = flow[4];
306 int expedOutPos = flow[5];
307 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
308 :CoderResult.OVERFLOW;
309 ByteBuffer bbf;
310 CharBuffer cbf;
311 if (direct) {
|
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /*
25 * @test
26 * @bug 4486841 7040220
27 * @summary Test UTF-8 charset
28 */
29
30 import java.nio.charset.*;
31 import java.nio.*;
32 import java.util.*;
33
34 public class TestUTF8 {
35 static char[] decode(byte[] bb, String csn, boolean testDirect)
36 throws Exception {
37 CharsetDecoder dec = Charset.forName(csn).newDecoder();
38 ByteBuffer bbf;
39 CharBuffer cbf;
40 if (testDirect) {
41 bbf = ByteBuffer.allocateDirect(bb.length);
42 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
43 bbf.put(bb).flip();
44 } else {
45 bbf = ByteBuffer.wrap(bb);
46 cbf = CharBuffer.allocate(bb.length);
53 return cc;
54
55 }
56
57 static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
58 throws Exception {
59 CharsetDecoder dec = Charset.forName(csn).newDecoder();
60 ByteBuffer bbf;
61 CharBuffer cbf;
62 if (testDirect) {
63 bbf = ByteBuffer.allocateDirect(bb.length);
64 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
65 bbf.put(bb).flip();
66 } else {
67 bbf = ByteBuffer.wrap(bb);
68 cbf = CharBuffer.allocate(bb.length);
69 }
70 return dec.decode(bbf, cbf, true);
71 }
72
73 // copy/paste of the StringCoding.decode()
74 static char[] decode(Charset cs, byte[] ba, int off, int len) {
75 CharsetDecoder cd = cs.newDecoder();
76 int en = (int)(len * cd.maxCharsPerByte());
77 char[] ca = new char[en];
78 if (len == 0)
79 return ca;
80 cd.onMalformedInput(CodingErrorAction.REPLACE)
81 .onUnmappableCharacter(CodingErrorAction.REPLACE)
82 .reset();
83
84 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
85 CharBuffer cb = CharBuffer.wrap(ca);
86 try {
87 CoderResult cr = cd.decode(bb, cb, true);
88 if (!cr.isUnderflow())
89 cr.throwException();
90 cr = cd.flush(cb);
91 if (!cr.isUnderflow())
92 cr.throwException();
93 } catch (CharacterCodingException x) {
94 throw new Error(x);
95 }
96 return Arrays.copyOf(ca, cb.position());
97 }
98
99 static byte[] encode(char[] cc, String csn, boolean testDirect)
100 throws Exception {
101 ByteBuffer bbf;
102 CharBuffer cbf;
103 CharsetEncoder enc = Charset.forName(csn).newEncoder();
104 if (testDirect) {
105 bbf = ByteBuffer.allocateDirect(cc.length * 4);
106 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
107 cbf.put(cc).flip();
108 } else {
109 bbf = ByteBuffer.allocate(cc.length * 4);
110 cbf = CharBuffer.wrap(cc);
111 }
112
113 CoderResult cr = enc.encode(cbf, bbf, true);
114 if (cr != CoderResult.UNDERFLOW)
115 throw new RuntimeException("Encoding err: " + csn);
116 byte[] bb = new byte[bbf.position()];
117 bbf.flip(); bbf.get(bb);
118 return bb;
151
152 static int to3ByteUTF8(char c, byte[] bb, int pos) {
153 bb[pos++] = (byte)(0xe0 | ((c >> 12)));
154 bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
155 bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
156 return 3;
157 }
158
159 static void checkRoundtrip(String csn) throws Exception {
160 System.out.printf(" Check roundtrip <%s>...", csn);
161 char[] cc = getUTFChars();
162 byte[] bb = encode(cc, csn, false);
163 char[] ccO = decode(bb, csn, false);
164
165 if (!Arrays.equals(cc, ccO)) {
166 System.out.printf(" non-direct failed");
167 }
168 bb = encode(cc, csn, true);
169 ccO = decode(bb, csn, true);
170 if (!Arrays.equals(cc, ccO)) {
171 System.out.print(" (direct) failed");
172 }
173 // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
174 if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
175 System.out.printf(" String.getBytes() failed");
176 }
177 if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
178 System.out.printf(" String.toCharArray() failed");
179 }
180 System.out.println();
181 }
182
183 static void check6ByteSurrs(String csn) throws Exception {
184 System.out.printf(" Check 6-byte Surrogates <%s>...%n", csn);
185 byte[] bb = new byte[(0x110000 - 0x10000) * 6];
186 char[] cc = new char[(0x110000 - 0x10000) * 2];
187 int bpos = 0;
188 int cpos = 0;
189 for (int i = 0x10000; i < 0x110000; i++) {
190 Character.toChars(i, cc, cpos);
191 bpos += to3ByteUTF8(cc[cpos], bb, bpos);
192 bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
193 cpos += 2;
194 }
195
196 char[] ccO = decode(bb, csn, false);
197 if (!Arrays.equals(cc, ccO)) {
198 System.out.printf(" decoding failed%n");
199 }
200 ccO = decode(bb, csn, true);
201 if (!Arrays.equals(cc, ccO)) {
202 System.out.printf(" decoding(direct) failed%n");
203 }
204 // new String(bb, csn).getBytes(csn) will not return
205 // the 6 bytes surrogates as in bb, so only test
206 // toCharArray() here.
207 if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
208 System.out.printf(" String.toCharArray() failed");
209 }
210 }
211
212 static void compare(String csn1, String csn2) throws Exception {
213 System.out.printf(" Diff <%s> <%s>...%n", csn1, csn2);
214 char[] cc = getUTFChars();
215
216 byte[] bb1 = encode(cc, csn1, false);
217 byte[] bb2 = encode(cc, csn2, false);
218 if (!Arrays.equals(bb1, bb2))
219 System.out.printf(" encoding failed%n");
220 char[] cc1 = decode(bb1, csn1, false);
221 char[] cc2 = decode(bb1, csn2, false);
222 if (!Arrays.equals(cc1, cc2)) {
223 System.out.printf(" decoding failed%n");
224 }
225
226 bb1 = encode(cc, csn1, true);
227 bb2 = encode(cc, csn2, true);
228 if (!Arrays.equals(bb1, bb2))
229 System.out.printf(" encoding (direct) failed%n");
296 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
297 {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
298 {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
299 {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
300
301 // Six-byte sequences
302 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
303 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
304 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
305 {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
306 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
307 {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
308 {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
309 {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
310 {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
311 };
312
313 static void checkMalformed(String csn) throws Exception {
314 boolean failed = false;
315 System.out.printf(" Check malformed <%s>...%n", csn);
316 Charset cs = Charset.forName(csn);
317 for (boolean direct: new boolean[] {false, true}) {
318 for (byte[] bins : malformed) {
319 int mlen = bins[0];
320 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
321 CoderResult cr = decodeCR(bin, csn, direct);
322 String ashex = "";
323 for (int i = 0; i < bin.length; i++) {
324 if (i > 0) ashex += " ";
325 ashex += Integer.toBinaryString((int)bin[i] & 0xff);
326 }
327 if (!cr.isMalformed()) {
328 System.out.printf(" FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
329 failed = true;
330 } else if (cr.length() != mlen) {
331 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
332 failed = true;
333 }
334 if (!Arrays.equals(decode(cs, bin, 0, bin.length),
335 new String(bin, csn).toCharArray())) {
336 System.out.printf(" FAIL(new String(bb, %s)) failed%n", csn);
337 failed = true;
338 }
339 }
340 }
341 if (failed)
342 throw new RuntimeException("Check malformed failed " + csn);
343 }
344
345 static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
346 int inPos = flow[0];
347 int inLen = flow[1];
348 int outPos = flow[2];
349 int outLen = flow[3];
350 int expedInPos = flow[4];
351 int expedOutPos = flow[5];
352 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
353 :CoderResult.OVERFLOW;
354 ByteBuffer bbf;
355 CharBuffer cbf;
356 if (direct) {
|