6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /*
25 * @test
26 * @bug 4486841 7040220 7096080
27 * @summary Test UTF-8 charset
28 */
29
30 import java.nio.charset.*;
31 import java.nio.*;
32 import java.util.*;
33
34 public class TestUTF8 {
35 static char[] decode(byte[] bb, String csn, boolean testDirect)
36 throws Exception {
37 CharsetDecoder dec = Charset.forName(csn).newDecoder();
38 ByteBuffer bbf;
39 CharBuffer cbf;
40 if (testDirect) {
41 bbf = ByteBuffer.allocateDirect(bb.length);
42 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
43 bbf.put(bb).flip();
44 } else {
45 bbf = ByteBuffer.wrap(bb);
46 cbf = CharBuffer.allocate(bb.length);
274
275 // Two-byte sequences:
276 {1, (byte)0xC0, (byte)0x80}, // invalid first byte
277 {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
278 {1, (byte)0xC2, (byte)0x00}, // invalid second byte
279 {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
280 {1, (byte)0xD0, (byte)0x00}, // invalid second byte
281 {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
282 {1, (byte)0xDF, (byte)0x00}, // invalid second byte
283 {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
284
285 // Three-byte sequences
286 {1, (byte)0xE0, (byte)0x80, (byte)0x80}, // 111x first byte first nibble
287 {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
288 {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
289 {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
290
291 {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
292 {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
293 {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
294 {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
295 {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
296 {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
297 {1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes
298 {3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
299 {3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
300
301
302 // Four-byte sequences
303 {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
304 {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
305 {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
306 {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
307
308 {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
309 {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte
310 {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
311 {1, (byte)0xF0, (byte)41 }, // invalid second byte
312 // & only 2 bytes
313
314 {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
315 {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
316 {2, (byte)0xF0, (byte)0x90, (byte)0x41 }, // invalid third byte
317 // & 3 bytes input
318
319 {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
320 {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
321 {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
322 {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
323 {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
324 {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
325
326 // Five-byte sequences
327 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
328 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
329 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
330 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
331 {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
332
333 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
334 {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
335 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
336 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
337
338 // Six-byte sequences
339 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
340 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
341 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
342 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
343 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
344 {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
345 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
536 {inoff, 7, outoff, 4, inoff + 6, outoff + 3, 0},
537 {inoff, 8, outoff, 4, inoff + 6, outoff + 3, 0},
538 {inoff, 9, outoff, 4, inoff + 6, outoff + 3, 0},
539 {inoff, 10, outoff, 4, inoff + 6, outoff + 3, 1},
540 };
541 for (boolean direct: new boolean[] {false, true}) {
542 for (int[] flow: Flows) {
543 if (!check(dec, utf8s, direct, flow))
544 failed = true;
545 }
546 }}}
547 if (failed)
548 throw new RuntimeException("Check under/overflow failed " + csn);
549 }
550
/**
 * Test entry point: runs the "UTF-8" checks first, then the "CESU-8" checks,
 * strictly in the order written below.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if any invoked check throws; checkUnderOverflow, for one,
 *         raises a RuntimeException when one of its flow checks fails
 */
551 public static void main(String[] args) throws Exception {
    // UTF-8: round trip, 4-byte surrogate handling, the `malformed` table,
    // and the under/overflow flows.
552 checkRoundtrip("UTF-8");
553 check4ByteSurrs("UTF-8");
554 checkMalformed("UTF-8", malformed);
555 checkUnderOverflow("UTF-8");
556
    // CESU-8: round trip, 6-byte surrogate handling, and its own
    // `malformed_cesu8` table. No under/overflow check is run for CESU-8 here.
557 checkRoundtrip("CESU-8");
558 check6ByteSurrs("CESU-8");
559 checkMalformed("CESU-8", malformed_cesu8);
560 }
561 }
|
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /*
25 * @test
26 * @bug 4486841 7040220 7096080 8039751
27 * @summary Test UTF-8 charset
28 */
29
30 import java.nio.charset.*;
31 import java.nio.*;
32 import java.util.*;
33
34 public class TestUTF8 {
35 static char[] decode(byte[] bb, String csn, boolean testDirect)
36 throws Exception {
37 CharsetDecoder dec = Charset.forName(csn).newDecoder();
38 ByteBuffer bbf;
39 CharBuffer cbf;
40 if (testDirect) {
41 bbf = ByteBuffer.allocateDirect(bb.length);
42 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
43 bbf.put(bb).flip();
44 } else {
45 bbf = ByteBuffer.wrap(bb);
46 cbf = CharBuffer.allocate(bb.length);
274
275 // Two-byte sequences:
276 {1, (byte)0xC0, (byte)0x80}, // invalid first byte
277 {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
278 {1, (byte)0xC2, (byte)0x00}, // invalid second byte
279 {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
280 {1, (byte)0xD0, (byte)0x00}, // invalid second byte
281 {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
282 {1, (byte)0xDF, (byte)0x00}, // invalid second byte
283 {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
284
285 // Three-byte sequences
286 {1, (byte)0xE0, (byte)0x80, (byte)0x80}, // 111x first byte first nibble
287 {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
288 {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
289 {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
290
291 {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
292 {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
293 {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
294 {2, (byte)0xE1, (byte)0x80, (byte)0x42}, // invalid third byte
295
296 {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
297 {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
298 {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
299 {1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes
300 {1, (byte)0xE1, (byte)0x40,}, // invalid second byte & 2 bytes
301 {3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
302 {3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
303
304
305
306 // Four-byte sequences
307 {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
308 {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
309 {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
310 {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
311
312 {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
313 {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte
314 {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
315 {1, (byte)0xF0, (byte)41 }, // invalid second byte
316 // & only 2 bytes
317
318 {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
319 {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
320 {2, (byte)0xF0, (byte)0x90, (byte)0x41 }, // invalid third byte
321 // & 3 bytes input
322
323 {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
324 {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
325 {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
326 {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
327 {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
328 {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
329
330 // #8039751
331 {1, (byte)0xF6, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
332 {1, (byte)0xF6, (byte)0x80, (byte)0x80, },
333 {1, (byte)0xF6, (byte)0x80, },
334 {1, (byte)0xF6, },
335 {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
336 {1, (byte)0xF5, (byte)0x80, (byte)0x80, },
337 {1, (byte)0xF5, (byte)0x80, },
338 {1, (byte)0xF5 },
339
340 {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
341 {1, (byte)0xF4, (byte)0x90, (byte)0x80 },
342 {1, (byte)0xF4, (byte)0x90 },
343
344 {1, (byte)0xF4, (byte)0x7f, (byte)0x80, (byte)0x80 }, // out-range/ascii 2nd byte
345 {1, (byte)0xF4, (byte)0x7f, (byte)0x80 },
346 {1, (byte)0xF4, (byte)0x7f },
347
348 {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
349 {1, (byte)0xF0, (byte)0x80, (byte)0x80 },
350 {1, (byte)0xF0, (byte)0x80 },
351
352 {1, (byte)0xF0, (byte)0xc0, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
353 {1, (byte)0xF0, (byte)0xc0, (byte)0x80 },
354 {1, (byte)0xF0, (byte)0xc0 },
355
356 // Five-byte sequences
357 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
358 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
359 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
360 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
361 {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
362
363 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
364 {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
365 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
366 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
367
368 // Six-byte sequences
369 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
370 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
371 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
372 {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
373 {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
374 {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
375 {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
566 {inoff, 7, outoff, 4, inoff + 6, outoff + 3, 0},
567 {inoff, 8, outoff, 4, inoff + 6, outoff + 3, 0},
568 {inoff, 9, outoff, 4, inoff + 6, outoff + 3, 0},
569 {inoff, 10, outoff, 4, inoff + 6, outoff + 3, 1},
570 };
571 for (boolean direct: new boolean[] {false, true}) {
572 for (int[] flow: Flows) {
573 if (!check(dec, utf8s, direct, flow))
574 failed = true;
575 }
576 }}}
577 if (failed)
578 throw new RuntimeException("Check under/overflow failed " + csn);
579 }
580
/**
 * Test entry point: runs the "UTF-8" checks first, then the "CESU-8" checks,
 * strictly in the order written below.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if any invoked check throws; checkUnderOverflow, for one,
 *         raises a RuntimeException when one of its flow checks fails
 */
581 public static void main(String[] args) throws Exception {
    // UTF-8: round trip, 4-byte surrogate handling, the `malformed` table,
    // and the under/overflow flows.
582 checkRoundtrip("UTF-8");
583 check4ByteSurrs("UTF-8");
584 checkMalformed("UTF-8", malformed);
585 checkUnderOverflow("UTF-8");
    // CESU-8: round trip, 6-byte surrogate handling, and its own
    // `malformed_cesu8` table. No under/overflow check is run for CESU-8 here.
586 checkRoundtrip("CESU-8");
587 check6ByteSurrs("CESU-8");
588 checkMalformed("CESU-8", malformed_cesu8);
589 }
590 }
|