/* * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */ /* * @test * @bug 6831794 6229811 * @summary Test EUC_TW charset */ import java.nio.charset.*; import java.nio.*; import java.util.*; public class TestEUC_TW { static class Time { long t; } static int iteration = 1000; static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception { String csn = cs.name(); CharsetDecoder dec = cs.newDecoder(); ByteBuffer bbf; CharBuffer cbf; if (testDirect) { bbf = ByteBuffer.allocateDirect(bb.length); cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); bbf.put(bb); } else { bbf = ByteBuffer.wrap(bb); cbf = CharBuffer.allocate(bb.length); } CoderResult cr = null; long t1 = System.nanoTime()/1000; for (int i = 0; i < iteration; i++) { bbf.rewind(); cbf.clear(); dec.reset(); cr = dec.decode(bbf, cbf, true); } long t2 = System.nanoTime()/1000; if (t != null) t.t = (t2 - t1)/iteration; if (cr != CoderResult.UNDERFLOW) { System.out.println("DEC-----------------"); int pos = bbf.position(); System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", cr.toString(), pos, bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff); throw new RuntimeException("Decoding err: " + csn); } char[] cc = new char[cbf.position()]; cbf.flip(); cbf.get(cc); return cc; } static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) throws Exception { CharsetDecoder dec = cs.newDecoder(); ByteBuffer bbf; CharBuffer cbf; if (testDirect) { bbf = ByteBuffer.allocateDirect(bb.length); cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); bbf.put(bb).flip(); } else { bbf = ByteBuffer.wrap(bb); cbf = CharBuffer.allocate(bb.length); } return dec.decode(bbf, cbf, true); } static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); String csn = cs.name(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } CoderResult cr = null; long t1 = System.nanoTime()/1000; for (int i = 0; i < iteration; i++) { cbf.rewind(); bbf.clear(); enc.reset(); cr = enc.encode(cbf, bbf, true); } long t2 = System.nanoTime()/1000; if (t != null) t.t = (t2 - t1)/iteration; if (cr != CoderResult.UNDERFLOW) { System.out.println("ENC-----------------"); int pos = cbf.position(); System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos]&0xffff); throw new RuntimeException("Encoding err: " + csn); } byte[] bb = new byte[bbf.position()]; bbf.flip(); bbf.get(bb); return bb; } static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } return enc.encode(cbf, bbf, true); } static char[] getEUC_TWChars(boolean skipNR) { //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder(); CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder(); CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder(); char[] cc = new char[0x20000]; char[] c2 = new char[2]; int pos = 0; int i = 0; //bmp for (i = 0; i < 0x10000; i++) { //SKIP these 3 NR codepoints if compared to EUC_TW if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9)) continue; if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) { System.out.printf(" Err i=%x: old=%b new=%b%n", i, encOLD.canEncode((char)i), enc.canEncode((char)i)); throw new RuntimeException("canEncode() err!"); } if (enc.canEncode((char)i)) { cc[pos++] = (char)i; } } //supp CharBuffer cb = CharBuffer.wrap(new char[2]); for (i = 0x20000; i < 0x30000; i++) { Character.toChars(i, c2, 0); cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip(); if (encOLD.canEncode(cb) != enc.canEncode(cb)) { throw new RuntimeException("canEncode() err!"); } if (enc.canEncode(cb)) { //System.out.printf("cp=%x, (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff); cc[pos++] = c2[0]; cc[pos++] = c2[1]; } } return Arrays.copyOf(cc, pos); } static void checkRoundtrip(Charset cs) throws Exception { char[] cc = getEUC_TWChars(false); System.out.printf("Check roundtrip <%s>...", cs.name()); byte[] bb = encode(cc, cs, false, null); char[] ccO = decode(bb, cs, false, null); if (!Arrays.equals(cc, ccO)) { System.out.printf(" non-direct failed"); } bb = encode(cc, cs, true, null); ccO = decode(bb, cs, true, null); if (!Arrays.equals(cc, ccO)) { System.out.printf(" (direct) failed"); } System.out.println(); } static void checkInit(String csn) throws Exception { System.out.printf("Check init <%s>...%n", csn); Charset.forName("Big5"); // load in the ExtendedCharsets long t1 = System.nanoTime()/1000; Charset cs = Charset.forName(csn); long t2 = System.nanoTime()/1000; System.out.printf(" charset :%d%n", t2 - t1); t1 = System.nanoTime()/1000; cs.newDecoder(); t2 = System.nanoTime()/1000; System.out.printf(" new Decoder :%d%n", t2 - t1); t1 = System.nanoTime()/1000; cs.newEncoder(); t2 = System.nanoTime()/1000; System.out.printf(" new Encoder :%d%n", t2 - t1); } static void compare(Charset cs1, Charset cs2) throws Exception { char[] cc = getEUC_TWChars(true); String csn1 = cs1.name(); String csn2 = cs2.name(); System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); Time t1 = new Time(); Time t2 = new Time(); byte[] bb1 = encode(cc, cs1, false, t1); byte[] bb2 = encode(cc, cs2, false, t2); System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double)(t2.t)/(t1.t)); if (!Arrays.equals(bb1, bb2)) { System.out.printf(" encoding failed%n"); } char[] cc2 = decode(bb1, cs2, false, t2); char[] cc1 = decode(bb1, cs1, false, t1); System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double)(t2.t)/(t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding failed%n"); } bb1 = encode(cc, cs1, true, t1); bb2 = encode(cc, cs2, true, t2); System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double)(t2.t)/(t1.t)); if (!Arrays.equals(bb1, bb2)) System.out.printf(" encoding (direct) failed%n"); cc1 = decode(bb1, cs1, true, t1); cc2 = decode(bb1, cs2, true, t2); System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double)(t2.t)/(t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding (direct) failed%n"); } } // The first byte is the length of malformed bytes static byte[][] malformed = { //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, }; static void checkMalformed(Charset cs) throws Exception { boolean failed = false; String csn = cs.name(); System.out.printf("Check malformed <%s>...%n", csn); for (boolean direct: new boolean[] {false, true}) { for (byte[] bins : malformed) { int mlen = bins[0]; byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); CoderResult cr = decodeCR(bin, cs, direct); String ashex = ""; for (int i = 0; i < bin.length; i++) { if (i > 0) ashex += " "; ashex += Integer.toBinaryString((int)bin[i] & 0xff); } if (!cr.isMalformed()) { System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); failed = true; } else if (cr.length() != mlen) { System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); failed = true; } } } if (failed) throw new RuntimeException("Check malformed failed " + csn); } static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) { int inPos = flow[0]; int inLen = flow[1]; int outPos = flow[2]; int outLen = flow[3]; int expedInPos = flow[4]; int expedOutPos = flow[5]; CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW :CoderResult.OVERFLOW; ByteBuffer bbf; CharBuffer cbf; if (direct) { bbf = ByteBuffer.allocateDirect(inPos + bytes.length); cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); } else { bbf = ByteBuffer.allocate(inPos + bytes.length); cbf = CharBuffer.allocate(outPos + outLen); } bbf.position(inPos); bbf.put(bytes).flip().position(inPos).limit(inPos + inLen); cbf.position(outPos); dec.reset(); CoderResult cr = dec.decode(bbf, cbf, false); if (cr != expedCR || bbf.position() != expedInPos || cbf.position() != expedOutPos) { System.out.printf("Expected(direct=%5b): [", direct); for (int i:flow) System.out.print(" " + i); System.out.println("] CR=" + cr + ", inPos=" + bbf.position() + ", outPos=" + cbf.position()); return false; } return true; } static void checkUnderOverflow(Charset cs) throws Exception { String csn = cs.name(); System.out.printf("Check under/overflow <%s>...%n", csn); CharsetDecoder dec = cs.newDecoder(); boolean failed = false; //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 //0 1 2 3 7 11 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); int inlen = bytes.length; int MAXOFF = 20; for (int inoff = 0; inoff < MAXOFF; inoff++) { for (int outoff = 0; outoff < MAXOFF; outoff++) { int[][] Flows = { //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) //overflow {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, //underflow {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0}, {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0}, {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, // 2-byte under/overflow {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, // 4-byte under/overflow {inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0}, {inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0}, {inoff, 6, outoff, 2, inoff + 3, outoff + 2, 0}, {inoff, 7, outoff, 2, inoff + 3, outoff + 2, 1}, {inoff, 7, outoff, 3, inoff + 7, outoff + 3, 0}, // 4-byte under/overflow {inoff, 8, outoff, 3, inoff + 7, outoff + 3, 0}, {inoff, 9, outoff, 3, inoff + 7, outoff + 3, 0}, {inoff, 10, outoff, 3, inoff + 7, outoff + 3, 0}, {inoff, 11, outoff, 3, inoff + 7, outoff + 3, 1}, {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, // 4-byte/supp under/overflow {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, {inoff, 12, outoff, 4, inoff +11, outoff + 4, 0}, {inoff, 13, outoff, 4, inoff +11, outoff + 4, 0}, {inoff, 14, outoff, 4, inoff +11, outoff + 4, 0}, {inoff, 15, outoff, 4, inoff +11, outoff + 4, 1}, {inoff, 15, outoff, 5, inoff +11, outoff + 4, 1}, {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, }; for (boolean direct: new boolean[] {false, true}) { for (int[] flow: Flows) { if (!check(dec, bytes, direct, flow)) failed = true; } }}} if (failed) throw new RuntimeException("Check under/overflow failed " + csn); } public static void main(String[] args) throws Exception { // be the first one //checkInit("EUC_TW_OLD"); checkInit("EUC_TW"); Charset euctw = Charset.forName("EUC_TW"); checkRoundtrip(euctw); compare(euctw, new EUC_TW_OLD()); checkMalformed(euctw); checkUnderOverflow(euctw); } }