/*
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
 * @bug 8186801
 * @summary Test the charset mappings
 */

import java.io.*;
import java.nio.*;
import java.nio.file.*;
import java.nio.charset.*;
import java.util.*;
import java.util.function.*;
import java.util.regex.*;
import java.util.stream.*;

/**
 * Checks the charsets listed in the "charsets" file of the charsetmapping
 * data directory against their reference mapping tables (.map, .nr and .c2b
 * files): name, aliases, decoding, encoding and String conversion.
 */
public class TestCharsetMapping {

    private static final int BUFSIZ = 8192;     // Initial buffer size
    private static final int MAXERRS = 10;      // Errors reported per test

    private static final PrintStream log = System.out;

    // Set by -v on the command line
    private static boolean verbose = false;

    // Test modes
    private static final int ENCODE = 1;
    private static final int DECODE = 2;

    // Utilities

    /**
     * Returns a new heap buffer of twice the capacity of {@code bb} holding
     * everything written to {@code bb} so far. {@code bb} is flipped as a
     * side effect.
     */
    private static ByteBuffer expand(ByteBuffer bb) {
        ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2);
        bb.flip();
        nbb.put(bb);
        return nbb;
    }

    /**
     * Returns a new heap buffer of twice the capacity of {@code cb} holding
     * everything written to {@code cb} so far. {@code cb} is flipped as a
     * side effect.
     */
    private static CharBuffer expand(CharBuffer cb) {
        CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2);
        cb.flip();
        ncb.put(cb);
        return ncb;
    }

    /**
     * Converts a string of hex digit pairs into the bytes they denote.
     *
     * @throws RuntimeException if {@code s} has an odd number of digits
     * @throws NumberFormatException if a pair is not valid hex
     */
    private static byte[] parseBytes(String s) {
        // Reject up front: with nb = length / 2 a trailing half byte would
        // otherwise be dropped silently.
        if ((s.length() & 1) != 0)
            throw new RuntimeException("Malformed byte string: " + s);
        int nb = s.length() / 2;
        byte[] bs = new byte[nb];
        for (int i = 0; i < nb; i++) {
            int j = i * 2;
            bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16);
        }
        return bs;
    }

    /** Formats each byte as two lower-case hex digits, without separators. */
    private static String printBytes(byte[] bs) {
        StringBuilder sb = new StringBuilder(bs.length * 2);
        for (byte b : bs) {
            sb.append(Character.forDigit((b >> 4) & 0xf, 16));
            sb.append(Character.forDigit(b & 0xf, 16));
        }
        return sb.toString();
    }

    /**
     * Formats a code point as "U+" followed by at least four lower-case hex
     * digits, e.g. U+0041, U+1f600, U+10ffff.
     */
    private static String printCodePoint(int cp) {
        StringBuilder sb = new StringBuilder("U+");
        // The plane number ranges over 0x01..0x10, so it needs five bits.
        if (cp > 0xffff)
            sb.append(Integer.toHexString((cp >> 16) & 0x1f));
        for (int shift = 12; shift >= 0; shift -= 4)
            sb.append(Character.forDigit((cp >> shift) & 0xf, 16));
        return sb.toString();
    }

    /**
     * Reads the next code point from {@code cb}, consuming two chars when the
     * first one is a high surrogate.
     */
    private static int getCodePoint(CharBuffer cb) {
        char c = cb.get();
        if (Character.isHighSurrogate(c))
            return Character.toCodePoint(c, cb.get());
        else
            return c;
    }

    private static String plural(int n) {
        return (n == 1 ? "" : "s");
    }

    // TestCharsetMapping
    private CharsetInfo csinfo;
    private CharsetDecoder decoder = null;
    private CharsetEncoder encoder = null;

    // Stateful dbcs encoding has leading shift byte '0x0e'
    // and trailing shift byte '0x0f'.
    // The flag variable shiftHackDBCS is 'true' for stateful
    // EBCDIC encodings, which indicates the need of adding/
    // removing the shift bytes.
    private boolean shiftHackDBCS = false;

    private TestCharsetMapping(CharsetInfo csinfo) throws Exception {
        this.csinfo = csinfo;
        this.encoder = csinfo.cs.newEncoder()
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
        this.decoder = csinfo.cs.newDecoder()
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
    }

    private class Test {
        // An instance of this class tests all mappings for
        // a particular bytesPerChar value
        private int bytesPerChar;

        // Reference data from .map/nr/c2b files
        private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ);
        private CharBuffer refChars = CharBuffer.allocate(BUFSIZ);

        // Direct-buffer copies of the reference data, filled in by run()
        private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ);
        private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer();

        private Test(int bpc) {
            bytesPerChar = bpc;
        }

        // shiftHackDBCS can add the leading/trailing shift bytes
        private void put(byte[] bs) {
            if (refBytes.remaining() < bytesPerChar)
                refBytes = expand(refBytes);
            refBytes.put(bs);
        }

        // Appends one reference mapping: bs must be exactly bytesPerChar long
        private void put(byte[] bs, char[] cc) {
            if (bs.length != bytesPerChar)
                throw new IllegalArgumentException(bs.length
                                                   + " != "
                                                   + bytesPerChar);
            if (refBytes.remaining() < bytesPerChar)
                refBytes = expand(refBytes);
            refBytes.put(bs);
            if (refChars.remaining() < cc.length)
                refChars = expand(refChars);
            refChars.put(cc);
        }

        // Decodes all of refBytes in one call and compares the result,
        // code point by code point, with refChars. Returns true if no
        // mismatch was found.
        private boolean decode(ByteBuffer refBytes, CharBuffer refChars)
            throws Exception {
            log.println(" decode" + (refBytes.isDirect()?" (direct)":""));
            CharBuffer out = decoder.decode(refBytes);

            refBytes.rewind();
            byte[] bs = new byte[bytesPerChar];
            int e = 0;

            if (shiftHackDBCS && bytesPerChar == 2 && refBytes.get() != (byte)0x0e) {
                log.println("Missing leading byte");
            }

            while (refChars.hasRemaining()) {
                refBytes.get(bs);
                int rcp = getCodePoint(refChars);
                int ocp = getCodePoint(out);
                if (rcp != ocp) {
                    log.println(" Error: "
                                + printBytes(bs)
                                + " --> "
                                + printCodePoint(ocp)
                                + ", expected "
                                + printCodePoint(rcp));
                    if (++e >= MAXERRS) {
                        log.println(" Too many errors, giving up");
                        break;
                    }
                }
                if (verbose) {
                    log.println(" "
                                + printBytes(bs)
                                + " --> "
                                + printCodePoint(rcp));
                }
            }

            if (shiftHackDBCS && bytesPerChar == 2 && refBytes.get() != (byte)0x0f) {
                log.println("Missing trailing byte");
            }

            if (e == 0 && (refChars.hasRemaining() || out.hasRemaining())) {
                // Paranoia: Didn't consume everything
                throw new IllegalStateException();
            }
            refBytes.rewind();
            refChars.rewind();
            return (e == 0);
        }

        // Encodes all of refChars in one call and compares the result,
        // bytesPerChar bytes at a time, with refBytes. Returns true if no
        // mismatch was found.
        private boolean encode(ByteBuffer refBytes, CharBuffer refChars)
            throws Exception {
            log.println(" encode" + (refBytes.isDirect()?" (direct)":""));
            ByteBuffer out = encoder.encode(refChars);
            refChars.rewind();

            if (shiftHackDBCS && bytesPerChar == 2 && out.get() != refBytes.get()) {
                log.println("Missing leading byte");
                return false;
            }

            byte[] rbs = new byte[bytesPerChar];
            byte[] obs = new byte[bytesPerChar];
            int e = 0;
            while (refChars.hasRemaining()) {
                int cp = getCodePoint(refChars);
                refBytes.get(rbs);
                out.get(obs);
                boolean eq = true;
                for (int i = 0; i < bytesPerChar; i++)
                    eq &= rbs[i] == obs[i];
                if (!eq) {
                    log.println(" Error: "
                                + printCodePoint(cp)
                                + " --> "
                                + printBytes(obs)
                                + ", expected "
                                + printBytes(rbs));
                    if (++e >= MAXERRS) {
                        log.println(" Too many errors, giving up");
                        break;
                    }
                }
                if (verbose) {
                    log.println(" "
                                + printCodePoint(cp)
                                + " --> "
                                + printBytes(rbs));
                }
            }

            if (shiftHackDBCS && bytesPerChar == 2 && out.get() != refBytes.get()) {
                log.println("Missing trailing byte");
                return false;
            }

            if (e == 0 && (refBytes.hasRemaining() || out.hasRemaining())) {
                // Paranoia: Didn't consume everything
                throw new IllegalStateException();
            }

            refBytes.rewind();
            refChars.rewind();
            return (e == 0);
        }

        // Runs the collected reference data through the decoder and/or the
        // encoder, once with heap buffers and once with direct buffers.
        private boolean run(int mode) throws Exception {
            log.println(" " + bytesPerChar
                        + " byte" + plural(bytesPerChar) + "/char");

            // Grow the direct buffers if the heap buffers were expanded
            if (dRefBytes.capacity() < refBytes.capacity()) {
                dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity());
            }
            if (dRefChars.capacity() < refChars.capacity()) {
                dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2)
                                      .asCharBuffer();
            }
            refBytes.flip();
            refChars.flip();
            dRefBytes.clear();
            dRefChars.clear();

            dRefBytes.put(refBytes).flip();
            dRefChars.put(refChars).flip();
            // The copy left position == limit; flip again to re-read from 0
            refBytes.flip();
            refChars.flip();

            boolean rv = true;
            if (mode != ENCODE) {
                rv &= decode(refBytes, refChars);
                rv &= decode(dRefBytes, dRefChars);
            }
            if (mode != DECODE) {
                rv &= encode(refBytes, refChars);
                rv &= encode(dRefBytes, dRefChars);
            }
            return rv;
        }
    }

    // Maximum bytes/char being tested
    private int maxBytesPerChar = 0;

    // Tests, indexed by bytesPerChar - 1
    private Test[] tests;

    private void clearTests() {
        maxBytesPerChar = 0;
        tests = new Test[0];
    }

    // Find the test for the given bytes/char value,
    // expanding the test array if needed
    //
    private Test testFor(int bpc) {
        if (bpc > maxBytesPerChar) {
            Test[] ts = new Test[bpc];
            System.arraycopy(tests, 0, ts, 0, maxBytesPerChar);
            for (int i = maxBytesPerChar; i < bpc; i++)
                ts[i] = new Test(i + 1);
            tests = ts;
            maxBytesPerChar = bpc;
        }
        return tests[bpc - 1];
    }

    // Checks new String(byte[], csName) and String.getBytes(csName)
    // against the concatenation of all reference mappings.
    private boolean testStringConv() throws Exception {
        if (shiftHackDBCS) {
            log.println(" string de/encoding skipped for ebcdic");
            return true;
        }
        boolean rv = true;
        log.println(" string de/encoding");
        // for new String()
        ByteArrayOutputStream baosDec = new ByteArrayOutputStream();
        StringBuilder sbDec = new StringBuilder();
        // for String.getBytes()
        ByteArrayOutputStream baosEnc = new ByteArrayOutputStream();
        StringBuilder sbEnc = new StringBuilder();

        for (Entry e : csinfo.mappings) {
            baosDec.write(e.bs);
            sbDec.appendCodePoint(e.cp);
            // appendCodePoint, not append(int): the latter would add the
            // decimal digits of cp2 instead of the character itself
            if (e.cp2 != 0)
                sbDec.appendCodePoint(e.cp2);

            // non-roundtrip b2c, and c2b
            // NOTE(review): the ENCODE phase of run() skips an entry when
            // c2b *contains* e.cp, whereas this skips when it does *not*.
            // Looks inverted, but left unchanged -- confirm against the
            // mapping data before flipping it.
            if (csinfo.nr != null && csinfo.nr.containsKey(e.bb) ||
                csinfo.c2b != null && !csinfo.c2b.containsKey(e.cp))
                continue;
            baosEnc.write(e.bs);
            sbEnc.appendCodePoint(e.cp);
            if (e.cp2 != 0)
                sbEnc.appendCodePoint(e.cp2);
        }
        log.println(" new String()");
        if (!new String(baosDec.toByteArray(), csinfo.csName).equals(sbDec.toString())) {
            log.println(" Error: new String() failed");
            rv = false;
        }
        log.println(" String.getBytes()");
        if (!Arrays.equals(baosEnc.toByteArray(), sbEnc.toString().getBytes(csinfo.csName))) {
            log.println(" Error: String().getBytes() failed");
            rv = false;
        }
        return rv;
    }

    // Runs the string, decode and encode checks for this charset.
    // Returns true if all of them passed.
    private boolean run() throws Exception {
        boolean rv = true;
        // constant-first equals: a charset entry without a "type" line
        // leaves csinfo.type null
        shiftHackDBCS = "ebcdic".equals(csinfo.type);    // isStateful;

        // (1) new String()/String.getBytes()
        rv &= testStringConv();

        // (2) DECODE:
        clearTests();
        if (shiftHackDBCS) {
            testFor(2).put(new byte[] { 0x0e });
        }
        csinfo.mappings.forEach(e -> {
                if (e.cp2 != 0)
                    return;  // skip composite (base+cc) for now
                byte[] bs = e.bs;
                char[] cc = Character.toChars(e.cp);
                testFor(bs.length).put(bs, cc);
            });
        if (shiftHackDBCS) {
            testFor(2).put(new byte[] { 0x0f });
        }
        for (int i = 0; i < maxBytesPerChar; i++) {
            rv &= tests[i].run(DECODE);
        }

        // (3) ENCODE:
        clearTests();
        if (shiftHackDBCS) {
            testFor(2).put(new byte[] { 0x0e });
        }
        csinfo.mappings.forEach(e -> {
                if (e.cp2 != 0)
                    return;  // skip composite (base+cc) for now
                if (csinfo.nr != null && csinfo.nr.containsKey(e.bb))
                    return;  // non-roundtrip b2c
                if (csinfo.c2b != null && csinfo.c2b.containsKey(e.cp))
                    return;  // c2b only mapping
                byte[] bs = e.bs;
                char[] cc = Character.toChars(e.cp);
                testFor(bs.length).put(bs, cc);
            });
        if (csinfo.c2b != null)
            csinfo.c2b.values().forEach(e -> {
                    byte[] bs = e.bs;
                    char[] cc = Character.toChars(e.cp);
                    testFor(bs.length).put(bs, cc);
                });
        if (shiftHackDBCS) {
            testFor(2).put(new byte[] { 0x0f });
        }
        for (int i = 0; i < maxBytesPerChar; i++) {
            rv &= tests[i].run(ENCODE);
        }
        return rv;
    }

    private static class Entry {
        byte[] bs;   // byte sequence reps
        int cp;      // Unicode codepoint
        int cp2;     // CC of composite
        long bb;     // bs in "long" form for nr lookup;
    }

    private static final int UNMAPPABLE = 0xFFFD;
    // Matches "<bytes> <codepoint>" with optional 0x / U+ prefixes and an
    // optional trailing "# comment". It has two capturing groups only, so
    // G_CP2 never matches and Entry.cp2 is always 0 with this pattern.
    private static final Pattern ptn = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
    private static final int G_BS  = 1;
    private static final int G_CP  = 2;
    private static final int G_CP2 = 3;

    private static class CharsetInfo {
        Charset cs;
        String pkgName;
        String clzName;
        String csName;
        String hisName;
        String type;
        boolean isInternal;
        Set<String> aliases = new HashSet<>();

        // mapping entries
        List<Entry> mappings;
        Map<Long, Entry> nr;       // bytes -> entry
        Map<Integer, Entry> c2b;   // cp -> entry

        CharsetInfo(String csName, String clzName) {
            this.csName = csName;
            this.clzName = clzName;
        }

        // Builds an Entry from a matcher that has just matched ptn
        private Entry parse(Matcher m) {
            Entry e = new Entry();
            e.bb = Long.parseLong(m.group(G_BS), 16);
            if (e.bb < 0x100)
                e.bs = new byte[] { (byte)e.bb };
            else
                e.bs = parseBytes(m.group(G_BS));
            e.cp = Integer.parseInt(m.group(G_CP), 16);
            if (G_CP2 <= m.groupCount() && m.group(G_CP2) != null)
                e.cp2 = Integer.parseInt(m.group(G_CP2), 16);
            else
                e.cp2 = 0;
            return e;
        }

        // Parses every non-comment line of the file that starts with a
        // mapping; the line stream is closed before returning.
        private List<Entry> readEntries(Path path) throws IOException {
            try (Stream<String> lines = Files.lines(path)) {
                return lines.filter(ln -> !ln.startsWith("#"))
                            .map(ptn::matcher)
                            .filter(Matcher::lookingAt)
                            .map(this::parse)
                            .collect(Collectors.toList());
            }
        }

        /**
         * Loads clzName.map and, when present, clzName.nr and clzName.c2b
         * from {@code dir}.
         *
         * @return false if the .map file does not exist, true otherwise
         */
        boolean loadMappings(Path dir) throws IOException {
            // xxx.map
            Path path = dir.resolve(clzName + ".map");
            if (!Files.exists(path)) {
                return false;
            }
            mappings = readEntries(path).stream()
                .filter(e -> e.cp != UNMAPPABLE)  // non-mapping
                .collect(Collectors.toList());
            // xxx.nr
            path = dir.resolve(clzName + ".nr");
            if (Files.exists(path)) {
                nr = readEntries(path).stream()
                    .collect(Collectors.toMap(e -> e.bb, Function.identity()));
            }
            // xxx.c2b
            path = dir.resolve(clzName + ".c2b");
            if (Files.exists(path)) {
                c2b = readEntries(path).stream()
                    .collect(Collectors.toMap(e -> e.cp, Function.identity()));
            }
            return true;
        }
    }

    // Validates a "<indent>key value" property line and returns the charset
    // it applies to. Fails if no "charset" line has been seen yet or the
    // value is missing.
    private static CharsetInfo checkProperty(CharsetInfo cs, String[] tokens, String line) {
        if (cs == null || tokens.length < 3) {
            throw new RuntimeException("Error: incorrect " + tokens[1] + " line [" + line + "]");
        }
        return cs;
    }

    /**
     * Parses the "charsets" definition file: each "charset NAME CLASS" line
     * starts a new entry, and the indented property lines that follow
     * (alias, package, type, hisname, internal) are attached to it. Other
     * properties are ignored.
     *
     * @throws RuntimeException on a malformed charset or property line
     */
    private static Set<CharsetInfo> charsets(Path cslist) throws IOException {
        Set<CharsetInfo> charsets = new LinkedHashSet<>();
        CharsetInfo cs = null;

        for (String line : Files.readAllLines(cslist)) {
            if (line.startsWith("#") || line.length() == 0) {
                continue;
            }
            String[] tokens = line.split("\\s+");
            if (tokens.length < 2) {
                continue;
            }
            if ("charset".equals(tokens[0])) {
                if (cs != null) {
                    charsets.add(cs);
                    cs = null;
                }
                if (tokens.length < 3) {
                    throw new RuntimeException("Error: incorrect charset line [" + line + "]");
                }
                cs = new CharsetInfo(tokens[1], tokens[2]);
            } else {
                String key = tokens[1];  // leading empty str
                switch (key) {
                    case "alias":
                        checkProperty(cs, tokens, line).aliases.add(tokens[2]);  // ALIAS_NAME
                        break;
                    case "package":
                        checkProperty(cs, tokens, line).pkgName = tokens[2];
                        break;
                    case "type":
                        checkProperty(cs, tokens, line).type = tokens[2];
                        break;
                    case "hisname":
                        checkProperty(cs, tokens, line).hisName = tokens[2];
                        break;
                    case "internal":
                        checkProperty(cs, tokens, line).isInternal = Boolean.parseBoolean(tokens[2]);
                        break;
                    default:  // ignore
                }
            }
        }
        if (cs != null) {
            charsets.add(cs);
        }
        return charsets;
    }

    public static void main(String args[]) throws Exception {
        Path dir = Paths.get(System.getProperty("test.src", ".") +
                             "/../../../../make/data/charsetmapping");
        if (!Files.exists(dir)) {
            // not inside jdk repo, no mappings, exit silently
            log.println("Nothing done, not in a jdk repo: ");
            return;
        }
        if (args.length > 0 && "-v".equals(args[0])) {
            // For debugging: java TestCharsetMapping [-v]
            verbose = true;
        }

        int errors = 0;
        int tested = 0;
        int skipped = 0;
        int known = 0;

        for (CharsetInfo csinfo : charsets(dir.resolve("charsets"))) {
            String csname = csinfo.csName;

            if (csinfo.isInternal) {
                continue;
            }

            log.printf("%ntesting: %-16s", csname);

            if (!Charset.isSupported(csname)) {
                errors++;
                log.println(" [error: charset is not supported]");
                continue;
            }

            Charset cs = csinfo.cs = Charset.forName(csinfo.csName);
            // test name()
            if (!cs.name().equals(csinfo.csName)) {
                errors++;
                // print, not printf: the names are data, not a format string
                log.print(" [error: wrong csname: " + csinfo.csName
                          + " vs " + cs.name() + "]");
            }
            // test aliases()
            if (!cs.aliases().equals(csinfo.aliases)) {
                errors++;
                log.print(" [error wrong aliases]");
                if (verbose) {
                    log.println();
                    log.println(" expected: " + csinfo.aliases);
                    log.println(" got: " + cs.aliases());
                }
            }

            if ("source".equals(csinfo.type)) {
                log.println(" [skipped: source based]");
                skipped++;
                continue;
            }

            if (!csinfo.loadMappings(dir)) {
                log.println(" [error loading mappings failed]");
                errors++;
                continue;
            }

            tested++;
            log.println();
            if (!new TestCharsetMapping(csinfo).run()) {

                /////////////// known nr/c2b issues ////////////////
                if (csinfo.csName.equals("x-IBM948") ||
                    csinfo.csName.equals("x-IBM950") ||
                    csinfo.csName.equals("x-IBM937") ||
                    csinfo.csName.equals("x-IBM1383"))
                {
                    log.println(" [**** skipped, KNOWN nr/c2b mapping issue]");
                    known++;
                    continue;
                }

                errors++;
            }
        }

        log.println();
        log.println(tested + " charset" + plural(tested) + " tested, "
                    + skipped + " skipped, " + known + " known issue(s)");
        log.println();
        if (errors > 0)
            throw new Exception("Errors detected in "
                                + errors + " charset" + plural(errors));
    }
}