/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4831163 5053096 5056440
 * @summary NIO charset basic verification of JISAutodetect decoder
 * @author Martin Buchholz
 */

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.lang.System.*;

/**
 * Basic verification of the JISAutoDetect charset decoder.
 *
 * <p>Individual checks do not abort the run: each failed check prints a
 * message, bumps a failure counter and prints a stack trace, and
 * {@link #main} throws once at the end if any check failed.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far; examined at the end of main. */
    private static int failures = 0;

    /**
     * Records one failure and prints its message to standard output.
     *
     * @param failureMsg text describing what went wrong
     */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure, prefixed with "test failed: ", when the condition
     * does not hold, and prints a stack trace so the failing call site can
     * be located.
     *
     * @param cond the condition that is expected to be true
     * @param msg  short description of the condition being checked
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * a fixed four-byte sample; main treats the result as the SJIS charset.
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * a fixed four-byte sample; main treats the result as the EUC-JP charset.
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes the given bytes with a fresh JISAutodetect decoder and returns
     * the name of the charset it detected.
     *
     * <p>Along the way this exercises the detection-related public methods
     * of {@link CharsetDecoder}: before any non-ASCII input is seen the
     * decoder must report that nothing has been detected and
     * {@code detectedCharset()} must throw IllegalStateException.
     *
     * @param bytes input expected to let the decoder settle on a charset
     * @return the name of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutoDetecting()");
        check(!cd.isCharsetDetected(), "isCharsetDetected");

        // A single ASCII byte must not be enough to trigger detection.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(!cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Expected: no charset has been detected yet.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(!cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in order and throws if any of them recorded a failure.
     *
     * @param argv ignored
     * @throws RuntimeException if one or more checks failed
     */
    public static void main(String[] argv) throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        out.println(new String(new byte[] {0x61}, "JISAutoDetect"));

        checkReaderTerminates();
        checkAllJapaneseChars();
        checkSpecificByteSequences();
        checkTruncatedFirstChar();

        if (failures > 0) {
            throw new RuntimeException(failures + " tests failed");
        }
    }

    /**
     * InputStreamReader(...JISAutoDetect) used to loop forever: checks that
     * a one-line input yields the line once and then end-of-stream.
     */
    private static void checkReaderTerminates() throws Exception {
        // Explicit charset: the input must be these exact ASCII bytes
        // regardless of the platform default encoding.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(bytes),
                                  "JISAutoDetect"));
        try {
            // Constant-first equals: a null from readLine() is recorded as a
            // failed check instead of aborting with a NullPointerException.
            check("ABCD".equals(reader.readLine()), "first read gets text");
            // used to return "ABCD" on second and subsequent reads
            check(reader.readLine() == null, "second read gets null");
        } finally {
            reader.close();
        }
    }

    /**
     * Round-trips every char below U+FFFF (except ESC and U+2014) through
     * EUC-JP, ISO-2022-JP and SJIS and checks that JISAutoDetect decodes the
     * encoded bytes consistently, then prints the per-charset counts.
     *
     * <p>Chars that do not round-trip through EUC-JP are skipped entirely,
     * so the ISO-2022-JP and SJIS counts only cover EUC-JP-encodable chars.
     */
    private static void checkAllJapaneseChars() throws Exception {
        String SJIS = SJISName();
        String EUCJ = EUCJName();
        out.printf("SJIS charset is %s%n", SJIS);
        out.printf("EUCJ charset is %s%n", EUCJ);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD  = 0;
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' || // ESC
                c == '\u2014') { // Em-Dash?
                continue;
            }
            String s = new String(new char[] {c});

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //------------------------------------------------------------
            byte[] beucj = s.getBytes(EUCJ);
            String seucj = new String(beucj, EUCJ);
            if (!seucj.equals(s)) {
                continue; // Optimization
            }
            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (!sauto.equals(seucj)) {
                // Misdetection is tolerated only when the result matches
                // what an SJIS decode of the same bytes would give.
                cntBAD++;
                String ssjis = new String(beucj, SJIS);
                if (!sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //------------------------------------------------------------
            byte[] bsjis = s.getBytes(SJIS);
            if (new String(bsjis, SJIS).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }
        out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
        out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
        out.printf("There are %d characters that are " +
                   "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /**
     * Tests for specific byte sequences: each one must decode under
     * JISAutoDetect exactly as it does under the named charset.
     */
    private static void checkSpecificByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Check handling of ambiguous end-of-input in middle of first char:
     * with more input still possible, the decoder must consume the ASCII
     * byte and leave the lone 0x8f lead byte unread until its successor
     * arrives.
     */
    private static void checkTruncatedFirstChar() throws Exception {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();

        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input must not make any progress.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Keep the pending lead byte, append the next byte, signal end of input.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * Records a failure unless the given decode result is an underflow.
     *
     * @param result the result of a decode call that was expected to
     *               consume all of its input
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the x-JISAutoDetect decoder and once
     * with the decoder of {@code expectedCharset}, and records a failure
     * unless both decodes end in underflow and produce the same string.
     *
     * @param expectedCharset name of the charset whose decoding the
     *                        auto-detecting decoder is expected to match
     * @param input           bytes to decode; must fit in the 128-byte
     *                        working buffer
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        CharsetDecoder autoDetect = cs.newDecoder();

        Charset cs2 = Charset.forName(expectedCharset);
        CharsetDecoder decoder = cs2.newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        bb.mark(); // remember the start so the same bytes can be decoded twice

        CoderResult result = autoDetect.decode(bb, charOutput, true);
        checkCoderResult(result);
        charOutput.flip();
        String actual = charOutput.toString();

        bb.reset();

        result = decoder.decode(bb, charExpected, true);
        checkCoderResult(result);
        charExpected.flip();
        String expected = charExpected.toString();

        check(actual.equals(expected),
              String.format("actual=%s expected=%s", actual, expected));
    }
}