1 /*
   2  * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 4831163 5053096 5056440
  27  * @summary NIO charset basic verification of JISAutodetect decoder
  28  * @author Martin Buchholz
  29  */
  30 
  31 import java.io.*;
  32 import java.nio.ByteBuffer;
  33 import java.nio.CharBuffer;
  34 import java.nio.charset.Charset;
  35 import java.nio.charset.CharsetDecoder;
  36 import java.nio.charset.CoderResult;
  37 import static java.lang.System.*;
  38 
  39 public class NIOJISAutoDetectTest {
  40     private static int failures = 0;
  41 
  42     private static void fail(String failureMsg) {
  43         System.out.println(failureMsg);
  44         failures++;
  45     }
  46 
  47     private static void check(boolean cond, String msg) {
  48         if (!cond) {
  49             fail("test failed: " + msg);
  50             new Exception().printStackTrace();
  51         }
  52     }
  53 
  54     private static String SJISName() throws Exception {
  55         return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
  56                                             (byte)0xcf, (byte)0xb2});
  57     }
  58 
  59     private static String EUCJName() throws Exception {
  60         return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
  61                                             (byte)0xa4, (byte)0xe9});
  62     }
  63 
  64     private static String detectingCharset(byte[] bytes) throws Exception {
  65         //----------------------------------------------------------------
  66         // Test special public methods of CharsetDecoder while we're here
  67         //----------------------------------------------------------------
  68         CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
  69         check(cd.isAutoDetecting(), "isAutodecting()");
  70         check(! cd.isCharsetDetected(), "isCharsetDetected");
  71         cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
  72         check(! cd.isCharsetDetected(), "isCharsetDetected");
  73         try {
  74             cd.detectedCharset();
  75             fail("no IllegalStateException");
  76         } catch (IllegalStateException e) {}
  77         cd.decode(ByteBuffer.wrap(bytes));
  78         check(cd.isCharsetDetected(), "isCharsetDetected");
  79         Charset cs = cd.detectedCharset();
  80         check(cs != null, "cs != null");
  81         check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
  82         return cs.name();
  83     }
  84 
  85     public static void main(String[] argv) throws Exception {
  86         //----------------------------------------------------------------
  87         // Used to throw BufferOverflowException
  88         //----------------------------------------------------------------
  89         out.println(new String(new byte[] {0x61}, "JISAutoDetect"));
  90 
  91         //----------------------------------------------------------------
  92         // InputStreamReader(...JISAutoDetect) used to infloop
  93         //----------------------------------------------------------------
  94         {
  95             byte[] bytes = "ABCD\n".getBytes();
  96             ByteArrayInputStream bais = new  ByteArrayInputStream(bytes);
  97             InputStreamReader isr = new InputStreamReader(bais, "JISAutoDetect");
  98             BufferedReader reader = new BufferedReader(isr);
  99             check (reader.readLine().equals("ABCD"), "first read gets text");
 100             // used to return "ABCD" on second and subsequent reads
 101             check (reader.readLine() == null, "second read gets null");
 102         }
 103 
 104         //----------------------------------------------------------------
 105         // Check all Japanese chars for sanity
 106         //----------------------------------------------------------------
 107         String SJIS = SJISName();
 108         String EUCJ = EUCJName();
 109         out.printf("SJIS charset is %s%n", SJIS);
 110         out.printf("EUCJ charset is %s%n", EUCJ);
 111 
 112         int cnt2022 = 0;
 113         int cnteucj = 0;
 114         int cntsjis = 0;
 115         int cntBAD  = 0;
 116         for (char c = '\u0000'; c < '\uffff'; c++) {
 117             if (c == '\u001b' || // ESC
 118                 c == '\u2014')   // Em-Dash?
 119                 continue;
 120             String s = new String (new char[] {c});
 121 
 122             //----------------------------------------------------------------
 123             // JISAutoDetect can handle all chars that EUC-JP can,
 124             // unless there is an ambiguity with SJIS.
 125             //----------------------------------------------------------------
 126             byte[] beucj = s.getBytes(EUCJ);
 127             String seucj = new String(beucj, EUCJ);
 128             if (seucj.equals(s)) {
 129                 cnteucj++;
 130                 String sauto = new String(beucj, "JISAutoDetect");
 131 
 132                 if (! sauto.equals(seucj)) {
 133                     cntBAD++;
 134                     String ssjis = new String(beucj, SJIS);
 135                     if (! sauto.equals(ssjis)) {
 136                         fail("Autodetection agrees with neither EUC nor SJIS");
 137                     }
 138                 }
 139             } else
 140                 continue; // Optimization
 141 
 142             //----------------------------------------------------------------
 143             // JISAutoDetect can handle all chars that ISO-2022-JP can.
 144             //----------------------------------------------------------------
 145             byte[] b2022 = s.getBytes("ISO-2022-JP");
 146             if (new String(b2022, "ISO-2022-JP").equals(s)) {
 147                 cnt2022++;
 148                 check(new String(b2022,"JISAutoDetect").equals(s),
 149                       "ISO2022 autodetection");
 150             }
 151 
 152             //----------------------------------------------------------------
 153             // JISAutoDetect can handle almost all chars that SJIS can.
 154             //----------------------------------------------------------------
 155             byte[] bsjis = s.getBytes(SJIS);
 156             if (new String(bsjis, SJIS).equals(s)) {
 157                 cntsjis++;
 158                 check(new String(bsjis,"JISAutoDetect").equals(s),
 159                       "SJIS autodetection");
 160             }
 161         }
 162         out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
 163         out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
 164         out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
 165         out.printf("There are %d characters that are " +
 166                    "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
 167 
 168 
 169         //----------------------------------------------------------------
 170         // tests for specific byte sequences
 171         //----------------------------------------------------------------
 172         test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
 173         test("EUC-JP",      new byte[] {'A', 'B', 'C'});
 174         test("SJIS",        new byte[] {'A', 'B', 'C'});
 175 
 176         test("SJIS",
 177              new byte[] { 'C', 'o', 'p',  'y',  'r', 'i', 'g',  'h', 't',
 178                           ' ', (byte)0xa9, ' ', '1', '9', '9',  '8' });
 179 
 180         test("SJIS",
 181              new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
 182                           (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
 183                           (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
 184                           (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });
 185 
 186         test("EUC-JP",
 187              new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
 188                           (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });
 189 
 190         test("SJIS",
 191              new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
 192                           (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
 193                           (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});
 194 
 195         test("SJIS",
 196              new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
 197                           (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
 198                           (byte)0xc3, (byte)0xd1, (byte)0xbd });
 199 
 200         test("SJIS",
 201              new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });
 202 
 203         test("EUC-JP",
 204              new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});
 205 
 206         test("EUC-JP",
 207              new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
 208                           (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
 209                           (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
 210                           (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });
 211 
 212         test("ISO-2022-JP",
 213              new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
 214 
 215 
 216         //----------------------------------------------------------------
 217         // Check handling of ambiguous end-of-input in middle of first char
 218         //----------------------------------------------------------------
 219         {
 220             CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
 221             ByteBuffer bb = ByteBuffer.allocate(128);
 222             CharBuffer cb = CharBuffer.allocate(128);
 223             bb.put((byte)'A').put((byte)0x8f);
 224             bb.flip();
 225             CoderResult res = dc.decode(bb,cb,false);
 226             check(res.isUnderflow(), "isUnderflow");
 227             check(bb.position() == 1, "bb.position()");
 228             check(cb.position() == 1, "cb.position()");
 229             res = dc.decode(bb,cb,false);
 230             check(res.isUnderflow(), "isUnderflow");
 231             check(bb.position() == 1, "bb.position()");
 232             check(cb.position() == 1, "cb.position()");
 233             bb.compact();
 234             bb.put((byte)0xa1);
 235             bb.flip();
 236             res = dc.decode(bb,cb,true);
 237             check(res.isUnderflow(), "isUnderflow");
 238             check(bb.position() == 2, "bb.position()");
 239             check(cb.position() == 2, "cb.position()");
 240         }
 241 
 242 
 243         if (failures > 0)
 244             throw new RuntimeException(failures + " tests failed");
 245     }
 246 
 247     static void checkCoderResult(CoderResult result) {
 248         check(result.isUnderflow(),
 249               "Unexpected coder result: " + result);
 250     }
 251 
 252     static void test(String expectedCharset, byte[] input) throws Exception {
 253         Charset cs = Charset.forName("x-JISAutoDetect");
 254         CharsetDecoder autoDetect = cs.newDecoder();
 255 
 256         Charset cs2 = Charset.forName(expectedCharset);
 257         CharsetDecoder decoder = cs2.newDecoder();
 258 
 259         ByteBuffer bb = ByteBuffer.allocate(128);
 260         CharBuffer charOutput = CharBuffer.allocate(128);
 261         CharBuffer charExpected = CharBuffer.allocate(128);
 262 
 263         bb.put(input);
 264         bb.flip();
 265         bb.mark();
 266 
 267         CoderResult result = autoDetect.decode(bb, charOutput, true);
 268         checkCoderResult(result);
 269         charOutput.flip();
 270         String actual = charOutput.toString();
 271 
 272         bb.reset();
 273 
 274         result = decoder.decode(bb, charExpected, true);
 275         checkCoderResult(result);
 276         charExpected.flip();
 277         String expected = charExpected.toString();
 278 
 279         check(actual.equals(expected),
 280               String.format("actual=%s expected=%s", actual, expected));
 281     }
 282 }