1 /*
   2  * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* @test
  25  * @bug 6371437 6371422 6371416 6371619 5058184 6371431 6639450 6569191 6577466 8212794 8220281 8235834
  26  * @summary Check if the problems reported in above bugs have been fixed
  27  * @modules jdk.charsets
  28  */
  29 
  30 import java.io.*;
  31 import java.nio.*;
  32 import java.nio.charset.*;
  33 import java.util.Arrays;
  34 import java.util.Locale;
  35 import java.util.HashSet;
  36 
  37 public class TestIBMBugs {
  38 
  39     private static void bug6371437() throws Exception {
  40         CharsetEncoder converter = Charset.forName("Cp933").newEncoder();
  41         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  42         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  43         CharBuffer in = CharBuffer.wrap(new char[] { (char)4352 });
  44         try {
  45               ByteBuffer out = converter.encode(in);
  46         } catch (CharacterCodingException e) { }
  47     }
  48 
  49     private static void bug6371422() throws Exception {
  50         String[] charsets = { "Cp949", "Cp949C" };
  51         for (int n = 0; n < charsets.length; n++) {
  52             String charset = charsets[n];
  53             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  54             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  55             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  56             int errors = 0;
  57             for (int i = 1; i < 0x1ffff; i++) {
  58                 if (i >= 0x1100 && i <= 0x11f9)
  59                     continue;  //Dont try leading consonant, vowel and trailing
  60                                //consonant as a single char
  61                 char[] in = (i < 0x10000
  62                          ? new char[] { (char)i }
  63                              : new char[] { (char)(0xd800 + ((i - 0x10000) >> 10)),
  64                               (char)(0xdc00 + ((i - 0x10000) & 0x3ff)) });
  65 
  66                 try {
  67                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  68                     if (out.remaining() == 0 ||
  69                         (out.remaining() == 1 && out.get(0) == 0x00)) {
  70                     errors++;
  71                     }
  72                 } catch (CharacterCodingException e) { }
  73             }
  74             if (errors > 0)
  75                 throw new Exception("Charset "+charset+": "+errors+" errors");
  76         }
  77     }
  78 
  79     private static void bug6371416() throws Exception {
  80         String[] charsets = { "Cp933", "Cp949", "Cp949C", "Cp970"};
  81         for (int n = 0; n < charsets.length; n++) {
  82             String charset = charsets[n];
  83             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  84             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  85             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  86             int errors = 0;
  87             for (int i = 0xd800; i < 0xe000; i++) {
  88                 char[] in = new char[] { (char)i };
  89                 try {
  90                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  91                     if (out.remaining() == 0)
  92                         errors++;
  93                 } catch (CharacterCodingException e) { }
  94             }
  95             if (errors > 0)
  96                 throw new Exception("Charset "+charset+": "+errors+" errors");
  97         }
  98     }
  99 
 100     private static void bug6371619() throws Exception {
 101         String encoding = "Cp964";
 102         Charset charset = Charset.forName(encoding);
 103         CharsetDecoder converter = charset.newDecoder();
 104         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 105         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 106         int errors = 0;
 107         for (int b = 0x80; b < 0x100; b++)
 108             if (!(b == 0x8e ||  // 0x8e is a SS2
 109                   (b >= 0x80 && b <= 0x8d) || (b >= 0x90 && b <= 0x9f))) {
 110                 ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 111                 try {
 112                     CharBuffer out = converter.decode(in);
 113                     if (out.length() == 0) {
 114                         errors++;
 115                     }
 116                 } catch (CharacterCodingException e) { }
 117             }
 118         if (errors > 0)
 119             throw new Exception("Charset "+charset+": "+errors+" errors");
 120     }
 121 
 122 
 123     private static void bug6371431() throws Exception {
 124         String encoding = "Cp33722";
 125         Charset charset = Charset.forName(encoding);
 126         CharsetDecoder converter = charset.newDecoder();
 127         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 128         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 129         int errors = 0;
 130         for (int b = 0xa0; b < 0x100; b++) {
 131             ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 132             try {
 133                 CharBuffer out = converter.decode(in);
 134                 if (out.length() == 0) {
 135                     errors++;
 136                 }
 137             } catch (CharacterCodingException e) { }
 138         }
 139         if (errors > 0)
 140             throw new Exception("Charset "+charset+": "+errors+" errors");
 141     }
 142 
 143     private static void bug6639450 () throws Exception {
 144         byte[] bytes1 = "\\".getBytes("IBM949");
 145         "\\".getBytes("IBM949C");
 146         byte[] bytes2 = "\\".getBytes("IBM949");
 147         if (bytes1.length != 1 || bytes2.length != 1 ||
 148             bytes1[0] != (byte)0x82 ||
 149             bytes2[0] != (byte)0x82)
 150         throw new Exception("IBM949/IBM949C failed");
 151     }
 152 
 153     private static void bug6569191 () throws Exception {
 154         byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
 155                                  (byte)0x81, (byte)0xae,  // fffd ff6e
 156                                  (byte)0x81, (byte)0xaf,  // fffd ff6f
 157                                  (byte)0x81, (byte)0xb0,  // fffd ff70
 158                                  (byte)0x85, (byte)0x81,  // fffd ->
 159                                  (byte)0x85, (byte)0x87,  // 2266 ->
 160                                  (byte)0x85, (byte)0xe0,  // 32a4 ->
 161                                  (byte)0x85, (byte)0xf0 };// 7165 fffd
 162         String s = new String(bs, "Cp943");
 163         // see DoubleByte for how the unmappables are handled
 164         if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
 165             .equals(s))
 166             throw new Exception("Cp943 failed");
 167     }
 168 
 169 
 170     private static void bug6577466 () throws Exception {
 171         for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++){
 172             if (!Character.isDefined((char)c)) continue;
 173             String s = String.valueOf((char)c);
 174             byte[] bb = null;
 175             bb = s.getBytes("x-IBM970");
 176         }
 177     }
 178 
 179     private static void bug8213618 () throws Exception {
 180         String cs = "x-IBM970";
 181         byte[] ba = new byte[]{(byte)0xA2,(byte)0xC1};
 182         String s = "\u25C9";
 183         if (!(new String(ba, cs)).equals(s))
 184             throw new Exception("Cp970 failed");
 185         if (!Arrays.equals(ba, s.getBytes(cs)))
 186             throw new Exception("Cp970 failed");
 187         ba = new byte[]{0x3f,0x3f,0x3f};
 188         if (!Arrays.equals(ba, "\u6950\u84f1\ucf7f".getBytes(cs)))
 189             throw new Exception("Cp970 failed");
 190     }
 191 
 192     private static void bug8202329() throws Exception {
 193         String original = "\\\u007E\u00A5\u203E"; // [backslash][tilde][yen][overscore]
 194         byte[] expectedBytes; // bytes after conversion
 195         String expectedStringfromBytes; // String constructed from bytes
 196 
 197         Charset charset; // charset used for conversion
 198 
 199         ByteBuffer bb; // Buffer that holds encoded bytes
 200         byte[]  ba; // byte array that holds encoded bytes
 201 
 202         CharBuffer cb; // Buffer that holds decoded chars
 203 
 204 
 205         // Test IBM943, where \ and ~ are encoded to unmappable i.e., 0x3f
 206         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 207         charset = Charset.forName("IBM943");
 208         expectedBytes = new byte[] {0x3f, 0x3f, 0x5c, 0x7e};
 209         expectedStringfromBytes = "??\u00A5\u203E";
 210         bb = charset.encode(original);
 211         ba = new byte[bb.remaining()];
 212         bb.get(ba, 0, ba.length);
 213         if(!Arrays.equals(ba, expectedBytes)) {
 214             throw new Exception("IBM943 failed to encode");
 215         }
 216         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 217         if(!cb.toString().equals(expectedStringfromBytes)) {
 218             throw new Exception("IBM943 failed to decode");
 219         }
 220 
 221 
 222         // Test IBM943C, where \ and ~ are encoded to 0x5c and 0x7e
 223         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 224         charset = Charset.forName("IBM943C");
 225         expectedBytes = new byte[] {0x5c, 0x7e, 0x5c, 0x7e};
 226         expectedStringfromBytes = "\\~\\~";
 227         bb = charset.encode(original);
 228         ba = new byte[bb.remaining()];
 229         bb.get(ba, 0, ba.length);
 230         if(!Arrays.equals(ba, expectedBytes)) {
 231             throw new Exception("IBM943C failed to encode");
 232         }
 233         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 234         if(!cb.toString().equals(expectedStringfromBytes)) {
 235             throw new Exception("IBM943C failed to decode");
 236         }
 237     }
 238 
 239     private static void bug8212794 () throws Exception {
 240         Charset cs = Charset.forName("x-IBM964");
 241         byte[] ba = new byte[] {(byte)0x5c, (byte)0x90, (byte)0xa1, (byte)0xa1};
 242         char[] ca = new char[] {'\\', '\u0090', '\u3000'};
 243         ByteBuffer bb = ByteBuffer.wrap(ba);
 244         CharBuffer cb = cs.decode(bb);
 245         if(!Arrays.equals(ca, Arrays.copyOf(cb.array(), cb.limit()))) {
 246             throw new Exception("IBM964 failed to decode");
 247         }
 248         cb = CharBuffer.wrap(ca);
 249         bb = cs.encode(cb);
 250         if(!Arrays.equals(ba, Arrays.copyOf(bb.array(), bb.limit()))) {
 251             throw new Exception("IBM964 failed to encode");
 252         }
 253     }
 254 
 255     private static void bug8220281 () throws Exception {
 256         if (System.getProperty("os.name").contains("AIX")) {
 257             /* Following AIX codesets are used for Java default charset. */
 258             /* They should be in sun.nio.cs package on AIX platform.     */
 259             String[] codesets = new String[] {
 260                 "IBM-950", "BIG5-HKSCS", "GB18030", "IBM-1046",
 261                 "IBM-1124", "IBM-1129", "IBM-1252", "IBM-856",
 262                 "IBM-858", "IBM-921", "IBM-922", "IBM-932", "IBM-943C",
 263                 "IBM-eucCN", "IBM-eucJP", "IBM-eucKR", "IBM-eucTW",
 264                 "ISO8859-1", "ISO8859-15", "ISO8859-2", "ISO8859-4",
 265                 "ISO8859-5", "ISO8859-6", "ISO8859-7", "ISO8859-8",
 266                 "ISO8859-9", "TIS-620", "UTF-8", };
 267             String[] charsets = new String[] {
 268                 "x-IBM950", "Big5-HKSCS", "GB18030", "x-IBM1046",
 269                 "x-IBM1124", "x-IBM1129", "windows-1252", "x-IBM856",
 270                 "IBM00858", "x-IBM921", "x-IBM922", "x-IBM942C",
 271                 "x-IBM943C", "x-IBM1383", "x-IBM29626C", "x-IBM970",
 272                 "x-IBM964", "ISO-8859-1", "ISO-8859-15", "ISO-8859-2",
 273                 "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
 274                 "ISO-8859-8", "ISO-8859-9", "TIS-620", "UTF-8", };
 275             for(int i = 0; i < codesets.length; i++) {
 276                 Charset cs0 = Charset.forName(codesets[i]);
 277                 if (!"sun.nio.cs".equals(cs0.getClass().getPackage().getName())) {
 278                     throw new Exception(cs0.getClass().getCanonicalName()+" faild");
 279                 }
 280                 Charset cs1 = Charset.forName(charsets[i]);
 281                 if (!cs0.equals(cs1)) {
 282                     throw new Exception(codesets[i]+"("+cs0.name()+") failed");
 283                 }
 284             }
 285         }
 286         for(Charset cs : Charset.availableCharsets().values()) {
 287             String csName = cs.name().toLowerCase(Locale.ROOT);
 288             String suffix = null;
 289             HashSet<String> aliases = new HashSet<String>();
 290             for(String s : cs.aliases()) {
 291                 aliases.add(s.toLowerCase(Locale.ROOT));
 292             }
 293             aliases.add(csName);
 294             if (csName.startsWith("x-ibm-")) {
 295                 suffix = csName.replaceAll("x-ibm-0*", "");
 296             } else if (csName.startsWith("x-ibm")) {
 297                 suffix = csName.replaceAll("x-ibm0*", "");
 298             } else if (csName.startsWith("ibm-")) {
 299                 suffix = csName.replaceAll("ibm-0*", "");
 300             } else if (csName.startsWith("ibm")) {
 301                 suffix = csName.replaceAll("ibm0*", "");
 302             }
 303             if ("ibm-thai".equals(csName)) {
 304                 suffix = "838";
 305             }
 306             if (null != suffix) {
 307                 while (suffix.length() < 3) {
 308                     suffix = "0"+suffix;
 309                 }
 310                 if (!aliases.contains("cp"+suffix)) {
 311                     throw new Exception(cs.name()+"\t"+"cp"+suffix);
 312                 }
 313                 if (!aliases.contains("ibm"+suffix)) {
 314                     throw new Exception(cs.name()+"\t"+"ibm"+suffix);
 315                 }
 316                 if (!aliases.contains("ibm-"+suffix)) {
 317                     throw new Exception(cs.name()+"\t"+"ibm-"+suffix);
 318                 }
 319                 if (!aliases.contains(suffix)) {
 320                     throw new Exception(cs.name()+"\t"+suffix);
 321                 }
 322             }
 323         }
 324     }
 325 
 326     private static void bug8235834 () throws Exception {
 327         byte[] byteIBM943c2b = new byte[] {
 328             (byte)0x81, (byte)0x5C, (byte)0x81, (byte)0x60,
 329             (byte)0x81, (byte)0x61, (byte)0x81, (byte)0x7C,
 330             (byte)0x88, (byte)0xA0, (byte)0x89, (byte)0x8B,
 331             (byte)0x89, (byte)0xA8, (byte)0x8A, (byte)0x9A,
 332             (byte)0x8B, (byte)0xA0, (byte)0x8B, (byte)0xEB,
 333             (byte)0x8C, (byte)0x71, (byte)0x8C, (byte)0x74,
 334             (byte)0x8C, (byte)0xB2, (byte)0x8D, (byte)0x8D,
 335             (byte)0x8D, (byte)0xF2, (byte)0x8E, (byte)0xC6,
 336             (byte)0x8F, (byte)0x4A, (byte)0x8F, (byte)0xD3,
 337             (byte)0x8F, (byte)0xDD, (byte)0x90, (byte)0xE4,
 338             (byte)0x91, (byte)0x7E, (byte)0x91, (byte)0x89,
 339             (byte)0x91, (byte)0xCB, (byte)0x92, (byte)0x5C,
 340             (byte)0x92, (byte)0xCD, (byte)0x93, (byte)0x55,
 341             (byte)0x93, (byte)0x5E, (byte)0x93, (byte)0x98,
 342             (byte)0x93, (byte)0xC0, (byte)0x94, (byte)0x58,
 343             (byte)0x94, (byte)0x8D, (byte)0x94, (byte)0xAC,
 344             (byte)0x94, (byte)0xAE, (byte)0x96, (byte)0x6A,
 345             (byte)0x96, (byte)0xCB, (byte)0x97, (byte)0x89,
 346             (byte)0x98, (byte)0x58, (byte)0x9B, (byte)0xA0,
 347             (byte)0x9D, (byte)0xB7, (byte)0x9E, (byte)0x94,
 348             (byte)0xE3, (byte)0x79, (byte)0xE4, (byte)0x45,
 349             (byte)0xE8, (byte)0xF6, (byte)0xFA, (byte)0x55,
 350             (byte)0xFA, (byte)0x59, 
 351         };
 352         String strIBM943c2b1 =
 353             "\u2015\uFF5E\u2225\uFF0D\u555E\u7130\u9DD7\u5699" +
 354             "\u4FE0\u8EC0\u7E6B\u8346\u9E7C\u9EB4\u6805\u5C62" +
 355             "\u7E61\u8523\u91AC\u87EC\u6414\u7626\u9A52\u7C1E" +
 356             "\u6451\u5861\u985A\u79B1\u7006\u56CA\u525D\u6F51" +
 357             "\u91B1\u9830\u9EB5\u840A\u881F\u5C5B\u6522\u688E" +
 358             "\u7E48\u8141\u9839\uFFE4\uF86F";
 359         String strIBM943c2b2 =
 360             "\u2014\u301C\u2016\u2212\u5516\u7114\u9D0E\u565B" +
 361             "\u4FA0\u8EAF\u7E4B\u834A\u9E78\u9EB9\u67F5\u5C61" +
 362             "\u7E4D\u848B\u91A4\u8749\u63BB\u75E9\u9A28\u7BAA" +
 363             "\u63B4\u586B\u985B\u7977\u6D9C\u56A2\u5265\u6E8C" +
 364             "\u9197\u982C\u9EBA\u83B1\u874B\u5C4F\u6505\u688D" +
 365             "\u7E66\u80FC\u983D\u00A6\u2116";
 366         Charset csIBM943 = Charset.forName("x-IBM943");
 367         if (!Arrays.equals(byteIBM943c2b, strIBM943c2b1.getBytes(csIBM943))) {
 368             throw new Exception(csIBM943.name()+" failed to encode");
 369         }
 370         if (!strIBM943c2b2.equals(new String(byteIBM943c2b, csIBM943))) {
 371             throw new Exception(csIBM943.name()+" failed to round-tip conversion");
 372         }
 373         Charset csIBM943C = Charset.forName("x-IBM943C");
 374         if (!Arrays.equals(byteIBM943c2b, strIBM943c2b1.getBytes(csIBM943C))) {
 375             throw new Exception(csIBM943C.name()+" failed to encode");
 376         }
 377         if (!strIBM943c2b2.equals(new String(byteIBM943c2b, csIBM943C))) {
 378             throw new Exception(csIBM943C.name()+" failed to round-tip conversion");
 379         }
 380     }
 381 
    /**
     * Runs every regression check in sequence. Each check throws on failure,
     * so the first failing check ends the run with that exception.
     *
     * @param args not used
     * @throws Exception propagated from the first check that fails
     */
    public static void main (String[] args) throws Exception {
        bug6577466();
        // need to be tested before any other IBM949C test case
        bug6639450();
        bug6371437();
        bug6371422();
        bug6371416();
        bug6371619();
        bug6371431();
        bug6569191();
        bug8202329();
        bug8212794();
        bug8213618();
        bug8220281();
        bug8235834();
    }
 398 }