1 /*
   2  * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* @test
 * @bug 6371437 6371422 6371416 6371619 5058184 6371431 6639450 6569191 6577466 8202329 8212794 8213618 8220281
  26  * @summary Check if the problems reported in above bugs have been fixed
  27  * @modules jdk.charsets
  28  */
  29 
  30 import java.io.*;
  31 import java.nio.*;
  32 import java.nio.charset.*;
  33 import java.util.Arrays;
  34 import java.util.Locale;
  35 import java.util.HashSet;
  36 
  37 public class TestIBMBugs {
  38 
  39     private static void bug6371437() throws Exception {
  40         CharsetEncoder converter = Charset.forName("Cp933").newEncoder();
  41         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  42         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  43         CharBuffer in = CharBuffer.wrap(new char[] { (char)4352 });
  44         try {
  45               ByteBuffer out = converter.encode(in);
  46         } catch (CharacterCodingException e) { }
  47     }
  48 
  49     private static void bug6371422() throws Exception {
  50         String[] charsets = { "Cp949", "Cp949C" };
  51         for (int n = 0; n < charsets.length; n++) {
  52             String charset = charsets[n];
  53             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  54             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  55             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  56             int errors = 0;
  57             for (int i = 1; i < 0x1ffff; i++) {
  58                 if (i >= 0x1100 && i <= 0x11f9)
  59                     continue;  //Dont try leading consonant, vowel and trailing
  60                                //consonant as a single char
  61                 char[] in = (i < 0x10000
  62                          ? new char[] { (char)i }
  63                              : new char[] { (char)(0xd800 + ((i - 0x10000) >> 10)),
  64                               (char)(0xdc00 + ((i - 0x10000) & 0x3ff)) });
  65 
  66                 try {
  67                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  68                     if (out.remaining() == 0 ||
  69                         (out.remaining() == 1 && out.get(0) == 0x00)) {
  70                     errors++;
  71                     }
  72                 } catch (CharacterCodingException e) { }
  73             }
  74             if (errors > 0)
  75                 throw new Exception("Charset "+charset+": "+errors+" errors");
  76         }
  77     }
  78 
  79     private static void bug6371416() throws Exception {
  80         String[] charsets = { "Cp933", "Cp949", "Cp949C", "Cp970"};
  81         for (int n = 0; n < charsets.length; n++) {
  82             String charset = charsets[n];
  83             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  84             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  85             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  86             int errors = 0;
  87             for (int i = 0xd800; i < 0xe000; i++) {
  88                 char[] in = new char[] { (char)i };
  89                 try {
  90                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  91                     if (out.remaining() == 0)
  92                         errors++;
  93                 } catch (CharacterCodingException e) { }
  94             }
  95             if (errors > 0)
  96                 throw new Exception("Charset "+charset+": "+errors+" errors");
  97         }
  98     }
  99 
 100     private static void bug6371619() throws Exception {
 101         String encoding = "Cp964";
 102         Charset charset = Charset.forName(encoding);
 103         CharsetDecoder converter = charset.newDecoder();
 104         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 105         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 106         int errors = 0;
 107         for (int b = 0x80; b < 0x100; b++)
 108             if (!(b == 0x8e ||  // 0x8e is a SS2
 109                   (b >= 0x80 && b <= 0x8d) || (b >= 0x90 && b <= 0x9f))) {
 110                 ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 111                 try {
 112                     CharBuffer out = converter.decode(in);
 113                     if (out.length() == 0) {
 114                         errors++;
 115                     }
 116                 } catch (CharacterCodingException e) { }
 117             }
 118         if (errors > 0)
 119             throw new Exception("Charset "+charset+": "+errors+" errors");
 120     }
 121 
 122 
 123     private static void bug6371431() throws Exception {
 124         String encoding = "Cp33722";
 125         Charset charset = Charset.forName(encoding);
 126         CharsetDecoder converter = charset.newDecoder();
 127         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 128         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 129         int errors = 0;
 130         for (int b = 0xa0; b < 0x100; b++) {
 131             ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 132             try {
 133                 CharBuffer out = converter.decode(in);
 134                 if (out.length() == 0) {
 135                     errors++;
 136                 }
 137             } catch (CharacterCodingException e) { }
 138         }
 139         if (errors > 0)
 140             throw new Exception("Charset "+charset+": "+errors+" errors");
 141     }
 142 
 143     private static void bug6639450 () throws Exception {
 144         byte[] bytes1 = "\\".getBytes("IBM949");
 145         "\\".getBytes("IBM949C");
 146         byte[] bytes2 = "\\".getBytes("IBM949");
 147         if (bytes1.length != 1 || bytes2.length != 1 ||
 148             bytes1[0] != (byte)0x82 ||
 149             bytes2[0] != (byte)0x82)
 150         throw new Exception("IBM949/IBM949C failed");
 151     }
 152 
 153     private static void bug6569191 () throws Exception {
 154         byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
 155                                  (byte)0x81, (byte)0xae,  // fffd ff6e
 156                                  (byte)0x81, (byte)0xaf,  // fffd ff6f
 157                                  (byte)0x81, (byte)0xb0,  // fffd ff70
 158                                  (byte)0x85, (byte)0x81,  // fffd ->
 159                                  (byte)0x85, (byte)0x87,  // 2266 ->
 160                                  (byte)0x85, (byte)0xe0,  // 32a4 ->
 161                                  (byte)0x85, (byte)0xf0 };// 7165 fffd
 162         String s = new String(bs, "Cp943");
 163         // see DoubleByte for how the unmappables are handled
 164         if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
 165             .equals(s))
 166             throw new Exception("Cp943 failed");
 167     }
 168 
 169 
 170     private static void bug6577466 () throws Exception {
 171         for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++){
 172             if (!Character.isDefined((char)c)) continue;
 173             String s = String.valueOf((char)c);
 174             byte[] bb = null;
 175             bb = s.getBytes("x-IBM970");
 176         }
 177     }
 178 
 179     private static void bug8213618 () throws Exception {
 180         String cs = "x-IBM970";
 181         byte[] ba = new byte[]{(byte)0xA2,(byte)0xC1};
 182         String s = "\u25C9";
 183         if (!(new String(ba, cs)).equals(s))
 184             throw new Exception("Cp970 failed");
 185         if (!Arrays.equals(ba, s.getBytes(cs)))
 186             throw new Exception("Cp970 failed");
 187         ba = new byte[]{0x3f,0x3f,0x3f};
 188         if (!Arrays.equals(ba, "\u6950\u84f1\ucf7f".getBytes(cs)))
 189             throw new Exception("Cp970 failed");
 190     }
 191 
 192     private static void bug8202329() throws Exception {
 193         String original = "\\\u007E\u00A5\u203E"; // [backslash][tilde][yen][overscore]
 194         byte[] expectedBytes; // bytes after conversion
 195         String expectedStringfromBytes; // String constructed from bytes
 196 
 197         Charset charset; // charset used for conversion
 198 
 199         ByteBuffer bb; // Buffer that holds encoded bytes
 200         byte[]  ba; // byte array that holds encoded bytes
 201 
 202         CharBuffer cb; // Buffer that holds decoded chars
 203 
 204 
 205         // Test IBM943, where \ and ~ are encoded to unmappable i.e., 0x3f
 206         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 207         charset = Charset.forName("IBM943");
 208         expectedBytes = new byte[] {0x3f, 0x3f, 0x5c, 0x7e};
 209         expectedStringfromBytes = "??\u00A5\u203E";
 210         bb = charset.encode(original);
 211         ba = new byte[bb.remaining()];
 212         bb.get(ba, 0, ba.length);
 213         if(!Arrays.equals(ba, expectedBytes)) {
 214             throw new Exception("IBM943 failed to encode");
 215         }
 216         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 217         if(!cb.toString().equals(expectedStringfromBytes)) {
 218             throw new Exception("IBM943 failed to decode");
 219         }
 220 
 221 
 222         // Test IBM943C, where \ and ~ are encoded to 0x5c and 0x7e
 223         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 224         charset = Charset.forName("IBM943C");
 225         expectedBytes = new byte[] {0x5c, 0x7e, 0x5c, 0x7e};
 226         expectedStringfromBytes = "\\~\\~";
 227         bb = charset.encode(original);
 228         ba = new byte[bb.remaining()];
 229         bb.get(ba, 0, ba.length);
 230         if(!Arrays.equals(ba, expectedBytes)) {
 231             throw new Exception("IBM943C failed to encode");
 232         }
 233         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 234         if(!cb.toString().equals(expectedStringfromBytes)) {
 235             throw new Exception("IBM943C failed to decode");
 236         }
 237     }
 238 
 239     private static void bug8212794 () throws Exception {
 240         Charset cs = Charset.forName("x-IBM964");
 241         byte[] ba = new byte[] {(byte)0x5c, (byte)0x90, (byte)0xa1, (byte)0xa1};
 242         char[] ca = new char[] {'\\', '\u0090', '\u3000'};
 243         ByteBuffer bb = ByteBuffer.wrap(ba);
 244         CharBuffer cb = cs.decode(bb);
 245         if(!Arrays.equals(ca, Arrays.copyOf(cb.array(), cb.limit()))) {
 246             throw new Exception("IBM964 failed to decode");
 247         }
 248         cb = CharBuffer.wrap(ca);
 249         bb = cs.encode(cb);
 250         if(!Arrays.equals(ba, Arrays.copyOf(bb.array(), bb.limit()))) {
 251             throw new Exception("IBM964 failed to encode");
 252         }
 253     }
 254 
 255     private static void bug8220281 () throws Exception {
 256         if (System.getProperty("os.name").contains("AIX")) {
 257             /* Following AIX codesets are used for Java default charset. */
 258             /* They should be in sun.nio.cs package on AIX platform.     */
 259             String[] codesets = new String[] {
 260                 "IBM-950", "BIG5-HKSCS", "GB18030", "IBM-1046",
 261                 "IBM-1124", "IBM-1129", "IBM-1252", "IBM-856",
 262                 "IBM-858", "IBM-921", "IBM-922", "IBM-932", "IBM-943C",
 263                 "IBM-eucCN", "IBM-eucJP", "IBM-eucKR", "IBM-eucTW",
 264                 "ISO8859-1", "ISO8859-15", "ISO8859-2", "ISO8859-4",
 265                 "ISO8859-5", "ISO8859-6", "ISO8859-7", "ISO8859-8",
 266                 "ISO8859-9", "TIS-620", "UTF-8", };
 267             String[] charsets = new String[] {
 268                 "x-IBM950", "Big5-HKSCS", "GB18030", "x-IBM1046",
 269                 "x-IBM1124", "x-IBM1129", "windows-1252", "x-IBM856",
 270                 "IBM00858", "x-IBM921", "x-IBM922", "x-IBM942C",
 271                 "x-IBM943C", "x-IBM1383", "x-IBM29626C", "x-IBM970",
 272                 "x-IBM964", "ISO-8859-1", "ISO-8859-15", "ISO-8859-2",
 273                 "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
 274                 "ISO-8859-8", "ISO-8859-9", "TIS-620", "UTF-8", };
 275             for(int i = 0; i < codesets.length; i++) {
 276                 Charset cs0 = Charset.forName(codesets[i]);
 277                 if (!"sun.nio.cs".equals(cs0.getClass().getPackage().getName())) {
 278                     throw new Exception(cs0.getClass().getCanonicalName()+" faild");
 279                 }
 280                 Charset cs1 = Charset.forName(charsets[i]);
 281                 if (!cs0.equals(cs1)) {
 282                     throw new Exception(codesets[i]+"("+cs0.name()+") failed");
 283                 }
 284             }
 285         }
 286         for(Charset cs : Charset.availableCharsets().values()) {
 287             String csName = cs.name().toLowerCase(Locale.ROOT);
 288             String suffix = null;
 289             HashSet<String> aliases = new HashSet<String>();
 290             for(String s : cs.aliases()) {
 291                 aliases.add(s.toLowerCase(Locale.ROOT));
 292             }
 293             aliases.add(csName);
 294             if (csName.startsWith("x-ibm-")) {
 295                 suffix = csName.replaceAll("x-ibm-0*", "");
 296             } else if (csName.startsWith("x-ibm")) {
 297                 suffix = csName.replaceAll("x-ibm0*", "");
 298             } else if (csName.startsWith("ibm-")) {
 299                 suffix = csName.replaceAll("ibm-0*", "");
 300             } else if (csName.startsWith("ibm")) {
 301                 suffix = csName.replaceAll("ibm0*", "");
 302             }
 303             if ("ibm-thai".equals(csName)) {
 304                 suffix = "838";
 305             }
 306             if (null != suffix) {
 307                 while (suffix.length() < 3) {
 308                     suffix = "0"+suffix;
 309                 }
 310                 if (!aliases.contains("cp"+suffix)) {
 311                     throw new Exception(cs.name()+"\t"+"cp"+suffix);
 312                 }
 313                 if (!aliases.contains("ibm"+suffix)) {
 314                     throw new Exception(cs.name()+"\t"+"ibm"+suffix);
 315                 }
 316                 if (!aliases.contains("ibm-"+suffix)) {
 317                     throw new Exception(cs.name()+"\t"+"ibm-"+suffix);
 318                 }
 319                 if (!aliases.contains(suffix)) {
 320                     throw new Exception(cs.name()+"\t"+suffix);
 321                 }
 322             }
 323         }
 324     }
 325 
 326     public static void main (String[] args) throws Exception {
 327         bug6577466();
 328         // need to be tested before any other IBM949C test case
 329         bug6639450();
 330         bug6371437();
 331         bug6371422();
 332         bug6371416();
 333         bug6371619();
 334         bug6371431();
 335         bug6569191();
 336         bug8202329();
 337         bug8212794();
 338         bug8213618();
 339         bug8220281();
 340     }
 341 }