8213618: IBM970 charset has missing entry and remove unexpected entries
Reviewed-by: srl, martin

   1 /*
   2  * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* @test
  25  * @bug 6371437 6371422 6371416 6371619 5058184 6371431 6639450 6569191 6577466 8202329 8213618
  26  * @summary Check if the problems reported in above bugs have been fixed
  27  * @modules jdk.charsets
  28  */
  29 
  30 import java.io.*;
  31 import java.nio.*;
  32 import java.nio.charset.*;
  33 import java.util.Arrays;
  34 
  35 public class TestIBMBugs {
  36 
  37     private static void bug6371437() throws Exception {
  38         CharsetEncoder converter = Charset.forName("Cp933").newEncoder();
  39         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  40         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  41         CharBuffer in = CharBuffer.wrap(new char[] { (char)4352 });
  42         try {
  43               ByteBuffer out = converter.encode(in);
  44         } catch (CharacterCodingException e) { }
  45     }
  46 
  47     private static void bug6371422() throws Exception {
  48         String[] charsets = { "Cp949", "Cp949C" };
  49         for (int n = 0; n < charsets.length; n++) {
  50             String charset = charsets[n];
  51             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  52             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  53             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  54             int errors = 0;
  55             for (int i = 1; i < 0x1ffff; i++) {
  56                 if (i >= 0x1100 && i <= 0x11f9)
  57                     continue;  //Dont try leading consonant, vowel and trailing
  58                                //consonant as a single char
  59                 char[] in = (i < 0x10000
  60                          ? new char[] { (char)i }
  61                              : new char[] { (char)(0xd800 + ((i - 0x10000) >> 10)),
  62                               (char)(0xdc00 + ((i - 0x10000) & 0x3ff)) });
  63 
  64                 try {
  65                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  66                     if (out.remaining() == 0 ||
  67                         (out.remaining() == 1 && out.get(0) == 0x00)) {
  68                     errors++;
  69                     }
  70                 } catch (CharacterCodingException e) { }
  71             }
  72             if (errors > 0)
  73                 throw new Exception("Charset "+charset+": "+errors+" errors");
  74         }
  75     }
  76 
  77     private static void bug6371416() throws Exception {
  78         String[] charsets = { "Cp933", "Cp949", "Cp949C", "Cp970"};
  79         for (int n = 0; n < charsets.length; n++) {
  80             String charset = charsets[n];
  81             CharsetEncoder converter = Charset.forName(charset).newEncoder();
  82             converter = converter.onMalformedInput(CodingErrorAction.REPORT);
  83             converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
  84             int errors = 0;
  85             for (int i = 0xd800; i < 0xe000; i++) {
  86                 char[] in = new char[] { (char)i };
  87                 try {
  88                     ByteBuffer out = converter.encode(CharBuffer.wrap(in));
  89                     if (out.remaining() == 0)
  90                         errors++;
  91                 } catch (CharacterCodingException e) { }
  92             }
  93             if (errors > 0)
  94                 throw new Exception("Charset "+charset+": "+errors+" errors");
  95         }
  96     }
  97 
  98     private static void bug6371619() throws Exception {
  99         String encoding = "Cp964";
 100         Charset charset = Charset.forName(encoding);
 101         CharsetDecoder converter = charset.newDecoder();
 102         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 103         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 104         int errors = 0;
 105         for (int b = 0x80; b < 0x100; b++)
 106             if (!(b == 0x8e ||  // 0x8e is a SS2
 107                   (b >= 0x80 && b <= 0x8d) || (b >= 0x90 && b <= 0x9f))) {
 108                 ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 109                 try {
 110                     CharBuffer out = converter.decode(in);
 111                     if (out.length() == 0) {
 112                         errors++;
 113                     }
 114                 } catch (CharacterCodingException e) { }
 115             }
 116         if (errors > 0)
 117             throw new Exception("Charset "+charset+": "+errors+" errors");
 118     }
 119 
 120 
 121     private static void bug6371431() throws Exception {
 122         String encoding = "Cp33722";
 123         Charset charset = Charset.forName(encoding);
 124         CharsetDecoder converter = charset.newDecoder();
 125         converter = converter.onMalformedInput(CodingErrorAction.REPORT);
 126         converter = converter.onUnmappableCharacter(CodingErrorAction.REPORT);
 127         int errors = 0;
 128         for (int b = 0xa0; b < 0x100; b++) {
 129             ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
 130             try {
 131                 CharBuffer out = converter.decode(in);
 132                 if (out.length() == 0) {
 133                     errors++;
 134                 }
 135             } catch (CharacterCodingException e) { }
 136         }
 137         if (errors > 0)
 138             throw new Exception("Charset "+charset+": "+errors+" errors");
 139     }
 140 
 141     private static void bug6639450 () throws Exception {
 142         byte[] bytes1 = "\\".getBytes("IBM949");
 143         "\\".getBytes("IBM949C");
 144         byte[] bytes2 = "\\".getBytes("IBM949");
 145         if (bytes1.length != 1 || bytes2.length != 1 ||
 146             bytes1[0] != (byte)0x82 ||
 147             bytes2[0] != (byte)0x82)
 148         throw new Exception("IBM949/IBM949C failed");
 149     }
 150 
 151     private static void bug6569191 () throws Exception {
 152         byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
 153                                  (byte)0x81, (byte)0xae,  // fffd ff6e
 154                                  (byte)0x81, (byte)0xaf,  // fffd ff6f
 155                                  (byte)0x81, (byte)0xb0,  // fffd ff70
 156                                  (byte)0x85, (byte)0x81,  // fffd ->
 157                                  (byte)0x85, (byte)0x87,  // 2266 ->
 158                                  (byte)0x85, (byte)0xe0,  // 32a4 ->
 159                                  (byte)0x85, (byte)0xf0 };// 7165 fffd
 160         String s = new String(bs, "Cp943");
 161         // see DoubleByte for how the unmappables are handled
 162         if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
 163             .equals(s))
 164             throw new Exception("Cp943 failed");
 165     }
 166 
 167 
 168     private static void bug6577466 () throws Exception {
 169         for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++){
 170             if (!Character.isDefined((char)c)) continue;
 171             String s = String.valueOf((char)c);
 172             byte[] bb = null;
 173             bb = s.getBytes("x-IBM970");
 174         }
 175     }
 176 
 177     private static void bug8213618 () throws Exception {
 178         String cs = "x-IBM970";
 179         byte[] ba = new byte[]{(byte)0xA2,(byte)0xC1};
 180         String s = "\u25C9";
 181         if (!(new String(ba, cs)).equals(s))
 182             throw new Exception("Cp970 failed");
 183         if (!Arrays.equals(ba, s.getBytes(cs)))
 184             throw new Exception("Cp970 failed");
 185         ba = new byte[]{0x3f,0x3f,0x3f};
 186         if (!Arrays.equals(ba, "\u6950\u84f1\ucf7f".getBytes(cs)))
 187             throw new Exception("Cp970 failed");
 188     }
 189 
 190     private static void bug8202329() throws Exception {
 191         String original = "\\\u007E\u00A5\u203E"; // [backslash][tilde][yen][overscore]
 192         byte[] expectedBytes; // bytes after conversion
 193         String expectedStringfromBytes; // String constructed from bytes
 194 
 195         Charset charset; // charset used for conversion
 196 
 197         ByteBuffer bb; // Buffer that holds encoded bytes
 198         byte[]  ba; // byte array that holds encoded bytes
 199 
 200         CharBuffer cb; // Buffer that holds decoded chars
 201 
 202 
 203         // Test IBM943, where \ and ~ are encoded to unmappable i.e., 0x3f
 204         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 205         charset = Charset.forName("IBM943");
 206         expectedBytes = new byte[] {0x3f, 0x3f, 0x5c, 0x7e};
 207         expectedStringfromBytes = "??\u00A5\u203E";
 208         bb = charset.encode(original);
 209         ba = new byte[bb.remaining()];
 210         bb.get(ba, 0, ba.length);
 211         if(!Arrays.equals(ba, expectedBytes)) {
 212             throw new Exception("IBM943 failed to encode");
 213         }
 214         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 215         if(!cb.toString().equals(expectedStringfromBytes)) {
 216             throw new Exception("IBM943 failed to decode");
 217         }
 218 
 219 
 220         // Test IBM943C, where \ and ~ are encoded to 0x5c and 0x7e
 221         // and [yen] and [overscore] are encoded to 0x5c and 0x7e
 222         charset = Charset.forName("IBM943C");
 223         expectedBytes = new byte[] {0x5c, 0x7e, 0x5c, 0x7e};
 224         expectedStringfromBytes = "\\~\\~";
 225         bb = charset.encode(original);
 226         ba = new byte[bb.remaining()];
 227         bb.get(ba, 0, ba.length);
 228         if(!Arrays.equals(ba, expectedBytes)) {
 229             throw new Exception("IBM943C failed to encode");
 230         }
 231         cb = charset.decode(ByteBuffer.wrap(expectedBytes));
 232         if(!cb.toString().equals(expectedStringfromBytes)) {
 233             throw new Exception("IBM943C failed to decode");
 234         }
 235     }
 236 
 237     public static void main (String[] args) throws Exception {
 238         bug6577466();
 239         // need to be tested before any other IBM949C test case
 240         bug6639450();
 241         bug6371437();
 242         bug6371422();
 243         bug6371416();
 244         bug6371619();
 245         bug6371431();
 246         bug6569191();
 247         bug8202329();
 248         bug8213618();
 249     }
 250 }
--- EOF ---