1 /* 2 * Copyright 1997-1999 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 package sun.io; 26 27 public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter 28 { 29 30 private static final int SBCS = 0; 31 private static final int DBCS = 1; 32 33 private static final byte SO = 0x0e; 34 private static final byte SI = 0x0f; 35 36 private int currentState; 37 private char highHalfZoneCode; 38 private byte[] outputByte = new byte[2]; 39 40 protected short index1[]; 41 protected String index2; 42 protected String index2a; 43 protected int mask1; 44 protected int mask2; 45 protected int shift; 46 47 48 public CharToByteDBCS_EBCDIC() { 49 super(); 50 highHalfZoneCode = 0; 51 currentState = SBCS; 52 } 53 54 /** 55 * flush out any residual data and reset the buffer state 56 */ 57 public int flush(byte [] output, int outStart, int outEnd) 58 throws MalformedInputException, ConversionBufferFullException 59 { 60 int bytesOut = 0; 61 62 if (highHalfZoneCode != 0) { 63 reset(); 64 badInputLength = 0; 65 throw new MalformedInputException(); 66 } 67 68 if (currentState == DBCS) { 69 if (outStart >= outEnd) 70 throw new ConversionBufferFullException(); 71 output[outStart] = SI; 72 bytesOut++; 73 } 74 75 reset(); 76 return bytesOut; 77 } 78 79 /** 80 * Character conversion 81 */ 82 public int convert(char[] input, int inOff, int inEnd, 83 byte[] output, int outOff, int outEnd) 84 throws UnknownCharacterException, MalformedInputException, 85 ConversionBufferFullException 86 { 87 char inputChar; 88 int inputSize; 89 90 byteOff = outOff; 91 charOff = inOff; 92 93 while(charOff < inEnd) { 94 95 int index; 96 int theBytes; 97 int spaceNeeded; 98 99 if (highHalfZoneCode == 0) { 100 inputChar = input[charOff]; 101 inputSize = 1; 102 } else { 103 inputChar = highHalfZoneCode; 104 inputSize = 0; 105 highHalfZoneCode = 0; 106 } 107 108 // Is this a high surrogate? 109 if(inputChar >= '\ud800' && inputChar <= '\udbff') { 110 // Is this the last character of the input? 111 if (charOff + inputSize >= inEnd) { 112 highHalfZoneCode = inputChar; 113 charOff += inputSize; 114 break; 115 } 116 117 // Is there a low surrogate following? 118 inputChar = input[charOff + inputSize]; 119 if (inputChar >= '\udc00' && inputChar <= '\udfff') { 120 121 // We have a valid surrogate pair. Too bad we don't do 122 // surrogates. Is substitution enabled? 123 if (subMode) { 124 if (subBytes.length == 1) { 125 outputByte[0] = 0x00; 126 outputByte[1] = subBytes[0]; 127 } 128 else { 129 outputByte[0] = subBytes[0]; 130 outputByte[1] = subBytes[1]; 131 } 132 133 inputSize++; 134 } else { 135 badInputLength = 2; 136 throw new UnknownCharacterException(); 137 } 138 } else { 139 140 // We have a malformed surrogate pair 141 badInputLength = 1; 142 throw new MalformedInputException(); 143 } 144 } 145 146 // Is this an unaccompanied low surrogate? 147 else 148 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { 149 badInputLength = 1; 150 throw new MalformedInputException(); 151 } else { 152 153 // We have a valid character, get the bytes for it 154 index = index1[((inputChar & mask1) >> shift)] + (inputChar & mask2); 155 //System.out.println("Index for U+" + Integer.toHexString(inputChar) + " = " + index); // for debugging purposes 156 if (index < 15000) 157 theBytes = (int)(index2.charAt(index)); 158 else 159 theBytes = (int)(index2a.charAt(index-15000)); 160 outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); 161 outputByte[1] = (byte)(theBytes & 0x000000ff); 162 } 163 164 165 // if there was no mapping - look for substitution characters 166 if (outputByte[0] == 0x00 && outputByte[1] == 0x00 167 && inputChar != '\u0000') 168 { 169 if (subMode) { 170 if (subBytes.length == 1) { 171 outputByte[0] = 0x00; 172 outputByte[1] = subBytes[0]; 173 } else { 174 outputByte[0] = subBytes[0]; 175 outputByte[1] = subBytes[1]; 176 } 177 } else { 178 badInputLength = 1; 179 throw new UnknownCharacterException(); 180 } 181 } 182 183 184 //Set the output buffer into the correct state 185 186 if (currentState == DBCS && outputByte[0] == 0x00) { 187 if (byteOff >= outEnd) 188 throw new ConversionBufferFullException(); 189 currentState = SBCS; 190 output[byteOff++] = SI; 191 } else 192 if (currentState == SBCS && outputByte[0] != 0x00) { 193 if (byteOff >= outEnd) { 194 throw new ConversionBufferFullException(); 195 } 196 currentState = DBCS; 197 output[byteOff++] = SO; 198 } 199 200 if (currentState == DBCS) 201 spaceNeeded = 2; 202 else 203 spaceNeeded = 1; 204 205 if (byteOff + spaceNeeded > outEnd) { 206 throw new ConversionBufferFullException(); 207 } 208 209 if (currentState == SBCS) 210 output[byteOff++] = outputByte[1]; 211 else { 212 output[byteOff++] = outputByte[0]; 213 output[byteOff++] = outputByte[1]; 214 } 215 216 charOff += inputSize; 217 } 218 219 return byteOff - outOff; 220 } 221 222 223 224 /** 225 * Resets converter to its initial state. 226 */ 227 public void reset() { 228 charOff = byteOff = 0; 229 highHalfZoneCode = 0; 230 currentState = SBCS; 231 } 232 233 234 /** 235 * Returns the maximum number of bytes needed to convert a char. 236 */ 237 public int getMaxBytesPerChar() { 238 return 4; //Fixed with bug 4199599 so tests would pass. 239 } 240 241 242 /** 243 * Sets the substitution bytes to use when the converter is in 244 * substitution mode. The given bytes should represent a valid 245 * character in the target character encoding. 246 */ 247 248 public void setSubstitutionBytes( byte[] newSubBytes ) 249 throws IllegalArgumentException 250 { 251 if( newSubBytes.length > 2 || newSubBytes.length == 0) { 252 throw new IllegalArgumentException(); 253 } 254 255 subBytes = new byte[ newSubBytes.length ]; 256 System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length ); 257 258 } 259 260 261 /** 262 * Returns true if the given character can be converted to the 263 * target character encoding. 264 */ 265 public boolean canConvert(char ch) { 266 int index; 267 int theBytes; 268 269 index = index1[((ch & mask1) >> shift)] + (ch & mask2); 270 if (index < 15000) 271 theBytes = (int)(index2.charAt(index)); 272 else 273 theBytes = (int)(index2a.charAt(index-15000)); 274 275 if (theBytes != 0) 276 return (true); 277 278 // only return true if input char was unicode null - all others are 279 // undefined 280 return( ch == '\u0000'); 281 282 } 283 284 }