Old src/share/classes/sun/io/CharToByteDBCS

   1 /*
   2  * Copyright 1997-1999 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 package sun.io;
  26 
  27 public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
  28 {
  29 
  30     private static final int SBCS = 0;
  31     private static final int DBCS = 1;
  32 
  33     private static final byte SO = 0x0e;
  34     private static final byte SI = 0x0f;
  35 
  36     private int  currentState;
  37     private char highHalfZoneCode;
  38     private byte[] outputByte = new byte[2];
  39 
  40     protected short index1[];
  41     protected String index2;
  42     protected String index2a;
  43     protected int   mask1;
  44     protected int   mask2;
  45     protected int   shift;
  46 
  47 
  48     public CharToByteDBCS_EBCDIC() {
  49         super();
  50         highHalfZoneCode = 0;
  51         currentState = SBCS;
  52     }
  53 
  54     /**
  55       * flush out any residual data and reset the buffer state
  56       */
  57     public int flush(byte [] output, int outStart, int outEnd)
  58         throws MalformedInputException, ConversionBufferFullException
  59     {
  60         int bytesOut = 0;
  61 
  62         if (highHalfZoneCode != 0) {
  63             reset();
  64             badInputLength = 0;
  65             throw new MalformedInputException();
  66         }
  67 
  68         if (currentState == DBCS) {
  69           if (outStart >= outEnd)
  70             throw new ConversionBufferFullException();
  71           output[outStart] = SI;
  72           bytesOut++;
  73         }
  74 
  75         reset();
  76         return bytesOut;
  77     }
  78 
  79     /**
  80      * Character conversion
  81      */
  82     public int convert(char[] input, int inOff, int inEnd,
  83                        byte[] output, int outOff, int outEnd)
  84         throws UnknownCharacterException, MalformedInputException,
  85                ConversionBufferFullException
  86     {
  87         char    inputChar;
  88         int     inputSize;
  89 
  90         byteOff = outOff;
  91         charOff = inOff;
  92 
  93         while(charOff < inEnd) {
  94 
  95            int   index;
  96            int   theBytes;
  97            int   spaceNeeded;
  98 
  99            if (highHalfZoneCode == 0) {
 100               inputChar = input[charOff];
 101               inputSize = 1;
 102            } else {
 103               inputChar = highHalfZoneCode;
 104               inputSize = 0;
 105               highHalfZoneCode = 0;
 106            }
 107 
 108            // Is this a high surrogate?
 109            if(inputChar >= '\ud800' && inputChar <= '\udbff') {
 110               // Is this the last character of the input?
 111               if (charOff + inputSize >= inEnd) {
 112                  highHalfZoneCode = inputChar;
 113                  charOff += inputSize;
 114                  break;
 115               }
 116 
 117               // Is there a low surrogate following?
 118               inputChar = input[charOff + inputSize];
 119               if (inputChar >= '\udc00' && inputChar <= '\udfff') {
 120 
 121                  // We have a valid surrogate pair.  Too bad we don't do
 122                  // surrogates.  Is substitution enabled?
 123                  if (subMode) {
 124                     if (subBytes.length == 1) {
 125                        outputByte[0] = 0x00;
 126                        outputByte[1] = subBytes[0];
 127                     }
 128                     else {
 129                        outputByte[0] = subBytes[0];
 130                        outputByte[1] = subBytes[1];
 131                     }
 132 
 133                     inputSize++;
 134                  } else {
 135                     badInputLength = 2;
 136                     throw new UnknownCharacterException();
 137                  }
 138               } else {
 139 
 140                  // We have a malformed surrogate pair
 141                  badInputLength = 1;
 142                  throw new MalformedInputException();
 143               }
 144            }
 145 
 146            // Is this an unaccompanied low surrogate?
 147            else
 148               if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
 149                  badInputLength = 1;
 150                  throw new MalformedInputException();
 151               } else {
 152 
 153                  // We have a valid character, get the bytes for it
 154                  index = index1[((inputChar & mask1) >> shift)] + (inputChar & mask2);
 155 //System.out.println("Index for U+" + Integer.toHexString(inputChar) + " = " + index); // for debugging purposes
 156                  if (index < 15000)
 157                    theBytes = (int)(index2.charAt(index));
 158                  else
 159                    theBytes = (int)(index2a.charAt(index-15000));
 160                  outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
 161                  outputByte[1] = (byte)(theBytes & 0x000000ff);
 162               }
 163 
 164 
 165            // if there was no mapping - look for substitution characters
 166            if (outputByte[0] == 0x00 && outputByte[1] == 0x00
 167                              && inputChar != '\u0000')
 168            {
 169               if (subMode) {
 170                  if (subBytes.length == 1) {
 171                     outputByte[0] = 0x00;
 172                     outputByte[1] = subBytes[0];
 173                  } else {
 174                     outputByte[0] = subBytes[0];
 175                     outputByte[1] = subBytes[1];
 176                  }
 177               } else {
 178                 badInputLength = 1;
 179                 throw new UnknownCharacterException();
 180               }
 181            }
 182 
 183 
 184            //Set the output buffer into the correct state
 185 
 186            if (currentState == DBCS && outputByte[0] == 0x00) {
 187               if (byteOff >= outEnd)
 188                  throw new ConversionBufferFullException();
 189               currentState = SBCS;
 190               output[byteOff++] = SI;
 191            } else
 192               if (currentState == SBCS && outputByte[0] != 0x00) {
 193                  if (byteOff >= outEnd) {
 194                     throw new ConversionBufferFullException();
 195                  }
 196                  currentState = DBCS;
 197                  output[byteOff++] = SO;
 198               }
 199 
 200            if (currentState == DBCS)
 201               spaceNeeded = 2;
 202            else
 203               spaceNeeded = 1;
 204 
 205            if (byteOff + spaceNeeded > outEnd) {
 206               throw new ConversionBufferFullException();
 207            }
 208 
 209            if (currentState == SBCS)
 210               output[byteOff++] = outputByte[1];
 211            else {
 212               output[byteOff++] = outputByte[0];
 213               output[byteOff++] = outputByte[1];
 214            }
 215 
 216            charOff += inputSize;
 217         }
 218 
 219         return byteOff - outOff;
 220     }
 221 
 222 
 223 
 224     /**
 225      * Resets converter to its initial state.
 226      */
 227     public void reset() {
 228        charOff = byteOff = 0;
 229        highHalfZoneCode = 0;
 230        currentState = SBCS;
 231     }
 232 
 233 
 234     /**
 235      * Returns the maximum number of bytes needed to convert a char.
 236      */
 237     public int getMaxBytesPerChar() {
 238        return 4;    //Fixed with bug 4199599 so tests would pass.
 239     }
 240 
 241 
 242     /**
 243      * Sets the substitution bytes to use when the converter is in
 244      * substitution mode.  The given bytes should represent a valid
 245      * character in the target character encoding.
 246      */
 247 
 248     public void setSubstitutionBytes( byte[] newSubBytes )
 249        throws IllegalArgumentException
 250     {
 251        if( newSubBytes.length > 2 || newSubBytes.length == 0) {
 252            throw new IllegalArgumentException();
 253        }
 254 
 255        subBytes = new byte[ newSubBytes.length ];
 256        System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length );
 257 
 258     }
 259 
 260 
 261     /**
 262      * Returns true if the given character can be converted to the
 263      * target character encoding.
 264      */
 265     public boolean canConvert(char ch) {
 266        int  index;
 267        int  theBytes;
 268 
 269        index = index1[((ch & mask1) >> shift)] + (ch & mask2);
 270        if (index  < 15000)
 271          theBytes = (int)(index2.charAt(index));
 272        else
 273          theBytes = (int)(index2a.charAt(index-15000));
 274 
 275        if (theBytes != 0)
 276          return (true);
 277 
 278        // only return true if input char was unicode null - all others are
 279        //     undefined
 280        return( ch == '\u0000');
 281 
 282     }
 283 
 284 }