1 /*
   2  * Copyright (c) 1995, 2005, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.misc;
  27 
  28 import java.io.InputStream;
  29 import java.io.ByteArrayInputStream;
  30 import java.io.OutputStream;
  31 import java.io.ByteArrayOutputStream;
  32 import java.io.PrintStream;
  33 import java.io.IOException;
  34 import java.nio.ByteBuffer;
  35 
  36 
  37 /**
  38  * This class defines the encoding half of character encoders.
  39  * A character encoder is an algorithim for transforming 8 bit binary
  40  * data into text (generally 7 bit ASCII or 8 bit ISO-Latin-1 text)
  41  * for transmition over text channels such as e-mail and network news.
  42  *
  43  * The character encoders have been structured around a central theme
  44  * that, in general, the encoded text has the form:
  45  *
  46  * <pre>
  47  *      [Buffer Prefix]
  48  *      [Line Prefix][encoded data atoms][Line Suffix]
  49  *      [Buffer Suffix]
  50  * </pre>
  51  *
  52  * In the CharacterEncoder and CharacterDecoder classes, one complete
  53  * chunk of data is referred to as a <i>buffer</i>. Encoded buffers
  54  * are all text, and decoded buffers (sometimes just referred to as
  55  * buffers) are binary octets.
  56  *
  57  * To create a custom encoder, you must, at a minimum,  overide three
  58  * abstract methods in this class.
  59  * <DL>
  60  * <DD>bytesPerAtom which tells the encoder how many bytes to
  61  * send to encodeAtom
  62  * <DD>encodeAtom which encodes the bytes sent to it as text.
  63  * <DD>bytesPerLine which tells the encoder the maximum number of
  64  * bytes per line.
  65  * </DL>
  66  *
  67  * Several useful encoders have already been written and are
  68  * referenced in the See Also list below.
  69  *
  70  * @author      Chuck McManis
  71  * @see         CharacterDecoder
  72  * @see         UCEncoder
  73  * @see         UUEncoder
  74  * @see         BASE64Encoder
  75  */
  76 public abstract class CharacterEncoder {
  77 
  78     /** Stream that understands "printing" */
  79     protected PrintStream pStream;
  80 
  81     /** Return the number of bytes per atom of encoding */
  82     abstract protected int bytesPerAtom();
  83 
  84     /** Return the number of bytes that can be encoded per line */
  85     abstract protected int bytesPerLine();
  86 
  87     /**
  88      * Encode the prefix for the entire buffer. By default is simply
  89      * opens the PrintStream for use by the other functions.
  90      */
  91     protected void encodeBufferPrefix(OutputStream aStream) throws IOException {
  92         pStream = new PrintStream(aStream);
  93     }
  94 
  95     /**
  96      * Encode the suffix for the entire buffer.
  97      */
  98     protected void encodeBufferSuffix(OutputStream aStream) throws IOException {
  99     }
 100 
 101     /**
 102      * Encode the prefix that starts every output line.
 103      */
 104     protected void encodeLinePrefix(OutputStream aStream, int aLength)
 105     throws IOException {
 106     }
 107 
 108     /**
 109      * Encode the suffix that ends every output line. By default
 110      * this method just prints a {@code <newline>} into the output stream.
 111      */
 112     protected void encodeLineSuffix(OutputStream aStream) throws IOException {
 113         pStream.println();
 114     }
 115 
 116     /** Encode one "atom" of information into characters. */
 117     abstract protected void encodeAtom(OutputStream aStream, byte someBytes[],
 118                 int anOffset, int aLength) throws IOException;
 119 
 120     /**
 121      * This method works around the bizarre semantics of BufferedInputStream's
 122      * read method.
 123      */
 124     protected int readFully(InputStream in, byte buffer[])
 125         throws java.io.IOException {
 126         for (int i = 0; i < buffer.length; i++) {
 127             int q = in.read();
 128             if (q == -1)
 129                 return i;
 130             buffer[i] = (byte)q;
 131         }
 132         return buffer.length;
 133     }
 134 
 135     /**
 136      * Encode bytes from the input stream, and write them as text characters
 137      * to the output stream. This method will run until it exhausts the
 138      * input stream, but does not print the line suffix for a final
 139      * line that is shorter than bytesPerLine().
 140      */
 141     public void encode(InputStream inStream, OutputStream outStream)
 142         throws IOException {
 143         int     j;
 144         int     numBytes;
 145         byte    tmpbuffer[] = new byte[bytesPerLine()];
 146 
 147         encodeBufferPrefix(outStream);
 148 
 149         while (true) {
 150             numBytes = readFully(inStream, tmpbuffer);
 151             if (numBytes == 0) {
 152                 break;
 153             }
 154             encodeLinePrefix(outStream, numBytes);
 155             for (j = 0; j < numBytes; j += bytesPerAtom()) {
 156 
 157                 if ((j + bytesPerAtom()) <= numBytes) {
 158                     encodeAtom(outStream, tmpbuffer, j, bytesPerAtom());
 159                 } else {
 160                     encodeAtom(outStream, tmpbuffer, j, (numBytes)- j);
 161                 }
 162             }
 163             if (numBytes < bytesPerLine()) {
 164                 break;
 165             } else {
 166                 encodeLineSuffix(outStream);
 167             }
 168         }
 169         encodeBufferSuffix(outStream);
 170     }
 171 
 172     /**
 173      * Encode the buffer in <i>aBuffer</i> and write the encoded
 174      * result to the OutputStream <i>aStream</i>.
 175      */
 176     public void encode(byte aBuffer[], OutputStream aStream)
 177     throws IOException {
 178         ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
 179         encode(inStream, aStream);
 180     }
 181 
 182     /**
 183      * A 'streamless' version of encode that simply takes a buffer of
 184      * bytes and returns a string containing the encoded buffer.
 185      */
 186     public String encode(byte aBuffer[]) {
 187         ByteArrayOutputStream   outStream = new ByteArrayOutputStream();
 188         ByteArrayInputStream    inStream = new ByteArrayInputStream(aBuffer);
 189         String retVal = null;
 190         try {
 191             encode(inStream, outStream);
 192             // explicit ascii->unicode conversion
 193             retVal = outStream.toString("ISO-8859-1");
 194         } catch (Exception IOException) {
 195             // This should never happen.
 196             throw new Error("CharacterEncoder.encode internal error");
 197         }
 198         return (retVal);
 199     }
 200 
 201     /**
 202      * Return a byte array from the remaining bytes in this ByteBuffer.
 203      * <P>
 204      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
 205      * <P>
 206      * To avoid an extra copy, the implementation will attempt to return the
 207      * byte array backing the ByteBuffer.  If this is not possible, a
 208      * new byte array will be created.
 209      */
 210     private byte [] getBytes(ByteBuffer bb) {
 211         /*
 212          * This should never return a BufferOverflowException, as we're
 213          * careful to allocate just the right amount.
 214          */
 215         byte [] buf = null;
 216 
 217         /*
 218          * If it has a usable backing byte buffer, use it.  Use only
 219          * if the array exactly represents the current ByteBuffer.
 220          */
 221         if (bb.hasArray()) {
 222             byte [] tmp = bb.array();
 223             if ((tmp.length == bb.capacity()) &&
 224                     (tmp.length == bb.remaining())) {
 225                 buf = tmp;
 226                 bb.position(bb.limit());
 227             }
 228         }
 229 
 230         if (buf == null) {
 231             /*
 232              * This class doesn't have a concept of encode(buf, len, off),
 233              * so if we have a partial buffer, we must reallocate
 234              * space.
 235              */
 236             buf = new byte[bb.remaining()];
 237 
 238             /*
 239              * position() automatically updated
 240              */
 241             bb.get(buf);
 242         }
 243 
 244         return buf;
 245     }
 246 
 247     /**
 248      * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
 249      * result to the OutputStream <i>aStream</i>.
 250      * <P>
 251      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
 252      */
 253     public void encode(ByteBuffer aBuffer, OutputStream aStream)
 254         throws IOException {
 255         byte [] buf = getBytes(aBuffer);
 256         encode(buf, aStream);
 257     }
 258 
 259     /**
 260      * A 'streamless' version of encode that simply takes a ByteBuffer
 261      * and returns a string containing the encoded buffer.
 262      * <P>
 263      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
 264      */
 265     public String encode(ByteBuffer aBuffer) {
 266         byte [] buf = getBytes(aBuffer);
 267         return encode(buf);
 268     }
 269 
 270     /**
 271      * Encode bytes from the input stream, and write them as text characters
 272      * to the output stream. This method will run until it exhausts the
 273      * input stream. It differs from encode in that it will add the
 274      * line at the end of a final line that is shorter than bytesPerLine().
 275      */
 276     public void encodeBuffer(InputStream inStream, OutputStream outStream)
 277         throws IOException {
 278         int     j;
 279         int     numBytes;
 280         byte    tmpbuffer[] = new byte[bytesPerLine()];
 281 
 282         encodeBufferPrefix(outStream);
 283 
 284         while (true) {
 285             numBytes = readFully(inStream, tmpbuffer);
 286             if (numBytes == 0) {
 287                 break;
 288             }
 289             encodeLinePrefix(outStream, numBytes);
 290             for (j = 0; j < numBytes; j += bytesPerAtom()) {
 291                 if ((j + bytesPerAtom()) <= numBytes) {
 292                     encodeAtom(outStream, tmpbuffer, j, bytesPerAtom());
 293                 } else {
 294                     encodeAtom(outStream, tmpbuffer, j, (numBytes)- j);
 295                 }
 296             }
 297             encodeLineSuffix(outStream);
 298             if (numBytes < bytesPerLine()) {
 299                 break;
 300             }
 301         }
 302         encodeBufferSuffix(outStream);
 303     }
 304 
 305     /**
 306      * Encode the buffer in <i>aBuffer</i> and write the encoded
 307      * result to the OutputStream <i>aStream</i>.
 308      */
 309     public void encodeBuffer(byte aBuffer[], OutputStream aStream)
 310     throws IOException {
 311         ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
 312         encodeBuffer(inStream, aStream);
 313     }
 314 
 315     /**
 316      * A 'streamless' version of encode that simply takes a buffer of
 317      * bytes and returns a string containing the encoded buffer.
 318      */
 319     public String encodeBuffer(byte aBuffer[]) {
 320         ByteArrayOutputStream   outStream = new ByteArrayOutputStream();
 321         ByteArrayInputStream    inStream = new ByteArrayInputStream(aBuffer);
 322         try {
 323             encodeBuffer(inStream, outStream);
 324         } catch (Exception IOException) {
 325             // This should never happen.
 326             throw new Error("CharacterEncoder.encodeBuffer internal error");
 327         }
 328         return (outStream.toString());
 329     }
 330 
 331     /**
 332      * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
 333      * result to the OutputStream <i>aStream</i>.
 334      * <P>
 335      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
 336      */
 337     public void encodeBuffer(ByteBuffer aBuffer, OutputStream aStream)
 338         throws IOException {
 339         byte [] buf = getBytes(aBuffer);
 340         encodeBuffer(buf, aStream);
 341     }
 342 
 343     /**
 344      * A 'streamless' version of encode that simply takes a ByteBuffer
 345      * and returns a string containing the encoded buffer.
 346      * <P>
 347      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
 348      */
 349     public String encodeBuffer(ByteBuffer aBuffer) {
 350         byte [] buf = getBytes(aBuffer);
 351         return encodeBuffer(buf);
 352     }
 353 
 354 }