Old jdk/src/jdk.charsets/unix/classes/sun/nio/cs/ext/COMPOUND_TEXT

   1 /*
   2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package sun.nio.cs.ext;
  26 
  27 import java.io.ByteArrayOutputStream;
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.*;
  31 
  32 import java.util.Collections;
  33 import java.util.HashMap;
  34 import java.util.Iterator;
  35 import java.util.List;
  36 import java.util.Map;
  37 
  38 public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
  39 
  40     /**
  41      * NOTE: The following four static variables should be used *only* for
  42      * testing whether a encoder can encode a specific character. They
  43      * cannot be used for actual encoding because they are shared across all
  44      * COMPOUND_TEXT encoders and may be stateful.
  45      */
  46     private static final Map<String,CharsetEncoder> encodingToEncoderMap =
  47       Collections.synchronizedMap(new HashMap<String,CharsetEncoder>(21, 1.0f));
  48     private static final CharsetEncoder latin1Encoder;
  49     private static final CharsetEncoder defaultEncoder;
  50     private static final boolean defaultEncodingSupported;
  51 
  52     static {
  53         CharsetEncoder encoder = Charset.defaultCharset().newEncoder();
  54         String encoding = encoder.charset().name();
  55         if ("ISO8859_1".equals(encoding)) {
  56             latin1Encoder = encoder;
  57             defaultEncoder = encoder;
  58             defaultEncodingSupported = true;
  59         } else {
  60             try {
  61                 latin1Encoder =
  62                     Charset.forName("ISO8859_1").newEncoder();
  63             } catch (IllegalArgumentException e) {
  64                 throw new ExceptionInInitializerError
  65                     ("ISO8859_1 unsupported");
  66             }
  67             defaultEncoder = encoder;
  68             defaultEncodingSupported = CompoundTextSupport.getEncodings().
  69                 contains(defaultEncoder.charset().name());
  70         }
  71     }
  72 
  73     private CharsetEncoder encoder;
  74     private char[] charBuf = new char[1];
  75     private CharBuffer charbuf = CharBuffer.wrap(charBuf);
  76     private ByteArrayOutputStream nonStandardCharsetBuffer;
  77     private byte[] byteBuf;
  78     private ByteBuffer bytebuf;
  79     private int numNonStandardChars, nonStandardEncodingLen;
  80 
  81     public COMPOUND_TEXT_Encoder(Charset cs) {
  82         super(cs,
  83               (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
  84               (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
  85         try {
  86             encoder = Charset.forName("ISO8859_1").newEncoder();
  87         } catch (IllegalArgumentException cannotHappen) {}
  88         initEncoder(encoder);
  89     }
  90 
  91     protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
  92         CoderResult cr = CoderResult.UNDERFLOW;
  93         char[] input = src.array();
  94         int inOff = src.arrayOffset() + src.position();
  95         int inEnd = src.arrayOffset() + src.limit();
  96 
  97         try {
  98             while (inOff < inEnd && cr.isUnderflow()) {
  99                 charBuf[0] = input[inOff];
 100                 if (charBuf[0] <= '\u0008' ||
 101                     (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') ||
 102                     (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
 103                     // The compound text specification only permits the octets
 104                     // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
 105                     // 9B must also be removed because they initiate control
 106                     // sequences.
 107                     charBuf[0] = '?';
 108                 }
 109 
 110                 CharsetEncoder enc = getEncoder(charBuf[0]);
 111                 //System.out.println("char=" + charBuf[0] + ", enc=" + enc);
 112                 if (enc == null) {
 113                     if (unmappableCharacterAction()
 114                         == CodingErrorAction.REPORT) {
 115                         charBuf[0] = '?';
 116                         enc = latin1Encoder;
 117                     } else {
 118                         return CoderResult.unmappableForLength(1);
 119                     }
 120                 }
 121                 if (enc != encoder) {
 122                     if (nonStandardCharsetBuffer != null) {
 123                         cr = flushNonStandardCharsetBuffer(des);
 124                     } else {
 125                         //cr= encoder.flush(des);
 126                         flushEncoder(encoder, des);
 127                     }
 128                     if (!cr.isUnderflow())
 129                         return cr;
 130                     byte[] escSequence = CompoundTextSupport.
 131                         getEscapeSequence(enc.charset().name());
 132                     if (escSequence == null) {
 133                         throw new InternalError("Unknown encoding: " +
 134                                                 enc.charset().name());
 135                     } else if (escSequence[1] == (byte)0x25 &&
 136                                escSequence[2] == (byte)0x2F) {
 137                         initNonStandardCharsetBuffer(enc, escSequence);
 138                     } else if (des.remaining() >= escSequence.length) {
 139                         des.put(escSequence, 0, escSequence.length);
 140                     } else {
 141                         return CoderResult.OVERFLOW;
 142                     }
 143                     encoder = enc;
 144                     continue;
 145                 }
 146                 charbuf.rewind();
 147                 if (nonStandardCharsetBuffer == null) {
 148                     cr = encoder.encode(charbuf, des, false);
 149                 } else {
 150                     bytebuf.clear();
 151                     cr = encoder.encode(charbuf, bytebuf, false);
 152                     bytebuf.flip();
 153                     nonStandardCharsetBuffer.write(byteBuf,
 154                                                    0, bytebuf.limit());
 155                     numNonStandardChars++;
 156                 }
 157                 inOff++;
 158             }
 159             return cr;
 160         } finally {
 161             src.position(inOff - src.arrayOffset());
 162         }
 163     }
 164 
 165     protected CoderResult implFlush(ByteBuffer out) {
 166         CoderResult cr = (nonStandardCharsetBuffer != null)
 167             ? flushNonStandardCharsetBuffer(out)
 168             //: encoder.flush(out);
 169             : flushEncoder(encoder, out);
 170         reset();
 171         return cr;
 172     }
 173 
 174     private void initNonStandardCharsetBuffer(CharsetEncoder c,
 175                                               byte[] escSequence)
 176     {
 177         nonStandardCharsetBuffer = new ByteArrayOutputStream();
 178         byteBuf = new byte[(int)c.maxBytesPerChar()];
 179         bytebuf = ByteBuffer.wrap(byteBuf);
 180         nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
 181         nonStandardCharsetBuffer.write(0); // M placeholder
 182         nonStandardCharsetBuffer.write(0); // L placeholder
 183         byte[] encoding = CompoundTextSupport.
 184             getEncoding(c.charset().name());
 185         if (encoding == null) {
 186             throw new InternalError
 187                 ("Unknown encoding: " + encoder.charset().name());
 188         }
 189         nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
 190         nonStandardCharsetBuffer.write(0x02); // divider
 191         nonStandardEncodingLen = encoding.length + 1;
 192     }
 193 
 194     private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
 195         if (numNonStandardChars > 0) {
 196             byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
 197                                        numNonStandardChars];
 198             ByteBuffer bb = ByteBuffer.wrap(flushBuf);
 199             flushEncoder(encoder, bb);
 200             bb.flip();
 201             nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
 202             numNonStandardChars = 0;
 203         }
 204 
 205         int numBytes = nonStandardCharsetBuffer.size();
 206         int nonStandardBytesOff = 6 + nonStandardEncodingLen;
 207 
 208         if (out.remaining() < (numBytes - nonStandardBytesOff) +
 209             nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
 210                                     ((1 << 14) - 1)) + 1))
 211         {
 212             return CoderResult.OVERFLOW;
 213         }
 214 
 215         byte[] nonStandardBytes =
 216             nonStandardCharsetBuffer.toByteArray();
 217 
 218         // The non-standard charset header only supports 2^14-1 bytes of data.
 219         // If we have more than that, we have to repeat the header.
 220         do {
 221             out.put((byte)0x1B);
 222             out.put((byte)0x25);
 223             out.put((byte)0x2F);
 224             out.put(nonStandardBytes[3]);
 225 
 226             int toWrite = Math.min(numBytes - nonStandardBytesOff,
 227                                    (1 << 14) - 1 - nonStandardEncodingLen);
 228 
 229             out.put((byte)
 230                 (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
 231             out.put((byte)
 232                 (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
 233             out.put(nonStandardBytes, 6, nonStandardEncodingLen);
 234             out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
 235             nonStandardBytesOff += toWrite;
 236         } while (nonStandardBytesOff < numBytes);
 237 
 238         nonStandardCharsetBuffer = null;
 239         byteBuf = null;
 240         nonStandardEncodingLen = 0;
 241         return CoderResult.UNDERFLOW;
 242     }
 243 
 244     /**
 245      * Resets the encoder.
 246      * Call this method to reset the encoder to its initial state
 247      */
 248     protected void implReset() {
 249         numNonStandardChars = nonStandardEncodingLen = 0;
 250         nonStandardCharsetBuffer = null;
 251         byteBuf = null;
 252         try {
 253             encoder = Charset.forName("ISO8859_1").newEncoder();
 254         } catch (IllegalArgumentException cannotHappen) {
 255         }
 256         initEncoder(encoder);
 257     }
 258 
 259     /**
 260      * Return whether a character is mappable or not
 261      * @return true if a character is mappable
 262      */
 263     public boolean canEncode(char ch) {
 264         return getEncoder(ch) != null;
 265     }
 266 
 267     protected void implOnMalformedInput(CodingErrorAction newAction) {
 268         encoder.onUnmappableCharacter(newAction);
 269     }
 270 
 271     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
 272         encoder.onUnmappableCharacter(newAction);
 273     }
 274 
 275     protected void implReplaceWith(byte[] newReplacement) {
 276         if (encoder != null)
 277             encoder.replaceWith(newReplacement);
 278     }
 279 
 280     /**
 281      * Try to figure out which CharsetEncoder to use for conversion
 282      * of the specified Unicode character. The target character encoding
 283      * of the returned encoder is approved to be used with Compound Text.
 284      *
 285      * @param ch Unicode character
 286      * @return CharsetEncoder to convert the given character
 287      */
 288     private CharsetEncoder getEncoder(char ch) {
 289         // 1. Try the current encoder.
 290         if (encoder.canEncode(ch)) {
 291             return encoder;
 292         }
 293 
 294         // 2. Try the default encoder.
 295         if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
 296             CharsetEncoder retval = null;
 297             try {
 298                 retval = defaultEncoder.charset().newEncoder();
 299             } catch (UnsupportedOperationException cannotHappen) {
 300             }
 301             initEncoder(retval);
 302             return retval;
 303         }
 304 
 305         // 3. Try ISO8859-1.
 306         if (latin1Encoder.canEncode(ch)) {
 307             CharsetEncoder retval = null;
 308             try {
 309                 retval = latin1Encoder.charset().newEncoder();
 310             } catch (UnsupportedOperationException cannotHappen) {}
 311             initEncoder(retval);
 312             return retval;
 313         }
 314 
 315         // 4. Brute force search of all supported encodings.
 316         for (String encoding : CompoundTextSupport.getEncodings())
 317         {
 318             CharsetEncoder enc = encodingToEncoderMap.get(encoding);
 319             if (enc == null) {
 320                 enc = CompoundTextSupport.getEncoder(encoding);
 321                 if (enc == null) {
 322                     throw new InternalError("Unsupported encoding: " +
 323                                             encoding);
 324                 }
 325                 encodingToEncoderMap.put(encoding, enc);
 326             }
 327             if (enc.canEncode(ch)) {
 328                 CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
 329                 initEncoder(retval);
 330                 return retval;
 331             }
 332         }
 333 
 334         return null;
 335     }
 336 
 337     private void initEncoder(CharsetEncoder enc) {
 338         try {
 339             enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
 340                 .replaceWith(replacement());
 341         } catch (IllegalArgumentException x) {}
 342     }
 343 
 344     private CharBuffer fcb= CharBuffer.allocate(0);
 345     private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
 346         enc.encode(fcb, bb, true);
 347         return enc.flush(bb);
 348     }
 349 }