1 /* 2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package sun.nio.cs.ext; 26 27 import java.io.ByteArrayOutputStream; 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.*; 31 32 import java.util.Collections; 33 import java.util.HashMap; 34 import java.util.Iterator; 35 import java.util.List; 36 import java.util.Map; 37 38 public class COMPOUND_TEXT_Encoder extends CharsetEncoder { 39 40 /** 41 * NOTE: The following four static variables should be used *only* for 42 * testing whether a encoder can encode a specific character. They 43 * cannot be used for actual encoding because they are shared across all 44 * COMPOUND_TEXT encoders and may be stateful. 45 */ 46 private static final Map<String,CharsetEncoder> encodingToEncoderMap = 47 Collections.synchronizedMap(new HashMap<String,CharsetEncoder>(21, 1.0f)); 48 private static final CharsetEncoder latin1Encoder; 49 private static final CharsetEncoder defaultEncoder; 50 private static final boolean defaultEncodingSupported; 51 52 static { 53 CharsetEncoder encoder = Charset.defaultCharset().newEncoder(); 54 String encoding = encoder.charset().name(); 55 if ("ISO8859_1".equals(encoding)) { 56 latin1Encoder = encoder; 57 defaultEncoder = encoder; 58 defaultEncodingSupported = true; 59 } else { 60 try { 61 latin1Encoder = 62 Charset.forName("ISO8859_1").newEncoder(); 63 } catch (IllegalArgumentException e) { 64 throw new ExceptionInInitializerError 65 ("ISO8859_1 unsupported"); 66 } 67 defaultEncoder = encoder; 68 defaultEncodingSupported = CompoundTextSupport.getEncodings(). 69 contains(defaultEncoder.charset().name()); 70 } 71 } 72 73 private CharsetEncoder encoder; 74 private char[] charBuf = new char[1]; 75 private CharBuffer charbuf = CharBuffer.wrap(charBuf); 76 private ByteArrayOutputStream nonStandardCharsetBuffer; 77 private byte[] byteBuf; 78 private ByteBuffer bytebuf; 79 private int numNonStandardChars, nonStandardEncodingLen; 80 81 public COMPOUND_TEXT_Encoder(Charset cs) { 82 super(cs, 83 (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2), 84 (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2)); 85 try { 86 encoder = Charset.forName("ISO8859_1").newEncoder(); 87 } catch (IllegalArgumentException cannotHappen) {} 88 initEncoder(encoder); 89 } 90 91 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) { 92 CoderResult cr = CoderResult.UNDERFLOW; 93 char[] input = src.array(); 94 int inOff = src.arrayOffset() + src.position(); 95 int inEnd = src.arrayOffset() + src.limit(); 96 97 try { 98 while (inOff < inEnd && cr.isUnderflow()) { 99 charBuf[0] = input[inOff]; 100 if (charBuf[0] <= '\u0008' || 101 (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') || 102 (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) { 103 // The compound text specification only permits the octets 104 // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and 105 // 9B must also be removed because they initiate control 106 // sequences. 107 charBuf[0] = '?'; 108 } 109 110 CharsetEncoder enc = getEncoder(charBuf[0]); 111 //System.out.println("char=" + charBuf[0] + ", enc=" + enc); 112 if (enc == null) { 113 if (unmappableCharacterAction() 114 == CodingErrorAction.REPORT) { 115 charBuf[0] = '?'; 116 enc = latin1Encoder; 117 } else { 118 return CoderResult.unmappableForLength(1); 119 } 120 } 121 if (enc != encoder) { 122 if (nonStandardCharsetBuffer != null) { 123 cr = flushNonStandardCharsetBuffer(des); 124 } else { 125 //cr= encoder.flush(des); 126 flushEncoder(encoder, des); 127 } 128 if (!cr.isUnderflow()) 129 return cr; 130 byte[] escSequence = CompoundTextSupport. 131 getEscapeSequence(enc.charset().name()); 132 if (escSequence == null) { 133 throw new InternalError("Unknown encoding: " + 134 enc.charset().name()); 135 } else if (escSequence[1] == (byte)0x25 && 136 escSequence[2] == (byte)0x2F) { 137 initNonStandardCharsetBuffer(enc, escSequence); 138 } else if (des.remaining() >= escSequence.length) { 139 des.put(escSequence, 0, escSequence.length); 140 } else { 141 return CoderResult.OVERFLOW; 142 } 143 encoder = enc; 144 continue; 145 } 146 charbuf.rewind(); 147 if (nonStandardCharsetBuffer == null) { 148 cr = encoder.encode(charbuf, des, false); 149 } else { 150 bytebuf.clear(); 151 cr = encoder.encode(charbuf, bytebuf, false); 152 bytebuf.flip(); 153 nonStandardCharsetBuffer.write(byteBuf, 154 0, bytebuf.limit()); 155 numNonStandardChars++; 156 } 157 inOff++; 158 } 159 return cr; 160 } finally { 161 src.position(inOff - src.arrayOffset()); 162 } 163 } 164 165 protected CoderResult implFlush(ByteBuffer out) { 166 CoderResult cr = (nonStandardCharsetBuffer != null) 167 ? flushNonStandardCharsetBuffer(out) 168 //: encoder.flush(out); 169 : flushEncoder(encoder, out); 170 reset(); 171 return cr; 172 } 173 174 private void initNonStandardCharsetBuffer(CharsetEncoder c, 175 byte[] escSequence) 176 { 177 nonStandardCharsetBuffer = new ByteArrayOutputStream(); 178 byteBuf = new byte[(int)c.maxBytesPerChar()]; 179 bytebuf = ByteBuffer.wrap(byteBuf); 180 nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length); 181 nonStandardCharsetBuffer.write(0); // M placeholder 182 nonStandardCharsetBuffer.write(0); // L placeholder 183 byte[] encoding = CompoundTextSupport. 184 getEncoding(c.charset().name()); 185 if (encoding == null) { 186 throw new InternalError 187 ("Unknown encoding: " + encoder.charset().name()); 188 } 189 nonStandardCharsetBuffer.write(encoding, 0, encoding.length); 190 nonStandardCharsetBuffer.write(0x02); // divider 191 nonStandardEncodingLen = encoding.length + 1; 192 } 193 194 private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) { 195 if (numNonStandardChars > 0) { 196 byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() * 197 numNonStandardChars]; 198 ByteBuffer bb = ByteBuffer.wrap(flushBuf); 199 flushEncoder(encoder, bb); 200 bb.flip(); 201 nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit()); 202 numNonStandardChars = 0; 203 } 204 205 int numBytes = nonStandardCharsetBuffer.size(); 206 int nonStandardBytesOff = 6 + nonStandardEncodingLen; 207 208 if (out.remaining() < (numBytes - nonStandardBytesOff) + 209 nonStandardBytesOff * (((numBytes - nonStandardBytesOff) / 210 ((1 << 14) - 1)) + 1)) 211 { 212 return CoderResult.OVERFLOW; 213 } 214 215 byte[] nonStandardBytes = 216 nonStandardCharsetBuffer.toByteArray(); 217 218 // The non-standard charset header only supports 2^14-1 bytes of data. 219 // If we have more than that, we have to repeat the header. 220 do { 221 out.put((byte)0x1B); 222 out.put((byte)0x25); 223 out.put((byte)0x2F); 224 out.put(nonStandardBytes[3]); 225 226 int toWrite = Math.min(numBytes - nonStandardBytesOff, 227 (1 << 14) - 1 - nonStandardEncodingLen); 228 229 out.put((byte) 230 (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M 231 out.put((byte) 232 (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L 233 out.put(nonStandardBytes, 6, nonStandardEncodingLen); 234 out.put(nonStandardBytes, nonStandardBytesOff, toWrite); 235 nonStandardBytesOff += toWrite; 236 } while (nonStandardBytesOff < numBytes); 237 238 nonStandardCharsetBuffer = null; 239 byteBuf = null; 240 nonStandardEncodingLen = 0; 241 return CoderResult.UNDERFLOW; 242 } 243 244 /** 245 * Resets the encoder. 246 * Call this method to reset the encoder to its initial state 247 */ 248 protected void implReset() { 249 numNonStandardChars = nonStandardEncodingLen = 0; 250 nonStandardCharsetBuffer = null; 251 byteBuf = null; 252 try { 253 encoder = Charset.forName("ISO8859_1").newEncoder(); 254 } catch (IllegalArgumentException cannotHappen) { 255 } 256 initEncoder(encoder); 257 } 258 259 /** 260 * Return whether a character is mappable or not 261 * @return true if a character is mappable 262 */ 263 public boolean canEncode(char ch) { 264 return getEncoder(ch) != null; 265 } 266 267 protected void implOnMalformedInput(CodingErrorAction newAction) { 268 encoder.onUnmappableCharacter(newAction); 269 } 270 271 protected void implOnUnmappableCharacter(CodingErrorAction newAction) { 272 encoder.onUnmappableCharacter(newAction); 273 } 274 275 protected void implReplaceWith(byte[] newReplacement) { 276 if (encoder != null) 277 encoder.replaceWith(newReplacement); 278 } 279 280 /** 281 * Try to figure out which CharsetEncoder to use for conversion 282 * of the specified Unicode character. The target character encoding 283 * of the returned encoder is approved to be used with Compound Text. 284 * 285 * @param ch Unicode character 286 * @return CharsetEncoder to convert the given character 287 */ 288 private CharsetEncoder getEncoder(char ch) { 289 // 1. Try the current encoder. 290 if (encoder.canEncode(ch)) { 291 return encoder; 292 } 293 294 // 2. Try the default encoder. 295 if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) { 296 CharsetEncoder retval = null; 297 try { 298 retval = defaultEncoder.charset().newEncoder(); 299 } catch (UnsupportedOperationException cannotHappen) { 300 } 301 initEncoder(retval); 302 return retval; 303 } 304 305 // 3. Try ISO8859-1. 306 if (latin1Encoder.canEncode(ch)) { 307 CharsetEncoder retval = null; 308 try { 309 retval = latin1Encoder.charset().newEncoder(); 310 } catch (UnsupportedOperationException cannotHappen) {} 311 initEncoder(retval); 312 return retval; 313 } 314 315 // 4. Brute force search of all supported encodings. 316 for (String encoding : CompoundTextSupport.getEncodings()) 317 { 318 CharsetEncoder enc = encodingToEncoderMap.get(encoding); 319 if (enc == null) { 320 enc = CompoundTextSupport.getEncoder(encoding); 321 if (enc == null) { 322 throw new InternalError("Unsupported encoding: " + 323 encoding); 324 } 325 encodingToEncoderMap.put(encoding, enc); 326 } 327 if (enc.canEncode(ch)) { 328 CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding); 329 initEncoder(retval); 330 return retval; 331 } 332 } 333 334 return null; 335 } 336 337 private void initEncoder(CharsetEncoder enc) { 338 try { 339 enc.onUnmappableCharacter(CodingErrorAction.REPLACE) 340 .replaceWith(replacement()); 341 } catch (IllegalArgumentException x) {} 342 } 343 344 private CharBuffer fcb= CharBuffer.allocate(0); 345 private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) { 346 enc.encode(fcb, bb, true); 347 return enc.flush(bb); 348 } 349 }