1 /* 2 * Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 package java.lang; 27 28 import java.io.CharConversionException; 29 import java.io.UnsupportedEncodingException; 30 import java.lang.ref.SoftReference; 31 import java.nio.ByteBuffer; 32 import java.nio.CharBuffer; 33 import java.nio.BufferOverflowException; 34 import java.nio.BufferUnderflowException; 35 import java.nio.charset.Charset; 36 import java.nio.charset.CharsetDecoder; 37 import java.nio.charset.CharsetEncoder; 38 import java.nio.charset.CharacterCodingException; 39 import java.nio.charset.CoderResult; 40 import java.nio.charset.CodingErrorAction; 41 import java.nio.charset.IllegalCharsetNameException; 42 import java.nio.charset.MalformedInputException; 43 import java.nio.charset.UnsupportedCharsetException; 44 import java.util.Arrays; 45 import sun.misc.MessageUtils; 46 import sun.nio.cs.HistoricallyNamedCharset; 47 48 /** 49 * Utility class for string encoding and decoding. 50 */ 51 52 class StringCoding { 53 54 private StringCoding() { } 55 56 /** The cached coders for each thread */ 57 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 58 new ThreadLocal<SoftReference<StringDecoder>>(); 59 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 60 new ThreadLocal<SoftReference<StringEncoder>>(); 61 62 private static boolean warnUnsupportedCharset = true; 63 64 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 65 SoftReference<T> sr = tl.get(); 66 if (sr == null) 67 return null; 68 return sr.get(); 69 } 70 71 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 72 tl.set(new SoftReference<T>(ob)); 73 } 74 75 // Trim the given byte array to the given length 76 // 77 private static byte[] safeTrim(byte[] ba, int len, Charset cs) { 78 if (len == ba.length 79 && (System.getSecurityManager() == null 80 || cs.getClass().getClassLoader0() == null)) 81 return ba; 82 else 83 return Arrays.copyOf(ba, len); 84 } 85 86 // Trim the given char array to the given length 87 // 88 private static char[] safeTrim(char[] ca, int len, Charset cs) { 89 if (len == ca.length 90 && (System.getSecurityManager() == null 91 || cs.getClass().getClassLoader0() == null)) 92 return ca; 93 else 94 return Arrays.copyOf(ca, len); 95 } 96 97 private static int scale(int len, float expansionFactor) { 98 // We need to perform double, not float, arithmetic; otherwise 99 // we lose low order bits when len is larger than 2**24. 100 return (int)(len * (double)expansionFactor); 101 } 102 103 private static Charset lookupCharset(String csn) { 104 if (Charset.isSupported(csn)) { 105 try { 106 return Charset.forName(csn); 107 } catch (UnsupportedCharsetException x) { 108 throw new Error(x); 109 } 110 } 111 return null; 112 } 113 114 private static void warnUnsupportedCharset(String csn) { 115 if (warnUnsupportedCharset) { 116 // Use sun.misc.MessageUtils rather than the Logging API or 117 // System.err since this method may be called during VM 118 // initialization before either is available. 119 MessageUtils.err("WARNING: Default charset " + csn + 120 " not supported, using ISO-8859-1 instead"); 121 warnUnsupportedCharset = false; 122 } 123 } 124 125 126 // -- Decoding -- 127 private static class StringDecoder { 128 private final String requestedCharsetName; 129 private final Charset cs; 130 private final CharsetDecoder cd; 131 132 private StringDecoder(Charset cs, String rcn) { 133 this.requestedCharsetName = rcn; 134 this.cs = cs; 135 this.cd = cs.newDecoder() 136 .onMalformedInput(CodingErrorAction.REPLACE) 137 .onUnmappableCharacter(CodingErrorAction.REPLACE); 138 } 139 140 String charsetName() { 141 if (cs instanceof HistoricallyNamedCharset) 142 return ((HistoricallyNamedCharset)cs).historicalName(); 143 return cs.name(); 144 } 145 146 final String requestedCharsetName() { 147 return requestedCharsetName; 148 } 149 150 char[] decode(byte[] ba, int off, int len) { 151 int en = scale(len, cd.maxCharsPerByte()); 152 char[] ca = new char[en]; 153 if (len == 0) 154 return ca; 155 cd.reset(); 156 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 157 CharBuffer cb = CharBuffer.wrap(ca); 158 try { 159 CoderResult cr = cd.decode(bb, cb, true); 160 if (!cr.isUnderflow()) 161 cr.throwException(); 162 cr = cd.flush(cb); 163 if (!cr.isUnderflow()) 164 cr.throwException(); 165 } catch (CharacterCodingException x) { 166 // Substitution is always enabled, 167 // so this shouldn't happen 168 throw new Error(x); 169 } 170 return safeTrim(ca, cb.position(), cs); 171 } 172 173 } 174 175 static char[] decode(String charsetName, byte[] ba, int off, int len) 176 throws UnsupportedEncodingException 177 { 178 StringDecoder sd = deref(decoder); 179 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 180 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 181 || csn.equals(sd.charsetName()))) { 182 sd = null; 183 try { 184 Charset cs = lookupCharset(csn); 185 if (cs != null) 186 sd = new StringDecoder(cs, csn); 187 } catch (IllegalCharsetNameException x) {} 188 if (sd == null) 189 throw new UnsupportedEncodingException(csn); 190 set(decoder, sd); 191 } 192 return sd.decode(ba, off, len); 193 } 194 195 static char[] decode(Charset cs, byte[] ba, int off, int len) { 196 StringDecoder sd = new StringDecoder(cs, cs.name()); 197 return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len); 198 } 199 200 static char[] decode(byte[] ba, int off, int len) { 201 String csn = Charset.defaultCharset().name(); 202 try { 203 return decode(csn, ba, off, len); 204 } catch (UnsupportedEncodingException x) { 205 warnUnsupportedCharset(csn); 206 } 207 try { 208 return decode("ISO-8859-1", ba, off, len); 209 } catch (UnsupportedEncodingException x) { 210 // If this code is hit during VM initialization, MessageUtils is 211 // the only way we will be able to get any kind of error message. 212 MessageUtils.err("ISO-8859-1 charset not available: " 213 + x.toString()); 214 // If we can not find ISO-8859-1 (a required encoding) then things 215 // are seriously wrong with the installation. 216 System.exit(1); 217 return null; 218 } 219 } 220 221 222 223 224 // -- Encoding -- 225 private static class StringEncoder { 226 private Charset cs; 227 private CharsetEncoder ce; 228 private final String requestedCharsetName; 229 230 private StringEncoder(Charset cs, String rcn) { 231 this.requestedCharsetName = rcn; 232 this.cs = cs; 233 this.ce = cs.newEncoder() 234 .onMalformedInput(CodingErrorAction.REPLACE) 235 .onUnmappableCharacter(CodingErrorAction.REPLACE); 236 } 237 238 String charsetName() { 239 if (cs instanceof HistoricallyNamedCharset) 240 return ((HistoricallyNamedCharset)cs).historicalName(); 241 return cs.name(); 242 } 243 244 final String requestedCharsetName() { 245 return requestedCharsetName; 246 } 247 248 byte[] encode(char[] ca, int off, int len) { 249 int en = scale(len, ce.maxBytesPerChar()); 250 byte[] ba = new byte[en]; 251 if (len == 0) 252 return ba; 253 254 ce.reset(); 255 ByteBuffer bb = ByteBuffer.wrap(ba); 256 CharBuffer cb = CharBuffer.wrap(ca, off, len); 257 try { 258 CoderResult cr = ce.encode(cb, bb, true); 259 if (!cr.isUnderflow()) 260 cr.throwException(); 261 cr = ce.flush(bb); 262 if (!cr.isUnderflow()) 263 cr.throwException(); 264 } catch (CharacterCodingException x) { 265 // Substitution is always enabled, 266 // so this shouldn't happen 267 throw new Error(x); 268 } 269 return safeTrim(ba, bb.position(), cs); 270 } 271 } 272 273 static byte[] encode(String charsetName, char[] ca, int off, int len) 274 throws UnsupportedEncodingException 275 { 276 StringEncoder se = deref(encoder); 277 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 278 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 279 || csn.equals(se.charsetName()))) { 280 se = null; 281 try { 282 Charset cs = lookupCharset(csn); 283 if (cs != null) 284 se = new StringEncoder(cs, csn); 285 } catch (IllegalCharsetNameException x) {} 286 if (se == null) 287 throw new UnsupportedEncodingException (csn); 288 set(encoder, se); 289 } 290 return se.encode(ca, off, len); 291 } 292 293 static byte[] encode(Charset cs, char[] ca, int off, int len) { 294 StringEncoder se = new StringEncoder(cs, cs.name()); 295 return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len); 296 } 297 298 static byte[] encode(char[] ca, int off, int len) { 299 String csn = Charset.defaultCharset().name(); 300 try { 301 return encode(csn, ca, off, len); 302 } catch (UnsupportedEncodingException x) { 303 warnUnsupportedCharset(csn); 304 } 305 try { 306 return encode("ISO-8859-1", ca, off, len); 307 } catch (UnsupportedEncodingException x) { 308 // If this code is hit during VM initialization, MessageUtils is 309 // the only way we will be able to get any kind of error message. 310 MessageUtils.err("ISO-8859-1 charset not available: " 311 + x.toString()); 312 // If we can not find ISO-8859-1 (a required encoding) then things 313 // are seriously wrong with the installation. 314 System.exit(1); 315 return null; 316 } 317 } 318 }