/*
 * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.lang;

import java.io.UnsupportedEncodingException;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import sun.misc.MessageUtils;
import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;

/**
 * Utility class for string encoding and decoding.
 *
 * <p>Three flavours of each operation are offered: by charset name (the
 * coder is cached per thread), by {@link Charset} object (never cached),
 * and with the platform default charset (delegates to the by-name variant
 * and falls back to ISO-8859-1 if the default is not supported).
 */

class StringCoding {

    private StringCoding() { }

    /** The cached coders for each thread */
    private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
        new ThreadLocal<>();
    private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
        new ThreadLocal<>();

    /** Cleared after the first "default charset not supported" warning. */
    private static boolean warnUnsupportedCharset = true;

    /**
     * Returns the object held by the soft reference stored in the given
     * thread-local, or {@code null} if the thread-local is unset or the
     * reference has been cleared.
     */
    private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
        SoftReference<T> sr = tl.get();
        if (sr == null)
            return null;
        return sr.get();
    }

    /** Stores {@code ob} in the given thread-local behind a new soft reference. */
    private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
        tl.set(new SoftReference<>(ob));
    }

    // Trim the given byte array to the given length.
    //
    // The array itself is returned only when no trimming is needed AND
    // either the coder is trusted or no security manager is installed;
    // in every other case a copy is returned. The cs parameter is not
    // consulted.
    //
    private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
            return ba;
        else
            return Arrays.copyOf(ba, len);
    }

    // Trim the given char array to the given length.
    //
    // Same rule as the byte[] variant: hand back the caller's array only
    // when its length already matches and the coder is trusted or there
    // is no security manager; otherwise return a copy.
    //
    private static char[] safeTrim(char[] ca, int len,
                                   Charset cs, boolean isTrusted) {
        if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
            return ca;
        else
            return Arrays.copyOf(ca, len);
    }

    /**
     * Computes the size of the output array for {@code len} input units
     * and the coder's reported per-unit expansion factor.
     */
    private static int scale(int len, float expansionFactor) {
        // We need to perform double, not float, arithmetic; otherwise
        // we lose low order bits when len is larger than 2**24.
        return (int)(len * (double)expansionFactor);
    }

    /**
     * Returns the charset with the given name, or {@code null} if
     * {@link Charset#isSupported} reports it as unsupported.
     *
     * @throws IllegalCharsetNameException propagated from the
     *         {@code Charset} lookup; callers in this class catch it
     */
    private static Charset lookupCharset(String csn) {
        if (Charset.isSupported(csn)) {
            try {
                return Charset.forName(csn);
            } catch (UnsupportedCharsetException x) {
                // isSupported() just returned true, so this is unexpected
                throw new Error(x);
            }
        }
        return null;
    }

    /**
     * Prints a warning that the default charset {@code csn} is not
     * supported. Only the first call prints; the flag is cleared afterwards.
     */
    private static void warnUnsupportedCharset(String csn) {
        if (warnUnsupportedCharset) {
            // Use sun.misc.MessageUtils rather than the Logging API or
            // System.err since this method may be called during VM
            // initialization before either is available.
            MessageUtils.err("WARNING: Default charset " + csn +
                             " not supported, using ISO-8859-1 instead");
            warnUnsupportedCharset = false;
        }
    }

    /**
     * Runs a complete decode-and-flush of {@code ba[off..off+len)} into
     * {@code ca} through the buffer-based decoder API and returns the
     * number of chars written. The caller is responsible for configuring
     * and resetting {@code cd} beforehand.
     */
    private static int decodeLoop(CharsetDecoder cd,
                                  byte[] ba, int off, int len, char[] ca) {
        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
        CharBuffer cb = CharBuffer.wrap(ca);
        try {
            CoderResult cr = cd.decode(bb, cb, true);
            if (!cr.isUnderflow())
                cr.throwException();
            cr = cd.flush(cb);
            if (!cr.isUnderflow())
                cr.throwException();
        } catch (CharacterCodingException x) {
            // Substitution is always enabled,
            // so this shouldn't happen
            throw new Error(x);
        }
        return cb.position();
    }

    /**
     * Runs a complete encode-and-flush of {@code ca[off..off+len)} into
     * {@code ba} through the buffer-based encoder API and returns the
     * number of bytes written. The caller is responsible for configuring
     * and resetting {@code ce} beforehand.
     */
    private static int encodeLoop(CharsetEncoder ce,
                                  char[] ca, int off, int len, byte[] ba) {
        ByteBuffer bb = ByteBuffer.wrap(ba);
        CharBuffer cb = CharBuffer.wrap(ca, off, len);
        try {
            CoderResult cr = ce.encode(cb, bb, true);
            if (!cr.isUnderflow())
                cr.throwException();
            cr = ce.flush(bb);
            if (!cr.isUnderflow())
                cr.throwException();
        } catch (CharacterCodingException x) {
            // Substitution is always enabled,
            // so this shouldn't happen
            throw new Error(x);
        }
        return bb.position();
    }


    // -- Decoding --

    /**
     * A decoder bound to one charset, remembered per thread so repeated
     * decodes with the same charset name reuse it. Malformed input and
     * unmappable characters are replaced rather than reported.
     */
    private static class StringDecoder {
        private final String requestedCharsetName;
        private final Charset cs;
        private final CharsetDecoder cd;
        // true when the charset's class reports a null class loader
        private final boolean isTrusted;

        private StringDecoder(Charset cs, String rcn) {
            this.requestedCharsetName = rcn;
            this.cs = cs;
            this.cd = cs.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
            this.isTrusted = (cs.getClass().getClassLoader0() == null);
        }

        /** Returns the charset's historical name if it has one, else its name. */
        String charsetName() {
            if (cs instanceof HistoricallyNamedCharset)
                return ((HistoricallyNamedCharset)cs).historicalName();
            return cs.name();
        }

        /** Returns the name this decoder was originally looked up under. */
        final String requestedCharsetName() {
            return requestedCharsetName;
        }

        /**
         * Decodes {@code ba[off..off+len)} and returns the resulting chars
         * in an array trimmed to the decoded length.
         */
        char[] decode(byte[] ba, int off, int len) {
            int en = scale(len, cd.maxCharsPerByte());
            char[] ca = new char[en];
            if (len == 0)
                return ca;
            if (cd instanceof ArrayDecoder) {
                // array-based fast path
                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
                return safeTrim(ca, clen, cs, isTrusted);
            } else {
                cd.reset();
                int clen = decodeLoop(cd, ba, off, len, ca);
                return safeTrim(ca, clen, cs, isTrusted);
            }
        }
    }

    /**
     * Decodes bytes using the named charset, reusing this thread's cached
     * decoder when it was created for the same name.
     *
     * @param charsetName charset name; {@code null} means ISO-8859-1
     * @throws UnsupportedEncodingException if the name is unsupported or
     *         is not a legal charset name
     */
    static char[] decode(String charsetName, byte[] ba, int off, int len)
        throws UnsupportedEncodingException
    {
        StringDecoder sd = deref(decoder);
        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
        if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
                              || csn.equals(sd.charsetName()))) {
            sd = null;
            try {
                Charset cs = lookupCharset(csn);
                if (cs != null)
                    sd = new StringDecoder(cs, csn);
            } catch (IllegalCharsetNameException x) {
                // Deliberately ignored: sd stays null, so an illegal name is
                // reported as UnsupportedEncodingException just below.
            }
            if (sd == null)
                throw new UnsupportedEncodingException(csn);
            set(decoder, sd);
        }
        return sd.decode(ba, off, len);
    }

    /**
     * Decodes bytes using the given charset object. A fresh decoder is
     * created on every call; nothing is cached.
     */
    static char[] decode(Charset cs, byte[] ba, int off, int len) {
        // (1)We never cache the "external" cs, the only benefit of creating
        // an additional StringDe/Encoder object to wrap it is to share the
        // de/encode() method. These SD/E objects are short-lived, the young-gen
        // gc should be able to take care of them well. But the best approach
        // is still not to generate them if not really necessary.
        // (2)The defensive copy of the input byte/char[] has a big performance
        // impact, as well as the outgoing result byte/char[]. Need to do the
        // optimization check of (sm==null && classLoader0==null) for both.
        // (3)getClass().getClassLoader0() is expensive
        // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
        // is only checked (and then isTrusted gets set) when (SM==null). It is
        // possible that the SM==null for now but then SM is NOT null later
        // when safeTrim() is invoked...the "safe" way to do is to redundant
        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
        // but it then can be argued that the SM is null when the operation
        // is started...
        CharsetDecoder cd = cs.newDecoder();
        int en = scale(len, cd.maxCharsPerByte());
        char[] ca = new char[en];
        if (len == 0)
            return ca;
        boolean isTrusted = false;
        if (System.getSecurityManager() != null) {
            if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
                // hand the decoder a private copy of the input range
                ba = Arrays.copyOfRange(ba, off, off + len);
                off = 0;
            }
        }
        cd.onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();
        if (cd instanceof ArrayDecoder) {
            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
            return safeTrim(ca, clen, cs, isTrusted);
        } else {
            int clen = decodeLoop(cd, ba, off, len, ca);
            return safeTrim(ca, clen, cs, isTrusted);
        }
    }

    /**
     * Decodes bytes using the default charset, falling back to ISO-8859-1
     * (with a one-time warning) if the default charset name is not
     * supported. If ISO-8859-1 is unavailable as well, an error is printed
     * and the VM exits with status 1.
     */
    static char[] decode(byte[] ba, int off, int len) {
        String csn = Charset.defaultCharset().name();
        try {
            // use charset name decode() variant which provides caching.
            return decode(csn, ba, off, len);
        } catch (UnsupportedEncodingException x) {
            warnUnsupportedCharset(csn);
        }
        try {
            return decode("ISO-8859-1", ba, off, len);
        } catch (UnsupportedEncodingException x) {
            // If this code is hit during VM initialization, MessageUtils is
            // the only way we will be able to get any kind of error message.
            MessageUtils.err("ISO-8859-1 charset not available: "
                             + x.toString());
            // If we can not find ISO-8859-1 (a required encoding) then things
            // are seriously wrong with the installation.
            System.exit(1);
            return null;
        }
    }

    // -- Encoding --

    /**
     * An encoder bound to one charset, remembered per thread so repeated
     * encodes with the same charset name reuse it. Malformed input and
     * unmappable characters are replaced rather than reported.
     */
    private static class StringEncoder {
        // final, like the matching StringDecoder fields: set once in the constructor
        private final Charset cs;
        private final CharsetEncoder ce;
        private final String requestedCharsetName;
        // true when the charset's class reports a null class loader
        private final boolean isTrusted;

        private StringEncoder(Charset cs, String rcn) {
            this.requestedCharsetName = rcn;
            this.cs = cs;
            this.ce = cs.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
            this.isTrusted = (cs.getClass().getClassLoader0() == null);
        }

        /** Returns the charset's historical name if it has one, else its name. */
        String charsetName() {
            if (cs instanceof HistoricallyNamedCharset)
                return ((HistoricallyNamedCharset)cs).historicalName();
            return cs.name();
        }

        /** Returns the name this encoder was originally looked up under. */
        final String requestedCharsetName() {
            return requestedCharsetName;
        }

        /**
         * Encodes {@code ca[off..off+len)} and returns the resulting bytes
         * in an array trimmed to the encoded length.
         */
        byte[] encode(char[] ca, int off, int len) {
            int en = scale(len, ce.maxBytesPerChar());
            byte[] ba = new byte[en];
            if (len == 0)
                return ba;
            if (ce instanceof ArrayEncoder) {
                // array-based fast path
                int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
                return safeTrim(ba, blen, cs, isTrusted);
            } else {
                ce.reset();
                int blen = encodeLoop(ce, ca, off, len, ba);
                return safeTrim(ba, blen, cs, isTrusted);
            }
        }
    }

    /**
     * Encodes chars using the named charset, reusing this thread's cached
     * encoder when it was created for the same name.
     *
     * @param charsetName charset name; {@code null} means ISO-8859-1
     * @throws UnsupportedEncodingException if the name is unsupported or
     *         is not a legal charset name
     */
    static byte[] encode(String charsetName, char[] ca, int off, int len)
        throws UnsupportedEncodingException
    {
        StringEncoder se = deref(encoder);
        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
        if ((se == null) || !(csn.equals(se.requestedCharsetName())
                              || csn.equals(se.charsetName()))) {
            se = null;
            try {
                Charset cs = lookupCharset(csn);
                if (cs != null)
                    se = new StringEncoder(cs, csn);
            } catch (IllegalCharsetNameException x) {
                // Deliberately ignored: se stays null, so an illegal name is
                // reported as UnsupportedEncodingException just below.
            }
            if (se == null)
                throw new UnsupportedEncodingException (csn);
            set(encoder, se);
        }
        return se.encode(ca, off, len);
    }

    /**
     * Encodes chars using the given charset object. A fresh encoder is
     * created on every call; nothing is cached.
     */
    static byte[] encode(Charset cs, char[] ca, int off, int len) {
        CharsetEncoder ce = cs.newEncoder();
        int en = scale(len, ce.maxBytesPerChar());
        byte[] ba = new byte[en];
        if (len == 0)
            return ba;
        boolean isTrusted = false;
        if (System.getSecurityManager() != null) {
            if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
                // hand the encoder a private copy of the input range
                ca = Arrays.copyOfRange(ca, off, off + len);
                off = 0;
            }
        }
        ce.onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();
        if (ce instanceof ArrayEncoder) {
            int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
            return safeTrim(ba, blen, cs, isTrusted);
        } else {
            int blen = encodeLoop(ce, ca, off, len, ba);
            return safeTrim(ba, blen, cs, isTrusted);
        }
    }

    /**
     * Encodes chars using the default charset, falling back to ISO-8859-1
     * (with a one-time warning) if the default charset name is not
     * supported. If ISO-8859-1 is unavailable as well, an error is printed
     * and the VM exits with status 1.
     */
    static byte[] encode(char[] ca, int off, int len) {
        String csn = Charset.defaultCharset().name();
        try {
            // use charset name encode() variant which provides caching.
            return encode(csn, ca, off, len);
        } catch (UnsupportedEncodingException x) {
            warnUnsupportedCharset(csn);
        }
        try {
            return encode("ISO-8859-1", ca, off, len);
        } catch (UnsupportedEncodingException x) {
            // If this code is hit during VM initialization, MessageUtils is
            // the only way we will be able to get any kind of error message.
            MessageUtils.err("ISO-8859-1 charset not available: "
                             + x.toString());
            // If we can not find ISO-8859-1 (a required encoding) then things
            // are seriously wrong with the installation.
            System.exit(1);
            return null;
        }
    }
}