1 /* 2 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.UnsupportedEncodingException; 29 import java.lang.ref.SoftReference; 30 import java.nio.ByteBuffer; 31 import java.nio.CharBuffer; 32 import java.nio.charset.Charset; 33 import java.nio.charset.CharsetDecoder; 34 import java.nio.charset.CharsetEncoder; 35 import java.nio.charset.CharacterCodingException; 36 import java.nio.charset.CoderResult; 37 import java.nio.charset.CodingErrorAction; 38 import java.nio.charset.IllegalCharsetNameException; 39 import java.nio.charset.UnsupportedCharsetException; 40 import java.util.Arrays; 41 import sun.misc.MessageUtils; 42 import sun.nio.cs.HistoricallyNamedCharset; 43 import sun.nio.cs.ArrayDecoder; 44 import sun.nio.cs.ArrayEncoder; 45 46 /** 47 * Utility class for string encoding and decoding. 48 */ 49 50 class StringCoding { 51 52 private StringCoding() { } 53 54 /** The cached coders for each thread */ 55 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 56 new ThreadLocal<>(); 57 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 58 new ThreadLocal<>(); 59 60 private static boolean warnUnsupportedCharset = true; 61 62 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 63 SoftReference<T> sr = tl.get(); 64 if (sr == null) 65 return null; 66 return sr.get(); 67 } 68 69 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 70 tl.set(new SoftReference<T>(ob)); 71 } 72 73 // Trim the given byte array to the given length 74 // 75 private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { 76 if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) 77 return ba; 78 else 79 return Arrays.copyOf(ba, len); 80 } 81 82 // Trim the given char array to the given length 83 // 84 private static char[] safeTrim(char[] ca, int len, 85 Charset cs, boolean isTrusted) { 86 if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) 87 return ca; 88 else 89 return Arrays.copyOf(ca, len); 90 } 91 92 private static int scale(int len, float expansionFactor) { 93 // We need to perform double, not float, arithmetic; otherwise 94 // we lose low order bits when len is larger than 2**24. 95 return (int)(len * (double)expansionFactor); 96 } 97 98 private static Charset lookupCharset(String csn) { 99 if (Charset.isSupported(csn)) { 100 try { 101 return Charset.forName(csn); 102 } catch (UnsupportedCharsetException x) { 103 throw new Error(x); 104 } 105 } 106 return null; 107 } 108 109 private static void warnUnsupportedCharset(String csn) { 110 if (warnUnsupportedCharset) { 111 // Use sun.misc.MessageUtils rather than the Logging API or 112 // System.err since this method may be called during VM 113 // initialization before either is available. 114 MessageUtils.err("WARNING: Default charset " + csn + 115 " not supported, using ISO-8859-1 instead"); 116 warnUnsupportedCharset = false; 117 } 118 } 119 120 121 // -- Decoding -- 122 private static class StringDecoder { 123 private final String requestedCharsetName; 124 private final Charset cs; 125 private final CharsetDecoder cd; 126 private final boolean isTrusted; 127 128 private StringDecoder(Charset cs, String rcn) { 129 this.requestedCharsetName = rcn; 130 this.cs = cs; 131 this.cd = cs.newDecoder() 132 .onMalformedInput(CodingErrorAction.REPLACE) 133 .onUnmappableCharacter(CodingErrorAction.REPLACE); 134 this.isTrusted = (cs.getClass().getClassLoader0() == null); 135 } 136 137 String charsetName() { 138 if (cs instanceof HistoricallyNamedCharset) 139 return ((HistoricallyNamedCharset)cs).historicalName(); 140 return cs.name(); 141 } 142 143 final String requestedCharsetName() { 144 return requestedCharsetName; 145 } 146 147 char[] decode(byte[] ba, int off, int len) { 148 int en = scale(len, cd.maxCharsPerByte()); 149 char[] ca = new char[en]; 150 if (len == 0) 151 return ca; 152 if (cd instanceof ArrayDecoder) { 153 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 154 return safeTrim(ca, clen, cs, isTrusted); 155 } else { 156 cd.reset(); 157 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 158 CharBuffer cb = CharBuffer.wrap(ca); 159 try { 160 CoderResult cr = cd.decode(bb, cb, true); 161 if (!cr.isUnderflow()) 162 cr.throwException(); 163 cr = cd.flush(cb); 164 if (!cr.isUnderflow()) 165 cr.throwException(); 166 } catch (CharacterCodingException x) { 167 // Substitution is always enabled, 168 // so this shouldn't happen 169 throw new Error(x); 170 } 171 return safeTrim(ca, cb.position(), cs, isTrusted); 172 } 173 } 174 } 175 176 static char[] decode(String charsetName, byte[] ba, int off, int len) 177 throws UnsupportedEncodingException 178 { 179 StringDecoder sd = deref(decoder); 180 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 181 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 182 || csn.equals(sd.charsetName()))) { 183 sd = null; 184 try { 185 Charset cs = lookupCharset(csn); 186 if (cs != null) 187 sd = new StringDecoder(cs, csn); 188 } catch (IllegalCharsetNameException x) {} 189 if (sd == null) 190 throw new UnsupportedEncodingException(csn); 191 set(decoder, sd); 192 } 193 return sd.decode(ba, off, len); 194 } 195 196 static char[] decode(Charset cs, byte[] ba, int off, int len) { 197 // (1)We never cache the "external" cs, the only benefit of creating 198 // an additional StringDe/Encoder object to wrap it is to share the 199 // de/encode() method. These SD/E objects are short-lifed, the young-gen 200 // gc should be able to take care of them well. But the best approash 201 // is still not to generate them if not really necessary. 202 // (2)The defensive copy of the input byte/char[] has a big performance 203 // impact, as well as the outgoing result byte/char[]. Need to do the 204 // optimization check of (sm==null && classLoader0==null) for both. 205 // (3)getClass().getClassLoader0() is expensive 206 // (4)There might be a timing gap in isTrusted setting. getClassLoader0() 207 // is only chcked (and then isTrusted gets set) when (SM==null). It is 208 // possible that the SM==null for now but then SM is NOT null later 209 // when safeTrim() is invoked...the "safe" way to do is to redundant 210 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim 211 // but it then can be argued that the SM is null when the opertaion 212 // is started... 213 CharsetDecoder cd = cs.newDecoder(); 214 int en = scale(len, cd.maxCharsPerByte()); 215 char[] ca = new char[en]; 216 if (len == 0) 217 return ca; 218 boolean isTrusted = false; 219 if (System.getSecurityManager() != null) { 220 if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) { 221 ba = Arrays.copyOfRange(ba, off, off + len); 222 off = 0; 223 } 224 } 225 if (cd instanceof ArrayDecoder) { 226 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 227 return safeTrim(ca, clen, cs, isTrusted); 228 } else { 229 cd.onMalformedInput(CodingErrorAction.REPLACE) 230 .onUnmappableCharacter(CodingErrorAction.REPLACE) 231 .reset(); 232 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 233 CharBuffer cb = CharBuffer.wrap(ca); 234 try { 235 CoderResult cr = cd.decode(bb, cb, true); 236 if (!cr.isUnderflow()) 237 cr.throwException(); 238 cr = cd.flush(cb); 239 if (!cr.isUnderflow()) 240 cr.throwException(); 241 } catch (CharacterCodingException x) { 242 // Substitution is always enabled, 243 // so this shouldn't happen 244 throw new Error(x); 245 } 246 return safeTrim(ca, cb.position(), cs, isTrusted); 247 } 248 } 249 250 static char[] decode(byte[] ba, int off, int len) { 251 String csn = Charset.defaultCharset().name(); 252 try { 253 return decode(csn, ba, off, len); 254 } catch (UnsupportedEncodingException x) { 255 warnUnsupportedCharset(csn); 256 } 257 try { 258 return decode("ISO-8859-1", ba, off, len); 259 } catch (UnsupportedEncodingException x) { 260 // If this code is hit during VM initialization, MessageUtils is 261 // the only way we will be able to get any kind of error message. 262 MessageUtils.err("ISO-8859-1 charset not available: " 263 + x.toString()); 264 // If we can not find ISO-8859-1 (a required encoding) then things 265 // are seriously wrong with the installation. 266 System.exit(1); 267 return null; 268 } 269 } 270 271 // -- Encoding -- 272 private static class StringEncoder { 273 private Charset cs; 274 private CharsetEncoder ce; 275 private final String requestedCharsetName; 276 private final boolean isTrusted; 277 278 private StringEncoder(Charset cs, String rcn) { 279 this.requestedCharsetName = rcn; 280 this.cs = cs; 281 this.ce = cs.newEncoder() 282 .onMalformedInput(CodingErrorAction.REPLACE) 283 .onUnmappableCharacter(CodingErrorAction.REPLACE); 284 this.isTrusted = (cs.getClass().getClassLoader0() == null); 285 } 286 287 String charsetName() { 288 if (cs instanceof HistoricallyNamedCharset) 289 return ((HistoricallyNamedCharset)cs).historicalName(); 290 return cs.name(); 291 } 292 293 final String requestedCharsetName() { 294 return requestedCharsetName; 295 } 296 297 byte[] encode(char[] ca, int off, int len) { 298 int en = scale(len, ce.maxBytesPerChar()); 299 byte[] ba = new byte[en]; 300 if (len == 0) 301 return ba; 302 if (ce instanceof ArrayEncoder) { 303 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 304 return safeTrim(ba, blen, cs, isTrusted); 305 } else { 306 ce.reset(); 307 ByteBuffer bb = ByteBuffer.wrap(ba); 308 CharBuffer cb = CharBuffer.wrap(ca, off, len); 309 try { 310 CoderResult cr = ce.encode(cb, bb, true); 311 if (!cr.isUnderflow()) 312 cr.throwException(); 313 cr = ce.flush(bb); 314 if (!cr.isUnderflow()) 315 cr.throwException(); 316 } catch (CharacterCodingException x) { 317 // Substitution is always enabled, 318 // so this shouldn't happen 319 throw new Error(x); 320 } 321 return safeTrim(ba, bb.position(), cs, isTrusted); 322 } 323 } 324 } 325 326 static byte[] encode(String charsetName, char[] ca, int off, int len) 327 throws UnsupportedEncodingException 328 { 329 StringEncoder se = deref(encoder); 330 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 331 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 332 || csn.equals(se.charsetName()))) { 333 se = null; 334 try { 335 Charset cs = lookupCharset(csn); 336 if (cs != null) 337 se = new StringEncoder(cs, csn); 338 } catch (IllegalCharsetNameException x) {} 339 if (se == null) 340 throw new UnsupportedEncodingException (csn); 341 set(encoder, se); 342 } 343 return se.encode(ca, off, len); 344 } 345 346 static byte[] encode(Charset cs, char[] ca, int off, int len) { 347 CharsetEncoder ce = cs.newEncoder(); 348 int en = scale(len, ce.maxBytesPerChar()); 349 byte[] ba = new byte[en]; 350 if (len == 0) 351 return ba; 352 boolean isTrusted = false; 353 if (System.getSecurityManager() != null) { 354 if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) { 355 ca = Arrays.copyOfRange(ca, off, off + len); 356 off = 0; 357 } 358 } 359 if (ce instanceof ArrayEncoder) { 360 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 361 return safeTrim(ba, blen, cs, isTrusted); 362 } else { 363 ce.onMalformedInput(CodingErrorAction.REPLACE) 364 .onUnmappableCharacter(CodingErrorAction.REPLACE) 365 .reset(); 366 ByteBuffer bb = ByteBuffer.wrap(ba); 367 CharBuffer cb = CharBuffer.wrap(ca, off, len); 368 try { 369 CoderResult cr = ce.encode(cb, bb, true); 370 if (!cr.isUnderflow()) 371 cr.throwException(); 372 cr = ce.flush(bb); 373 if (!cr.isUnderflow()) 374 cr.throwException(); 375 } catch (CharacterCodingException x) { 376 throw new Error(x); 377 } 378 return safeTrim(ba, bb.position(), cs, isTrusted); 379 } 380 } 381 382 static byte[] encode(char[] ca, int off, int len) { 383 String csn = Charset.defaultCharset().name(); 384 try { 385 return encode(csn, ca, off, len); 386 } catch (UnsupportedEncodingException x) { 387 warnUnsupportedCharset(csn); 388 } 389 try { 390 return encode("ISO-8859-1", ca, off, len); 391 } catch (UnsupportedEncodingException x) { 392 // If this code is hit during VM initialization, MessageUtils is 393 // the only way we will be able to get any kind of error message. 394 MessageUtils.err("ISO-8859-1 charset not available: " 395 + x.toString()); 396 // If we can not find ISO-8859-1 (a required encoding) then things 397 // are seriously wrong with the installation. 398 System.exit(1); 399 return null; 400 } 401 } 402 }