1 /* 2 * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.UnsupportedEncodingException; 29 import java.lang.ref.SoftReference; 30 import java.nio.ByteBuffer; 31 import java.nio.CharBuffer; 32 import java.nio.charset.Charset; 33 import java.nio.charset.CharsetDecoder; 34 import java.nio.charset.CharsetEncoder; 35 import java.nio.charset.CharacterCodingException; 36 import java.nio.charset.CoderResult; 37 import java.nio.charset.CodingErrorAction; 38 import java.nio.charset.IllegalCharsetNameException; 39 import java.nio.charset.UnsupportedCharsetException; 40 import java.util.Arrays; 41 import sun.misc.MessageUtils; 42 import sun.nio.cs.HistoricallyNamedCharset; 43 import sun.nio.cs.ArrayDecoder; 44 import sun.nio.cs.ArrayEncoder; 45 46 /** 47 * Utility class for string encoding and decoding. 48 */ 49 50 class StringCoding { 51 52 private StringCoding() { } 53 54 /** The cached coders for each thread */ 55 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 56 new ThreadLocal<>(); 57 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 58 new ThreadLocal<>(); 59 60 private static boolean warnUnsupportedCharset = true; 61 62 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 63 SoftReference<T> sr = tl.get(); 64 if (sr == null) 65 return null; 66 return sr.get(); 67 } 68 69 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 70 tl.set(new SoftReference<>(ob)); 71 } 72 73 // Trim the given byte array to the given length 74 // 75 private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { 76 if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) 77 return ba; 78 else 79 return Arrays.copyOf(ba, len); 80 } 81 82 // Trim the given char array to the given length 83 // 84 private static char[] safeTrim(char[] ca, int len, 85 Charset cs, boolean isTrusted) { 86 if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) 87 return ca; 88 else 89 return Arrays.copyOf(ca, len); 90 } 91 92 private static int scale(int len, float expansionFactor) { 93 // We need to perform double, not float, arithmetic; otherwise 94 // we lose low order bits when len is larger than 2**24. 95 return (int)(len * (double)expansionFactor); 96 } 97 98 private static Charset lookupCharset(String csn) { 99 if (Charset.isSupported(csn)) { 100 try { 101 return Charset.forName(csn); 102 } catch (UnsupportedCharsetException x) { 103 throw new Error(x); 104 } 105 } 106 return null; 107 } 108 109 private static void warnUnsupportedCharset(String csn) { 110 if (warnUnsupportedCharset) { 111 // Use sun.misc.MessageUtils rather than the Logging API or 112 // System.err since this method may be called during VM 113 // initialization before either is available. 114 MessageUtils.err("WARNING: Default charset " + csn + 115 " not supported, using ISO-8859-1 instead"); 116 warnUnsupportedCharset = false; 117 } 118 } 119 120 121 // -- Decoding -- 122 private static class StringDecoder { 123 private final String requestedCharsetName; 124 private final Charset cs; 125 private final CharsetDecoder cd; 126 private final boolean isTrusted; 127 128 private StringDecoder(Charset cs, String rcn) { 129 this.requestedCharsetName = rcn; 130 this.cs = cs; 131 this.cd = cs.newDecoder() 132 .onMalformedInput(CodingErrorAction.REPLACE) 133 .onUnmappableCharacter(CodingErrorAction.REPLACE); 134 this.isTrusted = (cs.getClass().getClassLoader0() == null); 135 } 136 137 String charsetName() { 138 if (cs instanceof HistoricallyNamedCharset) 139 return ((HistoricallyNamedCharset)cs).historicalName(); 140 return cs.name(); 141 } 142 143 final String requestedCharsetName() { 144 return requestedCharsetName; 145 } 146 147 char[] decode(byte[] ba, int off, int len) { 148 int en = scale(len, cd.maxCharsPerByte()); 149 char[] ca = new char[en]; 150 if (len == 0) 151 return ca; 152 if (cd instanceof ArrayDecoder) { 153 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 154 return safeTrim(ca, clen, cs, isTrusted); 155 } else { 156 cd.reset(); 157 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 158 CharBuffer cb = CharBuffer.wrap(ca); 159 try { 160 CoderResult cr = cd.decode(bb, cb, true); 161 if (!cr.isUnderflow()) 162 cr.throwException(); 163 cr = cd.flush(cb); 164 if (!cr.isUnderflow()) 165 cr.throwException(); 166 } catch (CharacterCodingException x) { 167 // Substitution is always enabled, 168 // so this shouldn't happen 169 throw new Error(x); 170 } 171 return safeTrim(ca, cb.position(), cs, isTrusted); 172 } 173 } 174 } 175 176 static char[] decode(String charsetName, byte[] ba, int off, int len) 177 throws UnsupportedEncodingException 178 { 179 StringDecoder sd = deref(decoder); 180 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 181 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 182 || csn.equals(sd.charsetName()))) { 183 sd = null; 184 try { 185 Charset cs = lookupCharset(csn); 186 if (cs != null) 187 sd = new StringDecoder(cs, csn); 188 } catch (IllegalCharsetNameException x) {} 189 if (sd == null) 190 throw new UnsupportedEncodingException(csn); 191 set(decoder, sd); 192 } 193 return sd.decode(ba, off, len); 194 } 195 196 static char[] decode(Charset cs, byte[] ba, int off, int len) { 197 // (1)We never cache the "external" cs, the only benefit of creating 198 // an additional StringDe/Encoder object to wrap it is to share the 199 // de/encode() method. These SD/E objects are short-lived, the young-gen 200 // gc should be able to take care of them well. But the best approach 201 // is still not to generate them if not really necessary. 202 // (2)The defensive copy of the input byte/char[] has a big performance 203 // impact, as well as the outgoing result byte/char[]. Need to do the 204 // optimization check of (sm==null && classLoader0==null) for both. 205 // (3)getClass().getClassLoader0() is expensive 206 // (4)There might be a timing gap in isTrusted setting. getClassLoader0() 207 // is only checked (and then isTrusted gets set) when (SM==null). It is 208 // possible that the SM==null for now but then SM is NOT null later 209 // when safeTrim() is invoked...the "safe" way to do is to redundant 210 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim 211 // but it then can be argued that the SM is null when the operation 212 // is started... 213 CharsetDecoder cd = cs.newDecoder(); 214 int en = scale(len, cd.maxCharsPerByte()); 215 char[] ca = new char[en]; 216 if (len == 0) 217 return ca; 218 219 boolean isTrusted = isTrusted(cs); 220 if (!isTrusted) { 221 ba = Arrays.copyOfRange(ba, off, off + len); 222 off = 0; 223 } 224 225 setupDecoder(cd); 226 227 if (cd instanceof ArrayDecoder) { 228 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 229 return safeTrim(ca, clen, cs, isTrusted); 230 } else { 231 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 232 return performDecode(cs, isTrusted, ca, cd, bb); 233 } 234 } 235 236 static char[] decode(Charset cs, ByteBuffer bb, int off, int len) { 237 // See comment at top of decode(Charset,byte[],int,int) 238 CharsetDecoder cd = cs.newDecoder(); 239 int en = scale(len, cd.maxCharsPerByte()); 240 char[] ca = new char[en]; 241 if (len == 0) 242 return ca; 243 244 boolean isTrusted = isTrusted(cs); 245 if (!isTrusted) { 246 // setup the bytebuffer for copying 247 ByteBuffer originalByteBuffer = bb; 248 int originalPosition = originalByteBuffer.position(); 249 int originalLimit = originalByteBuffer.limit(); 250 originalByteBuffer.position(off) 251 .limit(off + len); 252 253 // copy the bytebuffer 254 bb = ByteBuffer.allocateDirect(len) 255 .put(originalByteBuffer); 256 bb.position(0); 257 258 // reset the original bytebuffer 259 originalByteBuffer.position(originalPosition) 260 .limit(originalLimit); 261 262 off = 0; 263 } 264 265 setupDecoder(cd); 266 267 return performDecode(cs, isTrusted, ca, cd, bb); 268 } 269 270 private static boolean isTrusted(Charset cs) { 271 return System.getSecurityManager() != null 272 && cs.getClass().getClassLoader0() == null; 273 } 274 275 private static void setupDecoder(CharsetDecoder cd) { 276 cd.onMalformedInput(CodingErrorAction.REPLACE) 277 .onUnmappableCharacter(CodingErrorAction.REPLACE) 278 .reset(); 279 } 280 281 static char[] performDecode( 282 Charset cs, boolean isTrusted, char[] ca, 283 CharsetDecoder cd, ByteBuffer bb) { 284 285 CharBuffer cb = CharBuffer.wrap(ca); 286 try { 287 CoderResult cr = cd.decode(bb, cb, true); 288 if (!cr.isUnderflow()) 289 cr.throwException(); 290 cr = cd.flush(cb); 291 if (!cr.isUnderflow()) 292 cr.throwException(); 293 } catch (CharacterCodingException x) { 294 // Substitution is always enabled, 295 // so this shouldn't happen 296 throw new Error(x); 297 } 298 return safeTrim(ca, cb.position(), cs, isTrusted); 299 } 300 301 static char[] decode(byte[] ba, int off, int len) { 302 String csn = Charset.defaultCharset().name(); 303 try { 304 // use charset name decode() variant which provides caching. 305 return decode(csn, ba, off, len); 306 } catch (UnsupportedEncodingException x) { 307 warnUnsupportedCharset(csn); 308 } 309 try { 310 return decode("ISO-8859-1", ba, off, len); 311 } catch (UnsupportedEncodingException x) { 312 // If this code is hit during VM initialization, MessageUtils is 313 // the only way we will be able to get any kind of error message. 314 MessageUtils.err("ISO-8859-1 charset not available: " 315 + x.toString()); 316 // If we can not find ISO-8859-1 (a required encoding) then things 317 // are seriously wrong with the installation. 318 System.exit(1); 319 return null; 320 } 321 } 322 323 // -- Encoding -- 324 private static class StringEncoder { 325 private Charset cs; 326 private CharsetEncoder ce; 327 private final String requestedCharsetName; 328 private final boolean isTrusted; 329 330 private StringEncoder(Charset cs, String rcn) { 331 this.requestedCharsetName = rcn; 332 this.cs = cs; 333 this.ce = cs.newEncoder() 334 .onMalformedInput(CodingErrorAction.REPLACE) 335 .onUnmappableCharacter(CodingErrorAction.REPLACE); 336 this.isTrusted = (cs.getClass().getClassLoader0() == null); 337 } 338 339 String charsetName() { 340 if (cs instanceof HistoricallyNamedCharset) 341 return ((HistoricallyNamedCharset)cs).historicalName(); 342 return cs.name(); 343 } 344 345 final String requestedCharsetName() { 346 return requestedCharsetName; 347 } 348 349 byte[] encode(char[] ca, int off, int len) { 350 int en = scale(len, ce.maxBytesPerChar()); 351 byte[] ba = new byte[en]; 352 if (len == 0) 353 return ba; 354 if (ce instanceof ArrayEncoder) { 355 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 356 return safeTrim(ba, blen, cs, isTrusted); 357 } else { 358 ce.reset(); 359 ByteBuffer bb = ByteBuffer.wrap(ba); 360 CharBuffer cb = CharBuffer.wrap(ca, off, len); 361 try { 362 CoderResult cr = ce.encode(cb, bb, true); 363 if (!cr.isUnderflow()) 364 cr.throwException(); 365 cr = ce.flush(bb); 366 if (!cr.isUnderflow()) 367 cr.throwException(); 368 } catch (CharacterCodingException x) { 369 // Substitution is always enabled, 370 // so this shouldn't happen 371 throw new Error(x); 372 } 373 return safeTrim(ba, bb.position(), cs, isTrusted); 374 } 375 } 376 } 377 378 static byte[] encode(String charsetName, char[] ca, int off, int len) 379 throws UnsupportedEncodingException 380 { 381 StringEncoder se = deref(encoder); 382 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 383 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 384 || csn.equals(se.charsetName()))) { 385 se = null; 386 try { 387 Charset cs = lookupCharset(csn); 388 if (cs != null) 389 se = new StringEncoder(cs, csn); 390 } catch (IllegalCharsetNameException x) {} 391 if (se == null) 392 throw new UnsupportedEncodingException (csn); 393 set(encoder, se); 394 } 395 return se.encode(ca, off, len); 396 } 397 398 static byte[] encode(Charset cs, char[] ca, int off, int len) { 399 CharsetEncoder ce = cs.newEncoder(); 400 int en = scale(len, ce.maxBytesPerChar()); 401 byte[] ba = new byte[en]; 402 if (len == 0) 403 return ba; 404 boolean isTrusted = isTrusted(cs); 405 if (!isTrusted) { 406 ca = Arrays.copyOfRange(ca, off, off + len); 407 off = 0; 408 } 409 setupEncoder(ce); 410 if (ce instanceof ArrayEncoder) { 411 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 412 return safeTrim(ba, blen, cs, isTrusted); 413 } else { 414 ByteBuffer bb = ByteBuffer.wrap(ba); 415 CharBuffer cb = CharBuffer.wrap(ca, off, len); 416 try { 417 CoderResult cr = ce.encode(cb, bb, true); 418 if (!cr.isUnderflow()) 419 cr.throwException(); 420 cr = ce.flush(bb); 421 if (!cr.isUnderflow()) 422 cr.throwException(); 423 } catch (CharacterCodingException x) { 424 throw new Error(x); 425 } 426 return safeTrim(ba, bb.position(), cs, isTrusted); 427 } 428 } 429 430 static int encode(Charset cs, char[] ca, int off, int len, byte[] destBuffer, int destOffset) { 431 ByteBuffer bb = ByteBuffer.wrap(destBuffer, destOffset, destBuffer.length - destOffset); 432 return encode(cs, ca, off, len, bb) - destOffset; 433 } 434 435 static int encode(Charset cs, char[] ca, int off, int len, ByteBuffer destBuffer, int destOffset) { 436 int originalPosition = destBuffer.position(); 437 destBuffer.position(destOffset); 438 try { 439 return encode(cs, ca, off, len, destBuffer) - destOffset; 440 } finally { 441 destBuffer.position(originalPosition); 442 } 443 } 444 445 private static int encode(Charset cs, char[] ca, int off, int len, ByteBuffer destBuffer) { 446 CharsetEncoder ce = cs.newEncoder(); 447 CharBuffer cb = CharBuffer.wrap(ca, off, len); 448 try { 449 CoderResult cr = ce.encode(cb, destBuffer, true); 450 if (!cr.isUnderflow()) 451 cr.throwException(); 452 cr = ce.flush(destBuffer); 453 if (!cr.isUnderflow()) 454 cr.throwException(); 455 } catch (CharacterCodingException x) { 456 throw new Error(x); 457 } 458 return destBuffer.position(); 459 } 460 461 private static void setupEncoder(CharsetEncoder encoder) { 462 encoder.onMalformedInput(CodingErrorAction.REPLACE) 463 .onUnmappableCharacter(CodingErrorAction.REPLACE) 464 .reset(); 465 } 466 467 static byte[] encode(char[] ca, int off, int len) { 468 String csn = Charset.defaultCharset().name(); 469 try { 470 // use charset name encode() variant which provides caching. 471 return encode(csn, ca, off, len); 472 } catch (UnsupportedEncodingException x) { 473 warnUnsupportedCharset(csn); 474 } 475 try { 476 return encode("ISO-8859-1", ca, off, len); 477 } catch (UnsupportedEncodingException x) { 478 // If this code is hit during VM initialization, MessageUtils is 479 // the only way we will be able to get any kind of error message. 480 MessageUtils.err("ISO-8859-1 charset not available: " 481 + x.toString()); 482 // If we can not find ISO-8859-1 (a required encoding) then things 483 // are seriously wrong with the installation. 484 System.exit(1); 485 return null; 486 } 487 } 488 }