< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 587,596 **** --- 587,600 ---- } return i; } /* Two-argument overload: delegates to the three-argument form with doReplace = true. */ private static byte[] encode8859_1(byte coder, byte[] val) { + return encode8859_1(coder, val, true); + } + + /* Three-argument form: a LATIN1-coded value is returned as a copy of val; otherwise a destination of val.length >> 1 bytes is allocated. The rest of this method lies outside this hunk. */ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } int len = val.length >> 1; byte[] dst = new byte[len];
*** 600,609 **** --- 604,616 ---- while (sp < sl) { int ret = implEncodeISOArray(val, sp, dst, dp, len); sp = sp + ret; dp = dp + ret; if (ret != len) { + if (!doReplace) { + throwMalformed(sp, 1); + } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { sp++; }
// *** 674,683 **** --- 681,696 ----
    /**
     * Always throws; reports malformed input at the given position.
     *
     * @param off offset placed in the exception message
     * @param nb  length placed in the exception message
     * @throws IllegalArgumentException unconditionally
     */
    private static void throwMalformed(int off, int nb) {
        String msg = "malformed input off : " + off + ", length : " + nb;
        throw new IllegalArgumentException(msg);
    }

    /**
     * Always throws; scans {@code val} for its first negative byte and reports
     * that index with a length of 1. When no byte is negative the reported
     * offset is {@code val.length}.
     *
     * @param val bytes to scan
     * @throws IllegalArgumentException unconditionally
     */
    private static void throwMalformed(byte[] val) {
        int firstNegative = 0;
        for (; firstNegative < val.length; firstNegative++) {
            if (val[firstNegative] < 0) {
                break;
            }
        }
        throwMalformed(firstNegative, 1);
    }

    private static char repl = '\ufffd';

    // The definition below continues beyond the end of this hunk.
    private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
        // ascii-bais, which has a relative impact to the non-ascii-only bytes
        if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
*** 944,949 **** --- 957,1090 ---- * Throws iae, instead of replacing, if unmappble. */ static byte[] getBytesUTF8NoRepl(String s) { return encodeUTF8(s.coder(), s.value(), false); } + + ////////////////////// for j.n.f.Files ////////////////////////// + + private static boolean isASCII(byte[] src) { + return !hasNegatives(src, 0, src.length); + } + + private static String newStringLatin1(byte[] src) { + if (COMPACT_STRINGS) + return new String(src, LATIN1); + return new String(StringLatin1.inflate(src, 0, src.length), UTF16); + } + + static String newStringNoRepl(byte[] src, Charset cs) { + if (cs == UTF_8) { + if (COMPACT_STRINGS && isASCII(src)) + return new String(src, LATIN1); + Result ret = decodeUTF8_0(src, 0, src.length, false); + return new String(ret.value, ret.coder); + } + if (cs == ISO_8859_1) { + return newStringLatin1(src); + } + if (cs == US_ASCII) { + if (isASCII(src)) { + return newStringLatin1(src); + } else { + throwMalformed(src); + } + } + + CharsetDecoder cd = cs.newDecoder(); + // ascii fastpath + if ((cd instanceof ArrayDecoder) && + ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) { + return newStringLatin1(src); + } + int len = src.length; + if (len == 0) { + return ""; + } + int en = scale(len, cd.maxCharsPerByte()); + char[] ca = new char[en]; + if (cs.getClass().getClassLoader0() != null && + System.getSecurityManager() != null) { + src = Arrays.copyOf(src, len); + } + ByteBuffer bb = ByteBuffer.wrap(src); + CharBuffer cb = CharBuffer.wrap(ca); + try { + CoderResult cr = cd.decode(bb, cb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = cd.flush(cb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); // todo + } + Result ret = resultCached.get().with(ca, 0, cb.position()); + return new String(ret.value, ret.coder); + } + + /* + * Throws iae, instead of replacing, if unmappble. 
+ */ + static byte[] getBytesNoRepl(String s, Charset cs) { + byte[] val = s.value(); + byte coder = s.coder(); + if (cs == UTF_8) { + if (isASCII(val)) { + return val; + } + return encodeUTF8(coder, val, false); + } + if (cs == ISO_8859_1) { + if (coder == LATIN1) { + return val; + } + return encode8859_1(coder, val, false); + } + if (cs == US_ASCII) { + if (coder == LATIN1) { + if (isASCII(val)) { + return val; + } else { + throwMalformed(val); + } + } + } + CharsetEncoder ce = cs.newEncoder(); + // fastpath for ascii compatible + if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && + ((ArrayEncoder)ce).isASCIICompatible() && + isASCII(val)))) { + return val; + } + int len = val.length >> coder; // assume LATIN1=0/UTF16=1; + int en = scale(len, ce.maxBytesPerChar()); + byte[] ba = new byte[en]; + if (len == 0) { + return ba; + } + if (ce instanceof ArrayEncoder) { + int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) + : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); + if (blen != -1) { + return safeTrim(ba, blen, true); + } + } + boolean isTrusted = cs.getClass().getClassLoader0() == null || + System.getSecurityManager() == null; + char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) + : StringUTF16.toChars(val); + ByteBuffer bb = ByteBuffer.wrap(ba); + CharBuffer cb = CharBuffer.wrap(ca, 0, len); + try { + CoderResult cr = ce.encode(cb, bb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new Error(x); + } + return safeTrim(ba, bb.position(), isTrusted); + } }
< prev index next >