--- old/src/java.base/share/classes/java/lang/StringDecoderUTF8.java 2017-12-13 06:58:28.928027352 -0800 +++ /dev/null 2017-07-29 19:06:40.011000000 -0700 @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package java.lang; - -import java.nio.charset.Charset; -import java.util.Arrays; - -import static java.lang.String.LATIN1; -import static java.lang.String.UTF16; -import static java.lang.String.COMPACT_STRINGS; -import static java.lang.Character.isSurrogate; -import static java.lang.Character.highSurrogate; -import static java.lang.Character.lowSurrogate; -import static java.lang.Character.isSupplementaryCodePoint; -import static java.lang.StringUTF16.putChar; - -class StringDecoderUTF8 extends StringCoding.StringDecoder { - - StringDecoderUTF8(Charset cs, String rcn) { - super(cs, rcn); - } - - private static boolean isNotContinuation(int b) { - return (b & 0xc0) != 0x80; - } - - private static boolean isMalformed3(int b1, int b2, int b3) { - return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; - } - - private static boolean isMalformed3_2(int b1, int b2) { - return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - (b2 & 0xc0) != 0x80; - } - - private static boolean isMalformed4(int b2, int b3, int b4) { - return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || - (b4 & 0xc0) != 0x80; - } - - private static boolean isMalformed4_2(int b1, int b2) { - return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || - (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || - (b2 & 0xc0) != 0x80; - } - - private static boolean isMalformed4_3(int b3) { - return (b3 & 0xc0) != 0x80; - } - - // for nb == 3/4 - private static int malformedN(byte[] src, int sp, int nb) { - if (nb == 3) { - int b1 = src[sp++]; - int b2 = src[sp++]; // no need to lookup b3 - return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - isNotContinuation(b2)) ? 1 : 2; - } else if (nb == 4) { // we don't care the speed here - int b1 = src[sp++] & 0xff; - int b2 = src[sp++] & 0xff; - if (b1 > 0xf4 || - (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || - (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || - isNotContinuation(b2)) - return 1; - if (isNotContinuation(src[sp++])) - return 2; - return 3; - } - assert false; - return -1; - } - - private static char repl = '\ufffd'; - - StringCoding.Result decode(byte[] src, int sp, int len) { - return decode(src, sp, len, result); - } - - static StringCoding.Result decode(byte[] src, int sp, int len, - StringCoding.Result ret) { - int sl = sp + len; - byte[] dst = new byte[len]; - int dp = 0; - if (COMPACT_STRINGS) { // Latin1 only loop - while (sp < sl) { - int b1 = src[sp]; - if (b1 >= 0) { - dst[dp++] = (byte)b1; - sp++; - continue; - } - if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) && - sp + 1 < sl) { - int b2 = src[sp + 1]; - if (!isNotContinuation(b2)) { - dst[dp++] = (byte)(((b1 << 6) ^ b2)^ - (((byte) 0xC0 << 6) ^ - ((byte) 0x80 << 0))); - sp += 2; - continue; - } - } - // anything not a latin1, including the repl - // we have to go with the utf16 - break; - } - if (sp == sl) { - if (dp != dst.length) { - dst = Arrays.copyOf(dst, dp); - } - return ret.with(dst, LATIN1); - } - } - if (dp == 0) { - dst = new byte[len << 1]; - } else { - byte[] buf = new byte[len << 1]; - StringLatin1.inflate(dst, 0, buf, 0, dp); - dst = buf; - } - while (sp < sl) { - int b1 = src[sp++]; - if (b1 >= 0) { - putChar(dst, dp++, (char) b1); - } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { - if (sp < sl) { - int b2 = src[sp++]; - if (isNotContinuation(b2)) { - putChar(dst, dp++, repl); - sp--; - } else { - putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^ - (((byte) 0xC0 << 6) ^ - ((byte) 0x80 << 0)))); - } - continue; - } - putChar(dst, dp++, repl); - break; - } else if ((b1 >> 4) == -2) { - if (sp + 1 < sl) { - int b2 = src[sp++]; - int b3 = src[sp++]; - if (isMalformed3(b1, b2, b3)) { - putChar(dst, dp++, repl); - sp -= 3; - sp += malformedN(src, sp, 3); - } else { - char c = (char)((b1 << 12) ^ - (b2 << 6) ^ - (b3 ^ - (((byte) 0xE0 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 << 0)))); - putChar(dst, dp++, isSurrogate(c) ? repl : c); - } - continue; - } - if (sp < sl && isMalformed3_2(b1, src[sp])) { - putChar(dst, dp++, repl); - continue; - } - putChar(dst, dp++, repl); - break; - } else if ((b1 >> 3) == -2) { - if (sp + 2 < sl) { - int b2 = src[sp++]; - int b3 = src[sp++]; - int b4 = src[sp++]; - int uc = ((b1 << 18) ^ - (b2 << 12) ^ - (b3 << 6) ^ - (b4 ^ - (((byte) 0xF0 << 18) ^ - ((byte) 0x80 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 << 0)))); - if (isMalformed4(b2, b3, b4) || - !isSupplementaryCodePoint(uc)) { // shortest form check - putChar(dst, dp++, repl); - sp -= 4; - sp += malformedN(src, sp, 4); - } else { - putChar(dst, dp++, highSurrogate(uc)); - putChar(dst, dp++, lowSurrogate(uc)); - } - continue; - } - b1 &= 0xff; - if (b1 > 0xf4 || - sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) { - putChar(dst, dp++, repl); - continue; - } - sp++; - putChar(dst, dp++, repl); - if (sp < sl && isMalformed4_3(src[sp])) { - continue; - } - break; - } else { - putChar(dst, dp++, repl); - } - } - if (dp != len) { - dst = Arrays.copyOf(dst, dp << 1); - } - return ret.with(dst, UTF16); - } -}