--- old/src/jdk.charsets/share/classes/sun/nio/cs/ext/DoubleByte.java 2015-02-16 10:41:15.000000000 -0800 +++ /dev/null 2014-05-19 13:38:05.886368312 -0700 @@ -1,929 +0,0 @@ -/* - * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -package sun.nio.cs.ext; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; -import java.util.Arrays; -import sun.nio.cs.Surrogate; -import sun.nio.cs.ArrayDecoder; -import sun.nio.cs.ArrayEncoder; -import static sun.nio.cs.CharsetMapping.*; - -/* - * Four types of "DoubleByte" charsets are implemented in this class - * (1)DoubleByte - * The "mostly widely used" multibyte charset, a combination of - * a singlebyte character set (usually the ASCII charset) and a - * doublebyte character set. The codepoint values of singlebyte - * and doublebyte don't overlap. Microsoft's multibyte charsets - * and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943, - * 948, 949 and 950 are such charsets. - * - * (2)DoubleByte_EBCDIC - * IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch) - * in and out between the singlebyte character set and doublebyte - * character set. - * - * (3)DoubleByte_SIMPLE_EUC - * It's a "simple" form of EUC encoding scheme, only have the - * singlebyte character set G0 and one doublebyte character set - * G1 are defined, G2 (with SS2) and G3 (with SS3) are not used. - * So it is actually the same as the "typical" type (1) mentioned - * above, except it return "malformed" for the SS2 and SS3 when - * decoding. - * - * (4)DoubleByte ONLY - * A "pure" doublebyte only character set. From implementation - * point of view, this is the type (1) with "decodeSingle" always - * returns unmappable. - * - * For simplicity, all implementations share the same decoding and - * encoding data structure. 
- * - * Decoding: - * - * char[][] b2c; - * char[] b2cSB; - * int b2Min, b2Max - * - * public char decodeSingle(int b) { - * return b2cSB.[b]; - * } - * - * public char decodeDouble(int b1, int b2) { - * if (b2 < b2Min || b2 > b2Max) - * return UNMAPPABLE_DECODING; - * return b2c[b1][b2 - b2Min]; - * } - * - * (1)b2Min, b2Max are the corresponding min and max value of the - * low-half of the double-byte. - * (2)The high 8-bit/b1 of the double-byte are used to indexed into - * b2c array. - * - * Encoding: - * - * char[] c2b; - * char[] c2bIndex; - * - * public int encodeChar(char ch) { - * return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; - * } - * - */ - -public class DoubleByte { - - public final static char[] B2C_UNMAPPABLE; - static { - B2C_UNMAPPABLE = new char[0x100]; - Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING); - } - - public static class Decoder extends CharsetDecoder - implements DelegatableDecoder, ArrayDecoder - { - final char[][] b2c; - final char[] b2cSB; - final int b2Min; - final int b2Max; - - // for SimpleEUC override - protected CoderResult crMalformedOrUnderFlow(int b) { - return CoderResult.UNDERFLOW; - } - - protected CoderResult crMalformedOrUnmappable(int b1, int b2) { - if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1) - b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2) - decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2) - return CoderResult.malformedForLength(1); - } - return CoderResult.unmappableForLength(2); - } - - Decoder(Charset cs, float avgcpb, float maxcpb, - char[][] b2c, char[] b2cSB, - int b2Min, int b2Max) { - super(cs, avgcpb, maxcpb); - this.b2c = b2c; - this.b2cSB = b2cSB; - this.b2Min = b2Min; - this.b2Max = b2Max; - } - - Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { - this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max); - } - - protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { - byte[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - int sl = 
src.arrayOffset() + src.limit(); - - char[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - int dl = dst.arrayOffset() + dst.limit(); - - try { - while (sp < sl && dp < dl) { - // inline the decodeSingle/Double() for better performance - int inSize = 1; - int b1 = sa[sp] & 0xff; - char c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) { - if (sl - sp < 2) - return crMalformedOrUnderFlow(b1); - int b2 = sa[sp + 1] & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - return crMalformedOrUnmappable(b1, b2); - } - inSize++; - } - da[dp++] = c; - sp += inSize; - } - return (sp >= sl) ? CoderResult.UNDERFLOW - : CoderResult.OVERFLOW; - } finally { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - } - - protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { - int mark = src.position(); - try { - - while (src.hasRemaining() && dst.hasRemaining()) { - int b1 = src.get() & 0xff; - char c = b2cSB[b1]; - int inSize = 1; - if (c == UNMAPPABLE_DECODING) { - if (src.remaining() < 1) - return crMalformedOrUnderFlow(b1); - int b2 = src.get() & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) - return crMalformedOrUnmappable(b1, b2); - inSize++; - } - dst.put(c); - mark += inSize; - } - return src.hasRemaining()? 
CoderResult.OVERFLOW - : CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - // Make some protected methods public for use by JISAutoDetect - public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { - if (src.hasArray() && dst.hasArray()) - return decodeArrayLoop(src, dst); - else - return decodeBufferLoop(src, dst); - } - - public int decode(byte[] src, int sp, int len, char[] dst) { - int dp = 0; - int sl = sp + len; - char repl = replacement().charAt(0); - while (sp < sl) { - int b1 = src[sp++] & 0xff; - char c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) { - if (sp < sl) { - int b2 = src[sp++] & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte - b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte - decodeSingle(b2) != UNMAPPABLE_DECODING) { - sp--; - } - } - } - if (c == UNMAPPABLE_DECODING) { - c = repl; - } - } - dst[dp++] = c; - } - return dp; - } - - public void implReset() { - super.implReset(); - } - - public CoderResult implFlush(CharBuffer out) { - return super.implFlush(out); - } - - // decode loops are not using decodeSingle/Double() for performance - // reason. 
- public char decodeSingle(int b) { - return b2cSB[b]; - } - - public char decodeDouble(int b1, int b2) { - if (b1 < 0 || b1 > b2c.length || - b2 < b2Min || b2 > b2Max) - return UNMAPPABLE_DECODING; - return b2c[b1][b2 - b2Min]; - } - } - - // IBM_EBCDIC_DBCS - public static class Decoder_EBCDIC extends Decoder { - private static final int SBCS = 0; - private static final int DBCS = 1; - private static final int SO = 0x0e; - private static final int SI = 0x0f; - private int currentState; - - Decoder_EBCDIC(Charset cs, - char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { - super(cs, b2c, b2cSB, b2Min, b2Max); - } - - public void implReset() { - currentState = SBCS; - } - - // Check validity of dbcs ebcdic byte pair values - // - // First byte : 0x41 -- 0xFE - // Second byte: 0x41 -- 0xFE - // Doublebyte blank: 0x4040 - // - // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io - // as - // if ((b1 != 0x40 || b2 != 0x40) && - // (b2 < 0x41 || b2 > 0xfe)) {...} - // is not correct/complete (range check for b1) - // - private static boolean isDoubleByte(int b1, int b2) { - return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe) - || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE - } - - protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { - byte[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - int sl = src.arrayOffset() + src.limit(); - char[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - int dl = dst.arrayOffset() + dst.limit(); - - try { - // don't check dp/dl together here, it's possible to - // decdoe a SO/SI without space in output buffer. 
- while (sp < sl) { - int b1 = sa[sp] & 0xff; - int inSize = 1; - if (b1 == SO) { // Shift out - if (currentState != SBCS) - return CoderResult.malformedForLength(1); - else - currentState = DBCS; - } else if (b1 == SI) { - if (currentState != DBCS) - return CoderResult.malformedForLength(1); - else - currentState = SBCS; - } else { - char c = UNMAPPABLE_DECODING; - if (currentState == SBCS) { - c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) - return CoderResult.unmappableForLength(1); - } else { - if (sl - sp < 2) - return CoderResult.UNDERFLOW; - int b2 = sa[sp + 1] & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - if (!isDoubleByte(b1, b2)) - return CoderResult.malformedForLength(2); - return CoderResult.unmappableForLength(2); - } - inSize++; - } - if (dl - dp < 1) - return CoderResult.OVERFLOW; - - da[dp++] = c; - } - sp += inSize; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - } - - protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { - int mark = src.position(); - try { - while (src.hasRemaining()) { - int b1 = src.get() & 0xff; - int inSize = 1; - if (b1 == SO) { // Shift out - if (currentState != SBCS) - return CoderResult.malformedForLength(1); - else - currentState = DBCS; - } else if (b1 == SI) { - if (currentState != DBCS) - return CoderResult.malformedForLength(1); - else - currentState = SBCS; - } else { - char c = UNMAPPABLE_DECODING; - if (currentState == SBCS) { - c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) - return CoderResult.unmappableForLength(1); - } else { - if (src.remaining() < 1) - return CoderResult.UNDERFLOW; - int b2 = src.get()&0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - if (!isDoubleByte(b1, b2)) - return CoderResult.malformedForLength(2); - return CoderResult.unmappableForLength(2); - } - inSize++; - } - - if 
(dst.remaining() < 1) - return CoderResult.OVERFLOW; - - dst.put(c); - } - mark += inSize; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - public int decode(byte[] src, int sp, int len, char[] dst) { - int dp = 0; - int sl = sp + len; - currentState = SBCS; - char repl = replacement().charAt(0); - while (sp < sl) { - int b1 = src[sp++] & 0xff; - if (b1 == SO) { // Shift out - if (currentState != SBCS) - dst[dp++] = repl; - else - currentState = DBCS; - } else if (b1 == SI) { - if (currentState != DBCS) - dst[dp++] = repl; - else - currentState = SBCS; - } else { - char c = UNMAPPABLE_DECODING; - if (currentState == SBCS) { - c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) - c = repl; - } else { - if (sl == sp) { - c = repl; - } else { - int b2 = src[sp++] & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - c = repl; - } - } - } - dst[dp++] = c; - } - } - return dp; - } - } - - // DBCS_ONLY - public static class Decoder_DBCSONLY extends Decoder { - static final char[] b2cSB_UNMAPPABLE; - static { - b2cSB_UNMAPPABLE = new char[0x100]; - Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING); - } - Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { - super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max); - } - } - - // EUC_SIMPLE - // The only thing we need to "override" is to check SS2/SS3 and - // return "malformed" if found - public static class Decoder_EUC_SIM extends Decoder { - private final int SS2 = 0x8E; - private final int SS3 = 0x8F; - - Decoder_EUC_SIM(Charset cs, - char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { - super(cs, b2c, b2cSB, b2Min, b2Max); - } - - // No support provided for G2/G3 for SimpleEUC - protected CoderResult crMalformedOrUnderFlow(int b) { - if (b == SS2 || b == SS3 ) - return CoderResult.malformedForLength(1); - return CoderResult.UNDERFLOW; - } - - protected CoderResult crMalformedOrUnmappable(int b1, int b2) { - if (b1 
== SS2 || b1 == SS3 ) - return CoderResult.malformedForLength(1); - return CoderResult.unmappableForLength(2); - } - - public int decode(byte[] src, int sp, int len, char[] dst) { - int dp = 0; - int sl = sp + len; - char repl = replacement().charAt(0); - while (sp < sl) { - int b1 = src[sp++] & 0xff; - char c = b2cSB[b1]; - if (c == UNMAPPABLE_DECODING) { - if (sp < sl) { - int b2 = src[sp++] & 0xff; - if (b2 < b2Min || b2 > b2Max || - (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { - if (b1 == SS2 || b1 == SS3) { - sp--; - } - c = repl; - } - } else { - c = repl; - } - } - dst[dp++] = c; - } - return dp; - } - } - - public static class Encoder extends CharsetEncoder - implements ArrayEncoder - { - final int MAX_SINGLEBYTE = 0xff; - private final char[] c2b; - private final char[] c2bIndex; - Surrogate.Parser sgp; - - protected Encoder(Charset cs, char[] c2b, char[] c2bIndex) { - super(cs, 2.0f, 2.0f); - this.c2b = c2b; - this.c2bIndex = c2bIndex; - } - - Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex) { - super(cs, avg, max, repl); - this.c2b = c2b; - this.c2bIndex = c2bIndex; - } - - public boolean canEncode(char c) { - return encodeChar(c) != UNMAPPABLE_ENCODING; - } - - Surrogate.Parser sgp() { - if (sgp == null) - sgp = new Surrogate.Parser(); - return sgp; - } - - protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { - char[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - int sl = src.arrayOffset() + src.limit(); - - byte[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - int dl = dst.arrayOffset() + dst.limit(); - - try { - while (sp < sl) { - char c = sa[sp]; - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isSurrogate(c)) { - if (sgp().parse(c, sa, sp, sl) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - return CoderResult.unmappableForLength(1); - } - - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (dl - dp < 2) - 
return CoderResult.OVERFLOW; - da[dp++] = (byte)(bb >> 8); - da[dp++] = (byte)bb; - } else { // SingleByte - if (dl - dp < 1) - return CoderResult.OVERFLOW; - da[dp++] = (byte)bb; - } - - sp++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - } - - protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { - int mark = src.position(); - try { - while (src.hasRemaining()) { - char c = src.get(); - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isSurrogate(c)) { - if (sgp().parse(c, src) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - return CoderResult.unmappableForLength(1); - } - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (dst.remaining() < 2) - return CoderResult.OVERFLOW; - dst.put((byte)(bb >> 8)); - dst.put((byte)(bb)); - } else { - if (dst.remaining() < 1) - return CoderResult.OVERFLOW; - dst.put((byte)bb); - } - mark++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { - if (src.hasArray() && dst.hasArray()) - return encodeArrayLoop(src, dst); - else - return encodeBufferLoop(src, dst); - } - - protected byte[] repl = replacement(); - protected void implReplaceWith(byte[] newReplacement) { - repl = newReplacement; - } - - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + len; - int dl = dst.length; - while (sp < sl) { - char c = src[sp++]; - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(src[sp])) { - sp++; - } - dst[dp++] = repl[0]; - if (repl.length > 1) - dst[dp++] = repl[1]; - continue; - } //else - if (bb > MAX_SINGLEBYTE) { // DoubleByte - dst[dp++] = (byte)(bb >> 8); - dst[dp++] = (byte)bb; - } else { // SingleByte - dst[dp++] = (byte)bb; - } - - } - return dp; - } - - public 
int encodeChar(char ch) { - return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; - } - - // init the c2b and c2bIndex tables from b2c. - static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, - int b2Min, int b2Max, - char[] c2b, char[] c2bIndex) - { - Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING); - int off = 0x100; - - char[][] b2c_ca = new char[b2c.length][]; - char[] b2cSB_ca = null; - if (b2cSB != null) - b2cSB_ca = b2cSB.toCharArray(); - - for (int i = 0; i < b2c.length; i++) { - if (b2c[i] == null) - continue; - b2c_ca[i] = b2c[i].toCharArray(); - } - - if (b2cNR != null) { - int j = 0; - while (j < b2cNR.length()) { - char b = b2cNR.charAt(j++); - char c = b2cNR.charAt(j++); - if (b < 0x100 && b2cSB_ca != null) { - if (b2cSB_ca[b] == c) - b2cSB_ca[b] = UNMAPPABLE_DECODING; - } else { - if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c) - b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING; - } - } - } - - if (b2cSB_ca != null) { // SingleByte - for (int b = 0; b < b2cSB_ca.length; b++) { - char c = b2cSB_ca[b]; - if (c == UNMAPPABLE_DECODING) - continue; - int index = c2bIndex[c >> 8]; - if (index == 0) { - index = off; - off += 0x100; - c2bIndex[c >> 8] = (char)index; - } - c2b[index + (c & 0xff)] = (char)b; - } - } - - for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte - char[] db = b2c_ca[b1]; - if (db == null) - continue; - for (int b2 = b2Min; b2 <= b2Max; b2++) { - char c = db[b2 - b2Min]; - if (c == UNMAPPABLE_DECODING) - continue; - int index = c2bIndex[c >> 8]; - if (index == 0) { - index = off; - off += 0x100; - c2bIndex[c >> 8] = (char)index; - } - c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2); - } - } - - if (c2bNR != null) { - // add c->b only nr entries - for (int i = 0; i < c2bNR.length(); i += 2) { - char b = c2bNR.charAt(i); - char c = c2bNR.charAt(i + 1); - int index = (c >> 8); - if (c2bIndex[index] == 0) { - c2bIndex[index] = (char)off; - off += 0x100; - } - index = c2bIndex[index] + (c & 0xff); - c2b[index] = b; - } - 
} - } - } - - public static class Encoder_DBCSONLY extends Encoder { - Encoder_DBCSONLY(Charset cs, byte[] repl, - char[] c2b, char[] c2bIndex) { - super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex); - } - - public int encodeChar(char ch) { - int bb = super.encodeChar(ch); - if (bb <= MAX_SINGLEBYTE) - return UNMAPPABLE_ENCODING; - return bb; - } - } - - - - public static class Encoder_EBCDIC extends Encoder { - static final int SBCS = 0; - static final int DBCS = 1; - static final byte SO = 0x0e; - static final byte SI = 0x0f; - - protected int currentState = SBCS; - - Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex) { - super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex); - } - - protected void implReset() { - currentState = SBCS; - } - - protected CoderResult implFlush(ByteBuffer out) { - if (currentState == DBCS) { - if (out.remaining() < 1) - return CoderResult.OVERFLOW; - out.put(SI); - } - implReset(); - return CoderResult.UNDERFLOW; - } - - protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { - char[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - int sl = src.arrayOffset() + src.limit(); - byte[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - int dl = dst.arrayOffset() + dst.limit(); - - try { - while (sp < sl) { - char c = sa[sp]; - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isSurrogate(c)) { - if (sgp().parse(c, sa, sp, sl) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - return CoderResult.unmappableForLength(1); - } - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (currentState == SBCS) { - if (dl - dp < 1) - return CoderResult.OVERFLOW; - currentState = DBCS; - da[dp++] = SO; - } - if (dl - dp < 2) - return CoderResult.OVERFLOW; - da[dp++] = (byte)(bb >> 8); - da[dp++] = (byte)bb; - } else { // SingleByte - if (currentState == DBCS) { - if (dl - dp < 1) - return CoderResult.OVERFLOW; - currentState = SBCS; - da[dp++] = SI; - } - if (dl - 
dp < 1) - return CoderResult.OVERFLOW; - da[dp++] = (byte)bb; - - } - sp++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - } - - protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { - int mark = src.position(); - try { - while (src.hasRemaining()) { - char c = src.get(); - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isSurrogate(c)) { - if (sgp().parse(c, src) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - return CoderResult.unmappableForLength(1); - } - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (currentState == SBCS) { - if (dst.remaining() < 1) - return CoderResult.OVERFLOW; - currentState = DBCS; - dst.put(SO); - } - if (dst.remaining() < 2) - return CoderResult.OVERFLOW; - dst.put((byte)(bb >> 8)); - dst.put((byte)(bb)); - } else { // Single-byte - if (currentState == DBCS) { - if (dst.remaining() < 1) - return CoderResult.OVERFLOW; - currentState = SBCS; - dst.put(SI); - } - if (dst.remaining() < 1) - return CoderResult.OVERFLOW; - dst.put((byte)bb); - } - mark++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + len; - while (sp < sl) { - char c = src[sp++]; - int bb = encodeChar(c); - - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(src[sp])) { - sp++; - } - dst[dp++] = repl[0]; - if (repl.length > 1) - dst[dp++] = repl[1]; - continue; - } //else - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (currentState == SBCS) { - currentState = DBCS; - dst[dp++] = SO; - } - dst[dp++] = (byte)(bb >> 8); - dst[dp++] = (byte)bb; - } else { // SingleByte - if (currentState == DBCS) { - currentState = SBCS; - dst[dp++] = SI; - } - dst[dp++] = (byte)bb; - } - } - - if (currentState == DBCS) { - currentState = SBCS; - dst[dp++] = 
SI; - } - return dp; - } - } - - // EUC_SIMPLE - public static class Encoder_EUC_SIM extends Encoder { - Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex) { - super(cs, c2b, c2bIndex); - } - } - -} --- /dev/null 2014-05-19 13:38:05.886368312 -0700 +++ new/src/java.base/share/classes/sun/nio/cs/DoubleByte.java 2015-02-16 10:41:15.000000000 -0800 @@ -0,0 +1,929 @@ +/* + * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package sun.nio.cs; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.util.Arrays; +import sun.nio.cs.Surrogate; +import sun.nio.cs.ArrayDecoder; +import sun.nio.cs.ArrayEncoder; +import static sun.nio.cs.CharsetMapping.*; + +/* + * Four types of "DoubleByte" charsets are implemented in this class + * (1)DoubleByte + * The "most widely used" multibyte charset, a combination of + * a singlebyte character set (usually the ASCII charset) and a + * doublebyte character set. The codepoint values of singlebyte + * and doublebyte don't overlap. Microsoft's multibyte charsets + * and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943, + * 948, 949 and 950 are such charsets. + * + * (2)DoubleByte_EBCDIC + * IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch) + * in and out between the singlebyte character set and doublebyte + * character set. + * + * (3)DoubleByte_SIMPLE_EUC + * It's a "simple" form of EUC encoding scheme: only the + * singlebyte character set G0 and one doublebyte character set + * G1 are defined, G2 (with SS2) and G3 (with SS3) are not used. + * So it is actually the same as the "typical" type (1) mentioned + * above, except it returns "malformed" for the SS2 and SS3 when + * decoding. + * + * (4)DoubleByte ONLY + * A "pure" doublebyte only character set. From implementation + * point of view, this is the type (1) with "decodeSingle" always + * returning unmappable. + * + * For simplicity, all implementations share the same decoding and + * encoding data structure. 
+ * + * Decoding: + * + * char[][] b2c; + * char[] b2cSB; + * int b2Min, b2Max + * + * public char decodeSingle(int b) { + * return b2cSB[b]; + * } + * + * public char decodeDouble(int b1, int b2) { + * if (b2 < b2Min || b2 > b2Max) + * return UNMAPPABLE_DECODING; + * return b2c[b1][b2 - b2Min]; + * } + * + * (1)b2Min, b2Max are the corresponding min and max values of the + * low-half of the double-byte. + * (2)The high 8-bit/b1 of the double-byte is used to index into + * the b2c array. + * + * Encoding: + * + * char[] c2b; + * char[] c2bIndex; + * + * public int encodeChar(char ch) { + * return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; + * } + * + */ + +public class DoubleByte { + + public final static char[] B2C_UNMAPPABLE; + static { + B2C_UNMAPPABLE = new char[0x100]; + Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING); + } + + public static class Decoder extends CharsetDecoder + implements DelegatableDecoder, ArrayDecoder + { + final char[][] b2c; + final char[] b2cSB; + final int b2Min; + final int b2Max; + + // for SimpleEUC override + protected CoderResult crMalformedOrUnderFlow(int b) { + return CoderResult.UNDERFLOW; + } + + protected CoderResult crMalformedOrUnmappable(int b1, int b2) { + if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1) + b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2) + decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2) + return CoderResult.malformedForLength(1); + } + return CoderResult.unmappableForLength(2); + } + + public Decoder(Charset cs, float avgcpb, float maxcpb, + char[][] b2c, char[] b2cSB, + int b2Min, int b2Max) { + super(cs, avgcpb, maxcpb); + this.b2c = b2c; + this.b2cSB = b2cSB; + this.b2Min = b2Min; + this.b2Max = b2Max; + } + + public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { + this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max); + } + + protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { + byte[] sa = src.array(); + int sp = src.arrayOffset() + src.position(); 
+ int sl = src.arrayOffset() + src.limit(); + + char[] da = dst.array(); + int dp = dst.arrayOffset() + dst.position(); + int dl = dst.arrayOffset() + dst.limit(); + + try { + while (sp < sl && dp < dl) { + // inline the decodeSingle/Double() for better performance + int inSize = 1; + int b1 = sa[sp] & 0xff; + char c = b2cSB[b1]; + if (c == UNMAPPABLE_DECODING) { + if (sl - sp < 2) + return crMalformedOrUnderFlow(b1); + int b2 = sa[sp + 1] & 0xff; + if (b2 < b2Min || b2 > b2Max || + (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { + return crMalformedOrUnmappable(b1, b2); + } + inSize++; + } + da[dp++] = c; + sp += inSize; + } + return (sp >= sl) ? CoderResult.UNDERFLOW + : CoderResult.OVERFLOW; + } finally { + src.position(sp - src.arrayOffset()); + dst.position(dp - dst.arrayOffset()); + } + } + + protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { + int mark = src.position(); + try { + + while (src.hasRemaining() && dst.hasRemaining()) { + int b1 = src.get() & 0xff; + char c = b2cSB[b1]; + int inSize = 1; + if (c == UNMAPPABLE_DECODING) { + if (src.remaining() < 1) + return crMalformedOrUnderFlow(b1); + int b2 = src.get() & 0xff; + if (b2 < b2Min || b2 > b2Max || + (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) + return crMalformedOrUnmappable(b1, b2); + inSize++; + } + dst.put(c); + mark += inSize; + } + return src.hasRemaining()? 
CoderResult.OVERFLOW + : CoderResult.UNDERFLOW; + } finally { + src.position(mark); + } + } + + // Make some protected methods public for use by JISAutoDetect + public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { + if (src.hasArray() && dst.hasArray()) + return decodeArrayLoop(src, dst); + else + return decodeBufferLoop(src, dst); + } + + public int decode(byte[] src, int sp, int len, char[] dst) { + int dp = 0; + int sl = sp + len; + char repl = replacement().charAt(0); + while (sp < sl) { + int b1 = src[sp++] & 0xff; + char c = b2cSB[b1]; + if (c == UNMAPPABLE_DECODING) { + if (sp < sl) { + int b2 = src[sp++] & 0xff; + if (b2 < b2Min || b2 > b2Max || + (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { + if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte + b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte + decodeSingle(b2) != UNMAPPABLE_DECODING) { + sp--; + } + } + } + if (c == UNMAPPABLE_DECODING) { + c = repl; + } + } + dst[dp++] = c; + } + return dp; + } + + public void implReset() { + super.implReset(); + } + + public CoderResult implFlush(CharBuffer out) { + return super.implFlush(out); + } + + // decode loops are not using decodeSingle/Double() for performance + // reason. 
public char decodeSingle(int b) {
            // Direct lookup in the single-byte table; b is used as the index
            // without a range check.
            return b2cSB[b];
        }

        // Looks up the char for the byte pair (b1, b2). Returns
        // UNMAPPABLE_DECODING when b1 is not a valid index into b2c or b2
        // lies outside [b2Min, b2Max].
        public char decodeDouble(int b1, int b2) {
            // b1 indexes b2c directly, so b1 == b2c.length is already out of
            // bounds; the upper-bound test has to be ">=" rather than ">".
            if (b1 < 0 || b1 >= b2c.length ||
                b2 < b2Min || b2 > b2Max)
                return UNMAPPABLE_DECODING;
            return b2c[b1][b2 - b2Min];
        }
    }

    // IBM_EBCDIC_DBCS
    //
    // Stateful decoder: SO (0x0e) switches to double-byte mode and SI (0x0f)
    // switches back to single-byte mode.
    public static class Decoder_EBCDIC extends Decoder {
        private static final int SBCS = 0;
        private static final int DBCS = 1;
        private static final int SO = 0x0e;
        private static final int SI = 0x0f;
        // Current shift state, either SBCS or DBCS.
        private int currentState;

        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max);
        }

        // Returns the decoder to single-byte mode.
        public void implReset() {
            currentState = SBCS;
        }

        // Check validity of dbcs ebcdic byte pair values
        //
        // First byte : 0x41 -- 0xFE
        // Second byte: 0x41 -- 0xFE
        // Doublebyte blank: 0x4040
        //
        // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
        // as
        //     if ((b1 != 0x40 || b2 != 0x40) &&
        //         (b2 < 0x41 || b2 > 0xfe)) {...}
        // is not correct/complete (range check for b1)
        //
        private static boolean isDoubleByte(int b1, int b2) {
            return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
                   || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
        }

        // Array-backed decode loop. A shift byte appearing in the state it
        // would switch to is reported as malformed; an undecodable pair is
        // malformed when isDoubleByte() rejects it, unmappable otherwise.
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                // don't check dp/dl together here, it's possible to
                // decode a SO/SI without space in output buffer.
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            // A double-byte char needs both bytes available.
                            if (sl - sp < 2)
                                return CoderResult.UNDERFLOW;
                            int b2 = sa[sp + 1] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;

                        da[dp++] = c;
                    }
                    // sp only advances once the input has been fully handled.
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        // Same logic as decodeArrayLoop, using relative buffer reads. "mark"
        // tracks the position of the last fully consumed input and is
        // restored in the finally block.
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (src.remaining() < 1)
                                return CoderResult.UNDERFLOW;
                            int b2 = src.get() & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }

                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;

                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        // Decodes src[sp .. sp+len) into dst from index 0 and returns the
        // number of chars written. Always starts in single-byte mode.
        // Misplaced shift bytes, unmappable input and a trailing lone lead
        // byte each produce the first char of replacement().
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            currentState = SBCS;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                if (b1 == SO) {  // Shift out
                    if (currentState != SBCS)
                        dst[dp++] = repl;
                    else
                        currentState = DBCS;
                } else if (b1 == SI) {
                    if (currentState != DBCS)
                        dst[dp++] = repl;
                    else
                        currentState = SBCS;
                } else {
                    char c = UNMAPPABLE_DECODING;
                    if (currentState == SBCS) {
                        c = b2cSB[b1];
                        if (c == UNMAPPABLE_DECODING)
                            c = repl;
                    } else {
                        if (sl == sp) {
                            // Lead byte with no trail byte left.
                            c = repl;
                        } else {
                            int b2 = src[sp++] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                c = repl;
                            }
                        }
                    }
                    dst[dp++] = c;
                }
            }
            return dp;
        }
    }

    // DBCS_ONLY
    //
    // Ignores the single-byte table passed to the constructor and uses one
    // in which every entry is UNMAPPABLE_DECODING.
    public static class Decoder_DBCSONLY extends Decoder {
        static final char[] b2cSB_UNMAPPABLE;
        static {
            b2cSB_UNMAPPABLE = new char[0x100];
            Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
        }
        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max);
        }
    }

    // EUC_SIMPLE
    // The only thing we need to "override" is to check SS2/SS3 and
    // return "malformed" if found
    public static class Decoder_EUC_SIM extends Decoder {
        // Constants shared by all instances, hence static.
        private static final int SS2 = 0x8E;
        private static final int SS3 = 0x8F;

        public Decoder_EUC_SIM(Charset cs,
                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max);
        }

        // No support provided for G2/G3 for SimpleEUC
        protected CoderResult crMalformedOrUnderFlow(int b) {
            if (b == SS2 || b == SS3 )
                return CoderResult.malformedForLength(1);
            return CoderResult.UNDERFLOW;
        }

        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            if (b1 == SS2 || b1 == SS3 )
                return CoderResult.malformedForLength(1);
            return CoderResult.unmappableForLength(2);
        }

        // Decodes src[sp .. sp+len) into dst from index 0 and returns the
        // number of chars written. Undecodable input becomes the first char
        // of replacement(); after a failed SS2/SS3 pair only the SS2/SS3
        // byte is consumed.
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            if (b1 == SS2 || b1 == SS3) {
                                // Give the second byte back.
                                sp--;
                            }
                            c = repl;
                        }
                    } else {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }
    }

    // Encoder driven by a two-level char-to-bytes table (see encodeChar).
    // An encoded value above MAX_SINGLEBYTE is written as two bytes, high
    // byte first; anything else is written as one byte.
    public static class Encoder extends CharsetEncoder
                                implements ArrayEncoder
    {
        protected final int MAX_SINGLEBYTE = 0xff;
        private final char[] c2b;
        private final char[] c2bIndex;
        // Created lazily by sgp().
        protected Surrogate.Parser sgp;

        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, 2.0f, 2.0f);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
        }

        public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex) {
            super(cs, avg, max, repl);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
        }

        public boolean canEncode(char c) {
            return encodeChar(c) != UNMAPPABLE_ENCODING;
        }

        // Returns the surrogate parser, creating it on first use.
        protected Surrogate.Parser sgp() {
            if (sgp == null)
                sgp = new Surrogate.Parser();
            return sgp;
        }

        // Array-backed encode loop. sp is only advanced after the bytes for
        // a char have been written, so on any early return the source
        // position still points at the char that could not be processed.
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            // The parser reports malformed surrogates via
                            // error(); a well-formed pair is unmappable.
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }

                    if (bb > MAX_SINGLEBYTE) {    // DoubleByte
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                      // SingleByte
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;
                    }

                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        // Same logic as encodeArrayLoop, using relative buffer access.
        // "mark" counts fully encoded chars and is restored as the source
        // position in the finally block.
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return encodeArrayLoop(src, dst);
            else
                return encodeBufferLoop(src, dst);
        }

        // Local copy of the replacement bytes, kept in sync through
        // implReplaceWith and used by encode(char[], ...).
        protected byte[] repl = replacement();
        protected void implReplaceWith(byte[] newReplacement) {
            repl = newReplacement;
        }

        // Encodes src[sp .. sp+len) into dst from index 0 and returns the
        // number of bytes written. An unmappable char is replaced by the
        // first one or two bytes of repl; a high surrogate followed by a
        // low surrogate is replaced as a single unit. No bounds checks are
        // made on dst.
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                          // SingleByte
                    dst[dp++] = (byte)bb;
                }

            }
            return dp;
        }

        // Two-level lookup: c2bIndex[high byte of ch] is the offset of a
        // 256-entry page in c2b, and the low byte of ch selects the entry.
        // Callers compare the result against UNMAPPABLE_ENCODING.
        public int encodeChar(char ch) {
            return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
        }

        // init the c2b and c2bIndex tables from b2c.
        //
        // b2cNR and c2bNR are read as consecutive (byte value, char) pairs.
        // A b2cNR pair that matches the decode table is excluded from the
        // generated char-to-byte mappings; a c2bNR pair adds a char-to-byte
        // mapping only.
        public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR,
                                   int b2Min, int b2Max,
                                   char[] c2b, char[] c2bIndex)
        {
            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            // Next free page offset in c2b. It starts at 0x100 because a
            // c2bIndex entry of 0 is treated below as "no page allocated".
            int off = 0x100;

            // Work on char[] copies so the b2cNR entries can be blanked out.
            char[][] b2c_ca = new char[b2c.length][];
            char[] b2cSB_ca = null;
            if (b2cSB != null)
                b2cSB_ca = b2cSB.toCharArray();

            for (int i = 0; i < b2c.length; i++) {
                if (b2c[i] == null)
                    continue;
                b2c_ca[i] = b2c[i].toCharArray();
            }

            if (b2cNR != null) {
                int j = 0;
                while (j < b2cNR.length()) {
                    char b  = b2cNR.charAt(j++);
                    char c  = b2cNR.charAt(j++);
                    if (b < 0x100 && b2cSB_ca != null) {
                        if (b2cSB_ca[b] == c)
                            b2cSB_ca[b] = UNMAPPABLE_DECODING;
                    } else {
                        if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
                            b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
                    }
                }
            }

            if (b2cSB_ca != null) {      // SingleByte
                for (int b = 0; b < b2cSB_ca.length; b++) {
                    char c = b2cSB_ca[b];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        // First char seen on this page: allocate it.
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)b;
                }
            }

            for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
                char[] db = b2c_ca[b1];
                if (db == null)
                    continue;
                for (int b2 = b2Min; b2 <= b2Max; b2++) {
                    char c = db[b2 - b2Min];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
                }
            }

            if (c2bNR != null) {
                // add c->b only nr entries
                for (int i = 0; i < c2bNR.length(); i += 2) {
                    char b = c2bNR.charAt(i);
                    char c = c2bNR.charAt(i + 1);
                    int index = (c >> 8);
                    if (c2bIndex[index] == 0) {
                        c2bIndex[index] = (char)off;
                        off += 0x100;
                    }
                    index = c2bIndex[index] + (c & 0xff);
                    c2b[index] = b;
                }
            }
        }
    }

    // Double-byte-only encoder: any char whose table entry fits in a single
    // byte is reported as unmappable.
    public static class Encoder_DBCSONLY extends Encoder {
        public Encoder_DBCSONLY(Charset cs, byte[] repl,
                                char[] c2b, char[] c2bIndex) {
            super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex);
        }

        public int encodeChar(char ch) {
            int bb = super.encodeChar(ch);
            if (bb <= MAX_SINGLEBYTE)
                return UNMAPPABLE_ENCODING;
            return bb;
        }
    }



    // Stateful EBCDIC encoder: writes SO before the first double-byte char
    // of a run and SI before returning to single-byte output.
    public static class Encoder_EBCDIC extends Encoder {
        static final int SBCS = 0;
        static final int DBCS = 1;
        static final byte SO = 0x0e;
        static final byte SI = 0x0f;

        // Current shift state, either SBCS or DBCS.
        protected int currentState = SBCS;

        public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex);
        }

        protected void implReset() {
            currentState = SBCS;
        }

        // Closes an open double-byte run by writing SI, then resets the
        // state. Returns OVERFLOW, leaving the state unchanged, when there
        // is no room for the SI byte.
        protected CoderResult implFlush(ByteBuffer out) {
            if (currentState == DBCS) {
                if (out.remaining() < 1)
                    return CoderResult.OVERFLOW;
                out.put(SI);
            }
            implReset();
            return CoderResult.UNDERFLOW;
        }

        // Array-backed encode loop with shift handling. A shift byte is
        // written, and the state changed, before the room check for the
        // char itself, so an OVERFLOW may be returned after the shift byte
        // has already been emitted.
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            da[dp++] = SO;
                        }
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                    // SingleByte
                        if (currentState == DBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            da[dp++] = SI;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;

                    }
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        // Same logic as encodeArrayLoop, using relative buffer access.
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            dst.put(SO);
                        }
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {                  // Single-byte
                        if (currentState == DBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            dst.put(SI);
                        }
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        // Encodes src[sp .. sp+len) into dst from index 0 and returns the
        // number of bytes written, inserting SO/SI at mode changes and a
        // closing SI if the output would otherwise end in double-byte mode.
        // Replacement bytes for unmappable chars are written without any
        // shift handling. No bounds checks are made on dst.
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);

                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }

            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }
    }

    // EUC_SIMPLE
    public static class Encoder_EUC_SIM extends Encoder {
        public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, c2b, c2bIndex);
        }
    }

}