< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page

        

@@ -34,10 +34,12 @@
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.MalformedInputException;
+import java.nio.charset.UnmappableCharacterException;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.Arrays;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import sun.nio.cs.HistoricallyNamedCharset;
 import sun.nio.cs.ArrayDecoder;

@@ -408,11 +409,11 @@
             se = null;
             try {
                 Charset cs = lookupCharset(csn);
                 if (cs != null) {
                     if (cs == UTF_8) {
-                        return encodeUTF8(coder, val, true);
+                        return encodeUTF8(coder, val, true, false);
                     }
                     if (cs == ISO_8859_1) {
                         return encode8859_1(coder, val);
                     }
                     if (cs == US_ASCII) {

@@ -429,11 +430,11 @@
         return se.encode(coder, val);
     }
 
     static byte[] encode(Charset cs, byte coder, byte[] val) {
         if (cs == UTF_8) {
-            return encodeUTF8(coder, val, true);
+            return encodeUTF8(coder, val, true, false);
         }
         if (cs == ISO_8859_1) {
             return encode8859_1(coder, val);
         }
         if (cs == US_ASCII) {

@@ -482,11 +483,11 @@
     }
 
     static byte[] encode(byte coder, byte[] val) {
         Charset cs = Charset.defaultCharset();
         if (cs == UTF_8) {
-            return encodeUTF8(coder, val, true);
+            return encodeUTF8(coder, val, true, false);
         }
         if (cs == ISO_8859_1) {
             return encode8859_1(coder, val);
         }
         if (cs == US_ASCII) {

@@ -587,14 +588,14 @@
         }
         return i;
     }
 
     private static byte[] encode8859_1(byte coder, byte[] val) {
-        return encode8859_1(coder, val, true);
+        return encode8859_1(coder, val, true, false);
     }
 
-    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
+    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace, boolean throwCCE) {
         if (coder == LATIN1) {
             return Arrays.copyOf(val, val.length);
         }
         int len = val.length >> 1;
         byte[] dst = new byte[len];

@@ -605,11 +606,11 @@
             int ret = implEncodeISOArray(val, sp, dst, dp, len);
             sp = sp + ret;
             dp = dp + ret;
             if (ret != len) {
                 if (!doReplace) {
-                    throwMalformed(sp, 1);
+                    throwUnmappable(sp, 1, throwCCE);
                 }
                 char c = StringUTF16.getChar(val, sp++);
                 if (Character.isHighSurrogate(c) && sp < sl &&
                     Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
                     sp++;

@@ -676,32 +677,52 @@
         }
         assert false;
         return -1;
     }
 
-    private static void throwMalformed(int off, int nb) {
+    private static void throwMalformed(int off, int nb, boolean throwCCE) {
+        // Default (throwCCE == false): keep the existing IAE whose message carries off and nb; otherwise wrap a MalformedInputException(nb) in the IAE (off is not carried on that path)
+        if (!throwCCE) {
         throw new IllegalArgumentException("malformed input off : " + off +
                                            ", length : " + nb);
+        } else {
+            throw new IllegalArgumentException(new MalformedInputException(nb));
+        }
     }
 
-    private static void throwMalformed(byte[] val) {
+    private static void throwMalformed(byte[] val, boolean throwCCE) {
         int dp = 0;
         while (dp < val.length && val[dp] >=0) { dp++; }
-        throwMalformed(dp, 1);
+        throwMalformed(dp, 1, throwCCE);
+    }
+
+    private static void throwUnmappable(int off, int nb, boolean throwCCE) {
+        // Default (throwCCE == false): throw the same IAE, with the same "malformed input" message text, that these encode call sites produced when they used throwMalformed; otherwise delegate to throwUnmappable(nb)
+        if (!throwCCE) {
+            throw new IllegalArgumentException("malformed input off : " + off +
+                    ", length : " + nb);
+        } else {
+            throwUnmappable(nb);
+        }
+    }
+
+    // Always throws: wraps an UnmappableCharacterException(nb) as the cause of an IllegalArgumentException; the offset may be added if UnmappableCharacterException can take it in the future
+    private static void throwUnmappable(int nb) {
+        throw new IllegalArgumentException(new UnmappableCharacterException(nb));
+    }
 
     private static char repl = '\ufffd';
 
     private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
-        // ascii-bais, which has a relative impact to the non-ascii-only bytes
+        // ascii-bias: this up-front check has a relative cost for input that is not ascii-only
         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
             return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
                                            LATIN1);
-        return decodeUTF8_0(src, sp, len, doReplace);
+        return decodeUTF8_0(src, sp, len, doReplace, false);
     }
 
-    private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
+    private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace, boolean throwCCE) {
         Result ret = resultCached.get();
 
         int sl = sp + len;
         int dp = 0;
         byte[] dst = new byte[len];

@@ -750,11 +771,11 @@
             } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                 if (sp < sl) {
                     int b2 = src[sp++];
                     if (isNotContinuation(b2)) {
                         if (!doReplace) {
-                            throwMalformed(sp - 1, 1);
+                            throwMalformed(sp - 1, 1, throwCCE);
                         }
                         putChar(dst, dp++, repl);
                         sp--;
                     } else {
                         putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^

@@ -762,21 +783,21 @@
                                                   ((byte) 0x80 << 0))));
                     }
                     continue;
                 }
                 if (!doReplace) {
-                    throwMalformed(sp, 1);  // underflow()
+                    throwMalformed(sp, 1, throwCCE);  // underflow()
                 }
                 putChar(dst, dp++, repl);
                 break;
             } else if ((b1 >> 4) == -2) {
                 if (sp + 1 < sl) {
                     int b2 = src[sp++];
                     int b3 = src[sp++];
                     if (isMalformed3(b1, b2, b3)) {
                         if (!doReplace) {
-                            throwMalformed(sp - 3, 3);
+                            throwMalformed(sp - 3, 3, throwCCE);
                         }
                         putChar(dst, dp++, repl);
                         sp -= 3;
                         sp += malformedN(src, sp, 3);
                     } else {

@@ -786,28 +807,28 @@
                                          (((byte) 0xE0 << 12) ^
                                          ((byte) 0x80 <<  6) ^
                                          ((byte) 0x80 <<  0))));
                         if (isSurrogate(c)) {
                             if (!doReplace) {
-                                throwMalformed(sp - 3, 3);
+                                throwMalformed(sp - 3, 3, throwCCE);
                             }
                             putChar(dst, dp++, repl);
                         } else {
                             putChar(dst, dp++, c);
                         }
                     }
                     continue;
                 }
                 if (sp  < sl && isMalformed3_2(b1, src[sp])) {
                     if (!doReplace) {
-                        throwMalformed(sp - 1, 2);
+                        throwMalformed(sp - 1, 2, throwCCE);
                     }
                     putChar(dst, dp++, repl);
                     continue;
                 }
                 if (!doReplace){
-                    throwMalformed(sp, 1);
+                    throwMalformed(sp, 1, throwCCE);
                 }
                 putChar(dst, dp++, repl);
                 break;
             } else if ((b1 >> 3) == -2) {
                 if (sp + 2 < sl) {

@@ -823,11 +844,11 @@
                                ((byte) 0x80 <<  6) ^
                                ((byte) 0x80 <<  0))));
                     if (isMalformed4(b2, b3, b4) ||
                         !isSupplementaryCodePoint(uc)) { // shortest form check
                         if (!doReplace) {
-                            throwMalformed(sp - 4, 4);
+                            throwMalformed(sp - 4, 4, throwCCE);
                         }
                         putChar(dst, dp++, repl);
                         sp -= 4;
                         sp += malformedN(src, sp, 4);
                     } else {

@@ -838,40 +859,40 @@
                 }
                 b1 &= 0xff;
                 if (b1 > 0xf4 ||
                     sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
                     if (!doReplace) {
-                        throwMalformed(sp - 1, 1);  // or 2
+                        throwMalformed(sp - 1, 1, throwCCE);  // or 2
                     }
                     putChar(dst, dp++, repl);
                     continue;
                 }
                 if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
+                    throwMalformed(sp - 1, 1, throwCCE);
                 }
                 sp++;
                 putChar(dst, dp++, repl);
                 if (sp  < sl && isMalformed4_3(src[sp])) {
                     continue;
                 }
                 break;
             } else {
                 if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
+                    throwMalformed(sp - 1, 1, throwCCE);
                 }
                 putChar(dst, dp++, repl);
             }
         }
         if (dp != len) {
             dst = Arrays.copyOf(dst, dp << 1);
         }
         return ret.with(dst, UTF16);
     }
 
-    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
+    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace, boolean throwCCE) {
         if (coder == UTF16)
-            return encodeUTF8_UTF16(val, doReplace);
+            return encodeUTF8_UTF16(val, doReplace, throwCCE);
 
         if (!hasNegatives(val, 0, val.length))
             return Arrays.copyOf(val, val.length);
 
         int dp = 0;

@@ -888,11 +909,11 @@
         if (dp == dst.length)
             return dst;
         return Arrays.copyOf(dst, dp);
     }
 
-    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
+    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace, boolean throwCCE) {
         int dp = 0;
         int sp = 0;
         int sl = val.length >> 1;
         byte[] dst = new byte[sl * 3];
         char c;

@@ -917,11 +938,11 @@
                 }
                 if (uc < 0) {
                     if (doReplace) {
                         dst[dp++] = '?';
                     } else {
-                        throwMalformed(sp - 1, 1); // or 2, does not matter here
+                        throwUnmappable(sp - 1, 1, throwCCE); // or 2, does not matter here
                     }
                 } else {
                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
                     dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
                     dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));

@@ -947,19 +968,19 @@
      * Throws iae, instead of replacing, if malformed or unmappable.
      */
     static String newStringUTF8NoRepl(byte[] src, int off, int len) {
         if (COMPACT_STRINGS && !hasNegatives(src, off, len))
             return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
-        Result ret = decodeUTF8_0(src, off, len, false);
+        Result ret = decodeUTF8_0(src, off, len, false, false);
         return new String(ret.value, ret.coder);
     }
 
     /*
      * Throws iae, instead of replacing, if unmappable.
      */
     static byte[] getBytesUTF8NoRepl(String s) {
-        return encodeUTF8(s.coder(), s.value(), false);
+        return encodeUTF8(s.coder(), s.value(), false, false);
     }
 
     ////////////////////// for j.n.f.Files //////////////////////////
 
     private static boolean isASCII(byte[] src) {

@@ -974,21 +995,21 @@
 
     static String newStringNoRepl(byte[] src, Charset cs) {
         if (cs == UTF_8) {
             if (COMPACT_STRINGS && isASCII(src))
                 return new String(src, LATIN1);
-            Result ret = decodeUTF8_0(src, 0, src.length, false);
+            Result ret = decodeUTF8_0(src, 0, src.length, false, true);
             return new String(ret.value, ret.coder);
         }
         if (cs == ISO_8859_1) {
             return newStringLatin1(src);
         }
         if (cs == US_ASCII) {
             if (isASCII(src)) {
                 return newStringLatin1(src);
             } else {
-                throwMalformed(src);
+                throwMalformed(src, true);
             }
         }
 
         CharsetDecoder cd = cs.newDecoder();
         // ascii fastpath

@@ -1030,24 +1051,24 @@
         byte coder = s.coder();
         if (cs == UTF_8) {
             if (isASCII(val)) {
                 return val;
             }
-            return encodeUTF8(coder, val, false);
+            return encodeUTF8(coder, val, false, true);
         }
         if (cs == ISO_8859_1) {
             if (coder == LATIN1) {
                 return val;
             }
-            return encode8859_1(coder, val, false);
+            return encode8859_1(coder, val, false, true);
         }
         if (cs == US_ASCII) {
             if (coder == LATIN1) {
                 if (isASCII(val)) {
                     return val;
                 } else {
-                    throwMalformed(val);
+                    throwUnmappable(1);
                 }
             }
         }
         CharsetEncoder ce = cs.newEncoder();
         // fastpath for ascii compatible

@@ -1081,10 +1102,10 @@
                 cr.throwException();
             cr = ce.flush(bb);
             if (!cr.isUnderflow())
                 cr.throwException();
         } catch (CharacterCodingException x) {
-            throw new Error(x);
+            throw new IllegalArgumentException(x);
         }
         return safeTrim(ba, bb.position(), isTrusted);
     }
 }
< prev index next >