< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page


   1 /*
   2  * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 572            return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
 573        } else {
 574            return result.with(StringLatin1.inflate(ba, off, len), UTF16);
 575        }
 576     }
 577 
 578     @HotSpotIntrinsicCandidate
 579     private static int implEncodeISOArray(byte[] sa, int sp,
 580                                           byte[] da, int dp, int len) {
 581         int i = 0;
 582         for (; i < len; i++) {
 583             char c = StringUTF16.getChar(sa, sp++);
 584             if (c > '\u00FF')
 585                 break;
 586             da[dp++] = (byte)c;
 587         }
 588         return i;
 589     }
 590 
 591     private static byte[] encode8859_1(byte coder, byte[] val) {




 592         if (coder == LATIN1) {
 593             return Arrays.copyOf(val, val.length);
 594         }
 595         int len = val.length >> 1;
 596         byte[] dst = new byte[len];
 597         int dp = 0;
 598         int sp = 0;
 599         int sl = len;
 600         while (sp < sl) {
 601             int ret = implEncodeISOArray(val, sp, dst, dp, len);
 602             sp = sp + ret;
 603             dp = dp + ret;
 604             if (ret != len) {



 605                 char c = StringUTF16.getChar(val, sp++);
 606                 if (Character.isHighSurrogate(c) && sp < sl &&
 607                     Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
 608                     sp++;
 609                 }
 610                 dst[dp++] = '?';
 611                 len = sl - sp;
 612             }
 613         }
 614         if (dp == dst.length) {
 615             return dst;
 616         }
 617         return Arrays.copyOf(dst, dp);
 618     }
 619 
 620     //////////////////////////////// utf8 ////////////////////////////////////
 621 
 622     private static boolean isNotContinuation(int b) {
 623         return (b & 0xc0) != 0x80;
 624     }


 659             int b1 = src[sp++] & 0xff;
 660             int b2 = src[sp++] & 0xff;
 661             if (b1 > 0xf4 ||
 662                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 663                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 664                 isNotContinuation(b2))
 665                 return 1;
 666             if (isNotContinuation(src[sp++]))
 667                 return 2;
 668             return 3;
 669         }
 670         assert false;
 671         return -1;
 672     }
 673 
 674     private static void throwMalformed(int off, int nb) {
 675         throw new IllegalArgumentException("malformed input off : " + off +
 676                                            ", length : " + nb);
 677     }
 678 






 // U+FFFD, the Unicode REPLACEMENT CHARACTER. Presumably substituted for
 // malformed input by the replacing decode paths -- TODO confirm in decodeUTF8_0.
 679     private static char repl = '\ufffd';
 680 
 681     private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
 682         // ascii-bais, which has a relative impact to the non-ascii-only bytes
 683         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
 684             return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
 685                                            LATIN1);
 686         return decodeUTF8_0(src, sp, len, doReplace);
 687     }
 688 
 689     private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
 690         Result ret = resultCached.get();
 691 
 692         int sl = sp + len;
 693         int dp = 0;
 694         byte[] dst = new byte[len];
 695 
 696         if (COMPACT_STRINGS) {
 697             while (sp < sl) {
 698                 int b1 = src[sp];


 928         return Arrays.copyOf(dst, dp);
 929     }
 930 
 931     ////////////////////// for j.u.z.ZipCoder //////////////////////////
 932 
 933     /*
 934      * Throws iae, instead of replacing, if malformed or unmappble.
 935      */
 936     static String newStringUTF8NoRepl(byte[] src, int off, int len) {
 937         if (COMPACT_STRINGS && !hasNegatives(src, off, len))
 938             return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
 939         Result ret = decodeUTF8_0(src, off, len, false);
 940         return new String(ret.value, ret.coder);
 941     }
 942 
 943     /*
 944      * Throws iae, instead of replacing, if unmappble.
 945      */
 946     static byte[] getBytesUTF8NoRepl(String s) {
 947         return encodeUTF8(s.coder(), s.value(), false);
































































































































 948     }
 949 }
   1 /*
   2  * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 572            return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
 573        } else {
 574            return result.with(StringLatin1.inflate(ba, off, len), UTF16);
 575        }
 576     }
 577 
 578     @HotSpotIntrinsicCandidate
 579     private static int implEncodeISOArray(byte[] sa, int sp,
 580                                           byte[] da, int dp, int len) {
 581         int i = 0;
 582         for (; i < len; i++) {
 583             char c = StringUTF16.getChar(sa, sp++);
 584             if (c > '\u00FF')
 585                 break;
 586             da[dp++] = (byte)c;
 587         }
 588         return i;
 589     }
 590 
 591     private static byte[] encode8859_1(byte coder, byte[] val) {
 592         return encode8859_1(coder, val, true);
 593     }
 594 
 595     private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
 596         if (coder == LATIN1) {
 597             return Arrays.copyOf(val, val.length);
 598         }
 599         int len = val.length >> 1;
 600         byte[] dst = new byte[len];
 601         int dp = 0;
 602         int sp = 0;
 603         int sl = len;
 604         while (sp < sl) {
 605             int ret = implEncodeISOArray(val, sp, dst, dp, len);
 606             sp = sp + ret;
 607             dp = dp + ret;
 608             if (ret != len) {
 609                 if (!doReplace) {
 610                     throwMalformed(sp, 1);
 611                 }
 612                 char c = StringUTF16.getChar(val, sp++);
 613                 if (Character.isHighSurrogate(c) && sp < sl &&
 614                     Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
 615                     sp++;
 616                 }
 617                 dst[dp++] = '?';
 618                 len = sl - sp;
 619             }
 620         }
 621         if (dp == dst.length) {
 622             return dst;
 623         }
 624         return Arrays.copyOf(dst, dp);
 625     }
 626 
 627     //////////////////////////////// utf8 ////////////////////////////////////
 628 
 629     private static boolean isNotContinuation(int b) {
 630         return (b & 0xc0) != 0x80;
 631     }


 666             int b1 = src[sp++] & 0xff;
 667             int b2 = src[sp++] & 0xff;
 668             if (b1 > 0xf4 ||
 669                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 670                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 671                 isNotContinuation(b2))
 672                 return 1;
 673             if (isNotContinuation(src[sp++]))
 674                 return 2;
 675             return 3;
 676         }
 677         assert false;
 678         return -1;
 679     }
 680 
 681     private static void throwMalformed(int off, int nb) {
 682         throw new IllegalArgumentException("malformed input off : " + off +
 683                                            ", length : " + nb);
 684     }
 685 
 686     private static void throwMalformed(byte[] val) {
 687         int dp = 0;
 688         while (dp < val.length && val[dp] >=0) { dp++; }
 689         throwMalformed(dp, 1);
 690     }
 691 
 // U+FFFD, the Unicode REPLACEMENT CHARACTER. Presumably substituted for
 // malformed input by the replacing decode paths -- TODO confirm in decodeUTF8_0.
 692     private static char repl = '\ufffd';
 693 
 694     private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
 695         // ascii-bais, which has a relative impact to the non-ascii-only bytes
 696         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
 697             return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
 698                                            LATIN1);
 699         return decodeUTF8_0(src, sp, len, doReplace);
 700     }
 701 
 702     private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
 703         Result ret = resultCached.get();
 704 
 705         int sl = sp + len;
 706         int dp = 0;
 707         byte[] dst = new byte[len];
 708 
 709         if (COMPACT_STRINGS) {
 710             while (sp < sl) {
 711                 int b1 = src[sp];


 941         return Arrays.copyOf(dst, dp);
 942     }
 943 
 944     ////////////////////// for j.u.z.ZipCoder //////////////////////////
 945 
 946     /*
 947      * Throws iae, instead of replacing, if malformed or unmappble.
 948      */
 949     static String newStringUTF8NoRepl(byte[] src, int off, int len) {
 950         if (COMPACT_STRINGS && !hasNegatives(src, off, len))
 951             return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
 952         Result ret = decodeUTF8_0(src, off, len, false);
 953         return new String(ret.value, ret.coder);
 954     }
 955 
 956     /*
 957      * Throws iae, instead of replacing, if unmappble.
 958      */
 959     static byte[] getBytesUTF8NoRepl(String s) {
 960         return encodeUTF8(s.coder(), s.value(), false);
 961     }
 962 
 963     ////////////////////// for j.n.f.Files //////////////////////////
 964 
 965     private static boolean isASCII(byte[] src) {
 966         return !hasNegatives(src, 0, src.length);
 967     }
 968 
 969     private static String newStringLatin1(byte[] src) {
 970         if (COMPACT_STRINGS)
 971            return new String(src, LATIN1);
 972         return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 973     }
 974 
 975     static String newStringNoRepl(byte[] src, Charset cs) {
 976         if (cs == UTF_8) {
 977             if (COMPACT_STRINGS && isASCII(src))
 978                 return new String(src, LATIN1);
 979             Result ret = decodeUTF8_0(src, 0, src.length, false);
 980             return new String(ret.value, ret.coder);
 981         }
 982         if (cs == ISO_8859_1) {
 983             return newStringLatin1(src);
 984         }
 985         if (cs == US_ASCII) {
 986             if (isASCII(src)) {
 987                 return newStringLatin1(src);
 988             } else {
 989                 throwMalformed(src);
 990             }
 991         }
 992 
 993         CharsetDecoder cd = cs.newDecoder();
 994         // ascii fastpath
 995         if ((cd instanceof ArrayDecoder) &&
 996             ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
 997             return newStringLatin1(src);
 998         }
 999         int len = src.length;
1000         if (len == 0) {
1001             return "";
1002         }
1003         int en = scale(len, cd.maxCharsPerByte());
1004         char[] ca = new char[en];
1005         if (cs.getClass().getClassLoader0() != null &&
1006             System.getSecurityManager() != null) {
1007             src = Arrays.copyOf(src, len);
1008         }
1009         ByteBuffer bb = ByteBuffer.wrap(src);
1010         CharBuffer cb = CharBuffer.wrap(ca);
1011         try {
1012             CoderResult cr = cd.decode(bb, cb, true);
1013             if (!cr.isUnderflow())
1014                 cr.throwException();
1015             cr = cd.flush(cb);
1016             if (!cr.isUnderflow())
1017                 cr.throwException();
1018         } catch (CharacterCodingException x) {
1019             throw new IllegalArgumentException(x);  // todo
1020         }
1021         Result ret = resultCached.get().with(ca, 0, cb.position());
1022         return new String(ret.value, ret.coder);
1023     }
1024 
1025     /*
1026      * Throws iae, instead of replacing, if unmappble.
1027      */
1028     static byte[] getBytesNoRepl(String s, Charset cs) {
1029         byte[] val = s.value();
1030         byte coder = s.coder();
1031         if (cs == UTF_8) {
1032             if (isASCII(val)) {
1033                 return val;
1034             }
1035             return encodeUTF8(coder, val, false);
1036         }
1037         if (cs == ISO_8859_1) {
1038             if (coder == LATIN1) {
1039                 return val;
1040             }
1041             return encode8859_1(coder, val, false);
1042         }
1043         if (cs == US_ASCII) {
1044             if (coder == LATIN1) {
1045                 if (isASCII(val)) {
1046                     return val;
1047                 } else {
1048                     throwMalformed(val);
1049                 }
1050             }
1051         }
1052         CharsetEncoder ce = cs.newEncoder();
1053         // fastpath for ascii compatible
1054         if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
1055                                  ((ArrayEncoder)ce).isASCIICompatible() &&
1056                                  isASCII(val)))) {
1057             return val;
1058         }
1059         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
1060         int en = scale(len, ce.maxBytesPerChar());
1061         byte[] ba = new byte[en];
1062         if (len == 0) {
1063             return ba;
1064         }
1065         if (ce instanceof ArrayEncoder) {
1066             int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
1067                                           : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
1068             if (blen != -1) {
1069                 return safeTrim(ba, blen, true);
1070             }
1071         }
1072         boolean isTrusted = cs.getClass().getClassLoader0() == null ||
1073                             System.getSecurityManager() == null;
1074         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
1075                                        : StringUTF16.toChars(val);
1076         ByteBuffer bb = ByteBuffer.wrap(ba);
1077         CharBuffer cb = CharBuffer.wrap(ca, 0, len);
1078         try {
1079             CoderResult cr = ce.encode(cb, bb, true);
1080             if (!cr.isUnderflow())
1081                 cr.throwException();
1082             cr = ce.flush(bb);
1083             if (!cr.isUnderflow())
1084                 cr.throwException();
1085         } catch (CharacterCodingException x) {
1086             throw new Error(x);
1087         }
1088         return safeTrim(ba, bb.position(), isTrusted);
1089     }
1090 }
< prev index next >