< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page


   1 /*
   2  * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 411                 CoderResult cr = ce.encode(cb, bb, true);
 412                 if (!cr.isUnderflow())
 413                     cr.throwException();
 414                 cr = ce.flush(bb);
 415                 if (!cr.isUnderflow())
 416                     cr.throwException();
 417             } catch (CharacterCodingException x) {
 418                 // Substitution is always enabled,
 419                 // so this shouldn't happen
 420                 throw new Error(x);
 421             }
 422             return safeTrim(ba, bb.position(), isTrusted);
 423         }
 424     }
 425 
 426     @HotSpotIntrinsicCandidate
         // Copies chars read from sa into da as single bytes, stopping at the
         // first char that does not fit in one byte (value above 0xFF).
         //
         // sa  - source bytes, read one char at a time via StringUTF16.getChar
         // sp  - index of the first char to read; callers in this file bound it
         //       by (sa.length >> 1), i.e. it counts chars, not bytes
         // da  - destination array, one byte written per accepted char
         // dp  - index in da of the first byte to write
         // len - maximum number of chars to process
         //
         // Returns the number of chars actually copied (0..len); a result
         // smaller than len means the char at sp + result was above 0xFF.
 427     private static int implEncodeISOArray(byte[] sa, int sp,
 428                                           byte[] da, int dp, int len) {
 429         int i = 0;
 430         for (; i < len; i++) {
 431             char c = StringUTF16.getChar(sa, sp++);
                 // Not representable in one byte: leave it for the caller.
 432             if (c > '\u00FF')
 433                 break;
 434             da[dp++] = (byte)c;
 435         }
 436         return i;
 437     }
 438 
 439     static byte[] encode8859_1(byte coder, byte[] val) {
             // Encodes the string bytes in val as ISO-8859-1 and returns them in
             // a newly allocated array. coder selects how val is laid out.
             // Chars above 0xFF are replaced by '?'; a high surrogate that is
             // immediately followed by a low surrogate yields a single '?'.
 440         if (coder == LATIN1) {
                 // val is copied verbatim (presumably it already holds one
                 // ISO-8859-1 byte per char -- confirm against LATIN1's definition).
 441             return Arrays.copyOf(val, val.length);
 442         }
             // Otherwise val is treated as two bytes per char.
 443         int len = val.length >> 1;
             // Worst case is one output byte per input char.
 444         byte[] dst = new byte[len];
 445         int dp = 0;
 446         int sp = 0;
 447         int sl = len;
             // Invariant: len == sl - sp, the number of chars not yet consumed.
 448         while (sp < sl) {
                 // Bulk-copy the longest run of chars that fit in one byte.
 449             int ret = implEncodeISOArray(val, sp, dst, dp, len);
 450             sp = sp + ret;
 451             dp = dp + ret;
 452             if (ret != len) {
                     // The run stopped early: the char at sp is above 0xFF.
 453                 char c = StringUTF16.getChar(val, sp++);
                     // Consume the low half of a surrogate pair as well, so the
                     // pair produces one replacement byte rather than two.
 454                 if (Character.isHighSurrogate(c) && sp < sl &&
 455                     Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
 456                     sp++;
 457                 }
 458                 dst[dp++] = '?';
 459                 len = sl - sp;
 460             }
 461         }
             // Surrogate pairs shrink the output; trim only when that happened.
 462         if (dp == dst.length) {
 463             return dst;
 464         }
 465         return Arrays.copyOf(dst, dp);
 466     }
 467 
 468     static byte[] encodeASCII(byte coder, byte[] val) {
             // Encodes the string bytes in val as 7-bit ASCII and returns them in
             // a newly allocated array. coder selects how val is laid out.
             // Anything outside 0x00..0x7F is replaced by '?'; a high surrogate
             // immediately followed by a low surrogate yields a single '?'.
 469         if (coder == LATIN1) {
                 // One input byte per output byte, so the sizes match exactly.
 470             byte[] dst = new byte[val.length];
 471             for (int i = 0; i < val.length; i++) {
                     // A negative byte is a value in 0x80..0xFF: not ASCII.
 472                 if (val[i] < 0) {
 473                     dst[i] = '?';
 474                 } else {
 475                     dst[i] = val[i];
 476                 }
 477             }
 478             return dst;
 479         }
             // Otherwise val is treated as two bytes per char.
 480         int len = val.length >> 1;
             // Worst case is one output byte per input char.
 481         byte[] dst = new byte[len];
 482         int dp = 0;
 483         for (int i = 0; i < len; i++) {
 484             char c = StringUTF16.getChar(val, i);
 485             if (c < 0x80) {
 486                 dst[dp++] = (byte)c;
 487                 continue;
 488             }
                 // Non-ASCII char. Skip the low half of a surrogate pair so the
                 // pair produces one replacement byte rather than two.
 489             if (Character.isHighSurrogate(c) && i + 1 < len &&
 490                 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
 491                 i++;
 492             }
 493             dst[dp++] = '?';
 494         }
             // Surrogate pairs shrink the output; trim only when that happened.
 495         if (len == dp) {
 496             return dst;
 497         }
 498         return Arrays.copyOf(dst, dp);
 499     }
 500 
 501    static byte[] encodeUTF8(byte coder, byte[] val) {
 502         int dp = 0;
 503         byte[] dst;
 504         if (coder == LATIN1) {
 505             dst = new byte[val.length << 1];
 506             for (int sp = 0; sp < val.length; sp++) {
 507                 byte c = val[sp];
 508                 if (c < 0) {
 509                     dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
 510                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 511                 } else {
 512                     dst[dp++] = c;
 513                 }
 514             }
 515         } else {
 516             int sp = 0;
 517             int sl = val.length >> 1;
 518             dst = new byte[sl * 3];
 519             char c;
 520             while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
 521                 // ascii fast loop;
 522                 dst[dp++] = (byte)c;
 523                 sp++;
 524             }
 525             while (sp < sl) {
 526                 c = StringUTF16.getChar(val, sp++);
 527                 if (c < 0x80) {
 528                     dst[dp++] = (byte)c;
 529                 } else if (c < 0x800) {
 530                     dst[dp++] = (byte)(0xc0 | (c >> 6));
 531                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 532                 } else if (Character.isSurrogate(c)) {
 533                     int uc = -1;
 534                     char c2;
 535                     if (Character.isHighSurrogate(c) && sp < sl &&
 536                         Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
 537                         uc = Character.toCodePoint(c, c2);
 538                     }
 539                     if (uc < 0) {
 540                         dst[dp++] = '?';
 541                     } else {
 542                         dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
 543                         dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 544                         dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 545                         dst[dp++] = (byte)(0x80 | (uc & 0x3f));
 546                         sp++;  // 2 chars
 547                     }
 548                 } else {
 549                     // 3 bytes, 16 bits
 550                     dst[dp++] = (byte)(0xe0 | ((c >> 12)));
 551                     dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 552                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 553                 }
 554             }
 555         }
 556         if (dp == dst.length) {


   1 /*
   2  * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 411                 CoderResult cr = ce.encode(cb, bb, true);
 412                 if (!cr.isUnderflow())
 413                     cr.throwException();
 414                 cr = ce.flush(bb);
 415                 if (!cr.isUnderflow())
 416                     cr.throwException();
 417             } catch (CharacterCodingException x) {
 418                 // Substitution is always enabled,
 419                 // so this shouldn't happen
 420                 throw new Error(x);
 421             }
 422             return safeTrim(ba, bb.position(), isTrusted);
 423         }
 424     }
 425 
 426     @HotSpotIntrinsicCandidate
         // Copies chars read from sa into da as single bytes, stopping at the
         // first char that does not fit in one byte (value above 0xFF).
         //
         // sa  - source bytes, read one char at a time via
         //       StringUTF16.Trusted.getChar
         // sp  - index of the first char to read; callers in this file bound it
         //       by (sa.length >> 1), i.e. it counts chars, not bytes
         // da  - destination array, one byte written per accepted char
         // dp  - index in da of the first byte to write
         // len - maximum number of chars to process
         //
         // Returns the number of chars actually copied (0..len); a result
         // smaller than len means the char at sp + result was above 0xFF.
 427     private static int implEncodeISOArray(byte[] sa, int sp,
 428                                           byte[] da, int dp, int len) {
 429         int i = 0;
 430         for (; i < len; i++) {
 431             char c = StringUTF16.Trusted.getChar(sa, sp++);
                 // Not representable in one byte: leave it for the caller.
 432             if (c > '\u00FF')
 433                 break;
 434             da[dp++] = (byte)c;
 435         }
 436         return i;
 437     }
 438 
 439     static byte[] encode8859_1(byte coder, byte[] val) {
             // Encodes the string bytes in val as ISO-8859-1 and returns them in
             // a newly allocated array. coder selects how val is laid out.
             // Chars above 0xFF are replaced by '?'; a high surrogate that is
             // immediately followed by a low surrogate yields a single '?'.
 440         if (coder == LATIN1) {
                 // val is copied verbatim (presumably it already holds one
                 // ISO-8859-1 byte per char -- confirm against LATIN1's definition).
 441             return Arrays.copyOf(val, val.length);
 442         }
             // Otherwise val is treated as two bytes per char.
 443         int len = val.length >> 1;
             // Worst case is one output byte per input char.
 444         byte[] dst = new byte[len];
 445         int dp = 0;
 446         int sp = 0;
 447         int sl = len;
             // Invariant: len == sl - sp, the number of chars not yet consumed.
 448         while (sp < sl) {
                 // Bulk-copy the longest run of chars that fit in one byte.
 449             int ret = implEncodeISOArray(val, sp, dst, dp, len);
 450             sp = sp + ret;
 451             dp = dp + ret;
 452             if (ret != len) {
                     // The run stopped early: the char at sp is above 0xFF.
 453                 char c = StringUTF16.Trusted.getChar(val, sp++);
                     // Consume the low half of a surrogate pair as well, so the
                     // pair produces one replacement byte rather than two.
 454                 if (Character.isHighSurrogate(c) && sp < sl &&
 455                     Character.isLowSurrogate(StringUTF16.Trusted.getChar(val, sp))) {
 456                     sp++;
 457                 }
 458                 dst[dp++] = '?';
 459                 len = sl - sp;
 460             }
 461         }
             // Surrogate pairs shrink the output; trim only when that happened.
 462         if (dp == dst.length) {
 463             return dst;
 464         }
 465         return Arrays.copyOf(dst, dp);
 466     }
 467 
 468     static byte[] encodeASCII(byte coder, byte[] val) {
             // Encodes the string bytes in val as 7-bit ASCII and returns them in
             // a newly allocated array. coder selects how val is laid out.
             // Anything outside 0x00..0x7F is replaced by '?'; a high surrogate
             // immediately followed by a low surrogate yields a single '?'.
 469         if (coder == LATIN1) {
                 // One input byte per output byte, so the sizes match exactly.
 470             byte[] dst = new byte[val.length];
 471             for (int i = 0; i < val.length; i++) {
                     // A negative byte is a value in 0x80..0xFF: not ASCII.
 472                 if (val[i] < 0) {
 473                     dst[i] = '?';
 474                 } else {
 475                     dst[i] = val[i];
 476                 }
 477             }
 478             return dst;
 479         }
             // Otherwise val is treated as two bytes per char.
 480         int len = val.length >> 1;
             // Worst case is one output byte per input char.
 481         byte[] dst = new byte[len];
 482         int dp = 0;
 483         for (int i = 0; i < len; i++) {
 484             char c = StringUTF16.Trusted.getChar(val, i);
 485             if (c < 0x80) {
 486                 dst[dp++] = (byte)c;
 487                 continue;
 488             }
                 // Non-ASCII char. Skip the low half of a surrogate pair so the
                 // pair produces one replacement byte rather than two.
 489             if (Character.isHighSurrogate(c) && i + 1 < len &&
 490                 Character.isLowSurrogate(StringUTF16.Trusted.getChar(val, i + 1))) {
 491                 i++;
 492             }
 493             dst[dp++] = '?';
 494         }
             // Surrogate pairs shrink the output; trim only when that happened.
 495         if (len == dp) {
 496             return dst;
 497         }
 498         return Arrays.copyOf(dst, dp);
 499     }
 500 
 501    static byte[] encodeUTF8(byte coder, byte[] val) {
 502         int dp = 0;
 503         byte[] dst;
 504         if (coder == LATIN1) {
 505             dst = new byte[val.length << 1];
 506             for (int sp = 0; sp < val.length; sp++) {
 507                 byte c = val[sp];
 508                 if (c < 0) {
 509                     dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
 510                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 511                 } else {
 512                     dst[dp++] = c;
 513                 }
 514             }
 515         } else {
 516             int sp = 0;
 517             int sl = val.length >> 1;
 518             dst = new byte[sl * 3];
 519             char c;
 520             while (sp < sl && (c = StringUTF16.Trusted.getChar(val, sp)) < '\u0080') {
 521                 // ascii fast loop;
 522                 dst[dp++] = (byte)c;
 523                 sp++;
 524             }
 525             while (sp < sl) {
 526                 c = StringUTF16.Trusted.getChar(val, sp++);
 527                 if (c < 0x80) {
 528                     dst[dp++] = (byte)c;
 529                 } else if (c < 0x800) {
 530                     dst[dp++] = (byte)(0xc0 | (c >> 6));
 531                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 532                 } else if (Character.isSurrogate(c)) {
 533                     int uc = -1;
 534                     char c2;
 535                     if (Character.isHighSurrogate(c) && sp < sl &&
 536                         Character.isLowSurrogate(c2 = StringUTF16.Trusted.getChar(val, sp))) {
 537                         uc = Character.toCodePoint(c, c2);
 538                     }
 539                     if (uc < 0) {
 540                         dst[dp++] = '?';
 541                     } else {
 542                         dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
 543                         dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 544                         dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 545                         dst[dp++] = (byte)(0x80 | (uc & 0x3f));
 546                         sp++;  // 2 chars
 547                     }
 548                 } else {
 549                     // 3 bytes, 16 bits
 550                     dst[dp++] = (byte)(0xe0 | ((c >> 12)));
 551                     dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 552                     dst[dp++] = (byte)(0x80 | (c & 0x3f));
 553                 }
 554             }
 555         }
 556         if (dp == dst.length) {


< prev index next >