--- old/src/java.base/share/classes/sun/net/idn/Punycode.java 2020-01-10 15:57:32.000000000 -0800 +++ /dev/null 2020-01-10 15:57:32.000000000 -0800 @@ -1,511 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -/* - ******************************************************************************* - * Copyright (C) 2003-2004, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -// -// CHANGELOG -// 2005-05-19 Edward Wang -// - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java -// - move from package com.ibm.icu.text to package sun.net.idn -// - use ParseException instead of StringPrepParseException -// 2007-08-14 Martin Buchholz -// - remove redundant casts -// -package sun.net.idn; - -import java.text.ParseException; -import sun.text.normalizer.UCharacter; -import sun.text.normalizer.UTF16; - -/** - * Ported code from ICU punycode.c - * @author ram - */ - -/* Package Private class */ -public final class Punycode { - - /* Punycode parameters for Bootstring */ - private static final int BASE = 36; - private static final int TMIN = 1; - private static final int TMAX = 26; - private static final int SKEW = 38; - private static final int DAMP = 700; - private static final int INITIAL_BIAS = 72; - private static final int INITIAL_N = 0x80; - - /* "Basic" Unicode/ASCII code points */ - private static final int HYPHEN = 0x2d; - private static final int DELIMITER = HYPHEN; - - private static final int ZERO = 0x30; - private static final int NINE = 0x39; - - private static final int SMALL_A = 0x61; - private static final int SMALL_Z = 0x7a; - - private static final int CAPITAL_A = 0x41; - private static final int CAPITAL_Z = 0x5a; - - // TODO: eliminate the 256 limitation - private static final int MAX_CP_COUNT = 256; - - private static final int UINT_MAGIC = 0x80000000; - private static final long ULONG_MAGIC = 0x8000000000000000L; - - private static int adaptBias(int delta, int length, boolean firstTime){ - if(firstTime){ - delta /=DAMP; - }else{ - delta /= 2; - } - delta += delta/length; - - int count=0; - for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { - delta/=(BASE-TMIN); - } - - return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); - } - - /** - * basicToDigit[] contains the numeric value of a basic code - * point (for use in representing integers) in the range 0 to - * BASE-1, or -1 if b is does not represent a value. - */ - static final int[] basicToDigit= new int[]{ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - }; - - private static char asciiCaseMap(char b, boolean uppercase) { - if(uppercase) { - if(SMALL_A<=b && b<=SMALL_Z) { - b-=(SMALL_A-CAPITAL_A); - } - } else { - if(CAPITAL_A<=b && b<=CAPITAL_Z) { - b+=(SMALL_A-CAPITAL_A); - } - } - return b; - } - - /** - * digitToBasic() returns the basic code point whose value - * (when used for representing integers) is d, which must be in the - * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is - * nonzero, in which case the uppercase form is used. - */ - private static char digitToBasic(int digit, boolean uppercase) { - /* 0..25 map to ASCII a..z or A..Z */ - /* 26..35 map to ASCII 0..9 */ - if(digit<26) { - if(uppercase) { - return (char)(CAPITAL_A+digit); - } else { - return (char)(SMALL_A+digit); - } - } else { - return (char)((ZERO-26)+digit); - } - } - /** - * Converts Unicode to Punycode. - * The input string must not contain single, unpaired surrogates. - * The output will be represented as an array of ASCII code points. - * - * @param src - * @param caseFlags - * @return - * @throws ParseException - */ - public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{ - - int[] cpBuffer = new int[MAX_CP_COUNT]; - int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; - char c, c2; - int srcLength = src.length(); - int destCapacity = MAX_CP_COUNT; - char[] dest = new char[destCapacity]; - StringBuffer result = new StringBuffer(); - /* - * Handle the basic code points and - * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): - */ - srcCPCount=destLength=0; - - for(j=0; j0) { - if(destLength state to , but guard against overflow: - */ - if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { - throw new RuntimeException("Internal program error"); - } - delta+=(m-n)*(handledCPCount+1); - n=m; - - /* Encode a sequence of same code points n */ - for(j=0; jTMAX) { - t=TMAX; - } - */ - - t=k-bias; - if(t=(bias+TMAX)) { - t=TMAX; - } - - if(q0;) { - if(src.charAt(--j)==DELIMITER) { - break; - } - } - destLength=basicLength=destCPCount=j; - - while(j>0) { - b=src.charAt(--j); - if(!isBasic(b)) { - throw new ParseException("Illegal char found", -1); - } - - if(j0 ? basicLength+1 : 0; in=srcLength) { - throw new ParseException("Illegal char found", -1); - } - - digit=basicToDigit[(byte)src.charAt(in++)]; - if(digit<0) { - throw new ParseException("Invalid char found", -1); - } - if(digit>(0x7fffffff-i)/w) { - /* integer overflow */ - throw new ParseException("Illegal char found", -1); - } - - i+=digit*w; - t=k-bias; - if(t=(bias+TMAX)) { - t=TMAX; - } - if(digit0x7fffffff/(BASE-t)) { - /* integer overflow */ - throw new ParseException("Illegal char found", -1); - } - w*=BASE-t; - } - - /* - * Modification from sample code: - * Increments destCPCount here, - * where needed instead of in for() loop tail. - */ - ++destCPCount; - bias=adaptBias(i-oldi, destCPCount, (oldi==0)); - - /* - * i was supposed to wrap around from (incremented) destCPCount to 0, - * incrementing n each time, so we'll fix that now: - */ - if(i/destCPCount>(0x7fffffff-n)) { - /* integer overflow */ - throw new ParseException("Illegal char found", -1); - } - - n+=i/destCPCount; - i%=destCPCount; - /* not needed for Punycode: */ - /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ - - if(n>0x10ffff || isSurrogate(n)) { - /* Unicode code point overflow */ - throw new ParseException("Illegal char found", -1); - } - - /* Insert n at position i of the output: */ - cpLength=UTF16.getCharCount(n); - if((destLength+cpLength)1) { - firstSupplementaryIndex=codeUnitIndex; - } else { - ++firstSupplementaryIndex; - } - } else { - codeUnitIndex=firstSupplementaryIndex; - codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex); - } - - /* use the UChar index codeUnitIndex instead of the code point index i */ - if(codeUnitIndex((BASE-TMIN)*TMAX)/2; count+=BASE) { + delta/=(BASE-TMIN); + } + + return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); + } + + /** + * basicToDigit[] contains the numeric value of a basic code + * point (for use in representing integers) in the range 0 to + * BASE-1, or -1 if b is does not represent a value. + */ + static final int[] basicToDigit= new int[]{ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, + + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + + private static char asciiCaseMap(char b, boolean uppercase) { + if(uppercase) { + if(SMALL_A<=b && b<=SMALL_Z) { + b-=(SMALL_A-CAPITAL_A); + } + } else { + if(CAPITAL_A<=b && b<=CAPITAL_Z) { + b+=(SMALL_A-CAPITAL_A); + } + } + return b; + } + + /** + * digitToBasic() returns the basic code point whose value + * (when used for representing integers) is d, which must be in the + * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is + * nonzero, in which case the uppercase form is used. + */ + private static char digitToBasic(int digit, boolean uppercase) { + /* 0..25 map to ASCII a..z or A..Z */ + /* 26..35 map to ASCII 0..9 */ + if(digit<26) { + if(uppercase) { + return (char)(CAPITAL_A+digit); + } else { + return (char)(SMALL_A+digit); + } + } else { + return (char)((ZERO-26)+digit); + } + } + /** + * Converts Unicode to Punycode. + * The input string must not contain single, unpaired surrogates. + * The output will be represented as an array of ASCII code points. + * + * @param src + * @param caseFlags + * @return + * @throws ParseException + */ + public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{ + + int[] cpBuffer = new int[MAX_CP_COUNT]; + int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; + char c, c2; + int srcLength = src.length(); + int destCapacity = MAX_CP_COUNT; + char[] dest = new char[destCapacity]; + StringBuffer result = new StringBuffer(); + /* + * Handle the basic code points and + * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): + */ + srcCPCount=destLength=0; + + for(j=0; j0) { + if(destLength state to , but guard against overflow: + */ + if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { + throw new RuntimeException("Internal program error"); + } + delta+=(m-n)*(handledCPCount+1); + n=m; + + /* Encode a sequence of same code points n */ + for(j=0; jTMAX) { + t=TMAX; + } + */ + + t=k-bias; + if(t=(bias+TMAX)) { + t=TMAX; + } + + if(q0;) { + if(src.charAt(--j)==DELIMITER) { + break; + } + } + destLength=basicLength=destCPCount=j; + + while(j>0) { + b=src.charAt(--j); + if(!isBasic(b)) { + throw new ParseException("Illegal char found", -1); + } + + if(j0 ? basicLength+1 : 0; in=srcLength) { + throw new ParseException("Illegal char found", -1); + } + + digit=basicToDigit[(byte)src.charAt(in++)]; + if(digit<0) { + throw new ParseException("Invalid char found", -1); + } + if(digit>(0x7fffffff-i)/w) { + /* integer overflow */ + throw new ParseException("Illegal char found", -1); + } + + i+=digit*w; + t=k-bias; + if(t=(bias+TMAX)) { + t=TMAX; + } + if(digit0x7fffffff/(BASE-t)) { + /* integer overflow */ + throw new ParseException("Illegal char found", -1); + } + w*=BASE-t; + } + + /* + * Modification from sample code: + * Increments destCPCount here, + * where needed instead of in for() loop tail. + */ + ++destCPCount; + bias=adaptBias(i-oldi, destCPCount, (oldi==0)); + + /* + * i was supposed to wrap around from (incremented) destCPCount to 0, + * incrementing n each time, so we'll fix that now: + */ + if(i/destCPCount>(0x7fffffff-n)) { + /* integer overflow */ + throw new ParseException("Illegal char found", -1); + } + + n+=i/destCPCount; + i%=destCPCount; + /* not needed for Punycode: */ + /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ + + if(n>0x10ffff || isSurrogate(n)) { + /* Unicode code point overflow */ + throw new ParseException("Illegal char found", -1); + } + + /* Insert n at position i of the output: */ + cpLength=UTF16.getCharCount(n); + if((destLength+cpLength)1) { + firstSupplementaryIndex=codeUnitIndex; + } else { + ++firstSupplementaryIndex; + } + } else { + codeUnitIndex=firstSupplementaryIndex; + codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex); + } + + /* use the UChar index codeUnitIndex instead of the code point index i */ + if(codeUnitIndex