/* * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* ******************************************************************************* * Copyright (C) 2003-2004, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ // // CHANGELOG // 2005-05-19 Edward Wang // - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java // - move from package com.ibm.icu.text to package sun.net.idn // - use ParseException instead of StringPrepParseException // 2007-08-14 Martin Buchholz // - remove redundant casts // package jdk.internal.icu.impl; import java.text.ParseException; import jdk.internal.icu.lang.UCharacter; import jdk.internal.icu.text.UTF16; /** * Ported code from ICU punycode.c * @author ram */ /* Package Private class */ public final class Punycode { /* Punycode parameters for Bootstring */ private static final int BASE = 36; private static final int TMIN = 1; private static final int TMAX = 26; private static final int SKEW = 38; private static final int DAMP = 700; private static final int INITIAL_BIAS = 72; private static final int INITIAL_N = 0x80; /* "Basic" Unicode/ASCII code points */ private static final int HYPHEN = 0x2d; private static final int DELIMITER = HYPHEN; private static final int ZERO = 0x30; private static final int NINE = 0x39; private static final int SMALL_A = 0x61; private static final int SMALL_Z = 0x7a; private static final int CAPITAL_A = 0x41; private static final int CAPITAL_Z = 0x5a; // TODO: eliminate the 256 limitation private static final int MAX_CP_COUNT = 256; private static final int UINT_MAGIC = 0x80000000; private static final long ULONG_MAGIC = 0x8000000000000000L; private static int adaptBias(int delta, int length, boolean firstTime){ if(firstTime){ delta /=DAMP; }else{ delta /= 2; } delta += delta/length; int count=0; for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { delta/=(BASE-TMIN); } return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); } /** * basicToDigit[] contains the numeric value of a basic code * point (for use in representing integers) in the range 0 to * BASE-1, or -1 if b is does not represent a value. */ static final int[] basicToDigit= new int[]{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; private static char asciiCaseMap(char b, boolean uppercase) { if(uppercase) { if(SMALL_A<=b && b<=SMALL_Z) { b-=(SMALL_A-CAPITAL_A); } } else { if(CAPITAL_A<=b && b<=CAPITAL_Z) { b+=(SMALL_A-CAPITAL_A); } } return b; } /** * digitToBasic() returns the basic code point whose value * (when used for representing integers) is d, which must be in the * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is * nonzero, in which case the uppercase form is used. */ private static char digitToBasic(int digit, boolean uppercase) { /* 0..25 map to ASCII a..z or A..Z */ /* 26..35 map to ASCII 0..9 */ if(digit<26) { if(uppercase) { return (char)(CAPITAL_A+digit); } else { return (char)(SMALL_A+digit); } } else { return (char)((ZERO-26)+digit); } } /** * Converts Unicode to Punycode. * The input string must not contain single, unpaired surrogates. * The output will be represented as an array of ASCII code points. * * @param src * @param caseFlags * @return * @throws ParseException */ public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{ int[] cpBuffer = new int[MAX_CP_COUNT]; int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; char c, c2; int srcLength = src.length(); int destCapacity = MAX_CP_COUNT; char[] dest = new char[destCapacity]; StringBuffer result = new StringBuffer(); /* * Handle the basic code points and * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): */ srcCPCount=destLength=0; for(j=0; j0) { if(destLength state to , but guard against overflow: */ if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { throw new RuntimeException("Internal program error"); } delta+=(m-n)*(handledCPCount+1); n=m; /* Encode a sequence of same code points n */ for(j=0; jTMAX) { t=TMAX; } */ t=k-bias; if(t=(bias+TMAX)) { t=TMAX; } if(q0;) { if(src.charAt(--j)==DELIMITER) { break; } } destLength=basicLength=destCPCount=j; while(j>0) { b=src.charAt(--j); if(!isBasic(b)) { throw new ParseException("Illegal char found", -1); } if(j0 ? basicLength+1 : 0; in=srcLength) { throw new ParseException("Illegal char found", -1); } digit=basicToDigit[(byte)src.charAt(in++)]; if(digit<0) { throw new ParseException("Invalid char found", -1); } if(digit>(0x7fffffff-i)/w) { /* integer overflow */ throw new ParseException("Illegal char found", -1); } i+=digit*w; t=k-bias; if(t=(bias+TMAX)) { t=TMAX; } if(digit0x7fffffff/(BASE-t)) { /* integer overflow */ throw new ParseException("Illegal char found", -1); } w*=BASE-t; } /* * Modification from sample code: * Increments destCPCount here, * where needed instead of in for() loop tail. */ ++destCPCount; bias=adaptBias(i-oldi, destCPCount, (oldi==0)); /* * i was supposed to wrap around from (incremented) destCPCount to 0, * incrementing n each time, so we'll fix that now: */ if(i/destCPCount>(0x7fffffff-n)) { /* integer overflow */ throw new ParseException("Illegal char found", -1); } n+=i/destCPCount; i%=destCPCount; /* not needed for Punycode: */ /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ if(n>0x10ffff || isSurrogate(n)) { /* Unicode code point overflow */ throw new ParseException("Illegal char found", -1); } /* Insert n at position i of the output: */ cpLength=UTF16.getCharCount(n); if((destLength+cpLength)1) { firstSupplementaryIndex=codeUnitIndex; } else { ++firstSupplementaryIndex; } } else { codeUnitIndex=firstSupplementaryIndex; codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex); } /* use the UChar index codeUnitIndex instead of the code point index i */ if(codeUnitIndex