1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.runtime;
  27 
  28 import static jdk.nashorn.internal.runtime.ECMAErrors.uriError;
  29 
  30 /**
  31  * URI handling global functions. ECMA 15.1.3 URI Handling Function Properties
  32  *
  33  */
  34 public final class URIUtils {
  35 
  36     private URIUtils() {
  37     }
  38 
  39     static String encodeURI(final Object self, final String string) {
  40         return encode(self, string, false);
  41     }
  42 
  43     static String encodeURIComponent(final Object self, final String string) {
  44         return encode(self, string, true);
  45     }
  46 
  47     static String decodeURI(final Object self, final String string) {
  48         return decode(self, string, false);
  49     }
  50 
  51     static String decodeURIComponent(final Object self, final String string) {
  52         return decode(self, string, true);
  53     }
  54 
  55     // abstract encode function
  56     private static String encode(final Object self, final String string, final boolean component) {
  57         if (string.isEmpty()) {
  58             return string;
  59         }
  60 
  61         final int len = string.length();
  62         final StringBuilder sb = new StringBuilder();
  63 
  64         for (int k = 0; k < len; k++) {
  65             final char C = string.charAt(k);
  66             if (isUnescaped(C, component)) {
  67                 sb.append(C);
  68                 continue;
  69             }
  70 
  71             if (C >= 0xDC00 && C <= 0xDFFF) {
  72                 return error(string, k);
  73             }
  74 
  75             int V;
  76             if (C < 0xD800 || C > 0xDBFF) {
  77                 V = C;
  78             } else {
  79                 k++;
  80                 if (k == len) {
  81                     return error(string, k);
  82                 }
  83 
  84                 final char kChar = string.charAt(k);
  85                 if (kChar < 0xDC00 || kChar > 0xDFFF) {
  86                     return error(string, k);
  87                 }
  88                 V = ((C - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000);
  89             }
  90 
  91             try {
  92                 sb.append(toHexEscape(V));
  93             } catch (final Exception e) {
  94                 throw uriError(e, "bad.uri", string, Integer.toString(k));
  95             }
  96         }
  97 
  98         return sb.toString();
  99     }
 100 
 101     // abstract decode function
 102     private static String decode(final Object self, final String string, final boolean component) {
 103         if (string.isEmpty()) {
 104             return string;
 105         }
 106 
 107         final int           len = string.length();
 108         final StringBuilder sb  = new StringBuilder();
 109 
 110         for (int k = 0; k < len; k++) {
 111             final char ch = string.charAt(k);
 112             if (ch != '%') {
 113                 sb.append(ch);
 114                 continue;
 115             }
 116             final int start = k;
 117             if (k + 2 >= len) {
 118                 return error(string, k);
 119             }
 120 
 121             int B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
 122             if (B < 0) {
 123                 return error(string, k + 1);
 124             }
 125 
 126             k += 2;
 127             char C;
 128             // Most significant bit is zero
 129             if ((B & 0x80) == 0) {
 130                 C = (char) B;
 131                 if (!component && URI_RESERVED.indexOf(C) >= 0) {
 132                     for (int j = start; j <= k; j++) {
 133                         sb.append(string.charAt(j));
 134                     }
 135                 } else {
 136                     sb.append(C);
 137                 }
 138             } else {
 139                 // n is utf8 length, V is codepoint and minV is lower bound
 140                 int n, V, minV;
 141 
 142                 if ((B & 0xC0) == 0x80) {
 143                     // 10xxxxxx - illegal first byte
 144                     return error(string, k);
 145                 } else if ((B & 0x20) == 0) {
 146                     // 110xxxxx 10xxxxxx
 147                     n = 2;
 148                     V = B & 0x1F;
 149                     minV = 0x80;
 150                 } else if ((B & 0x10) == 0) {
 151                     // 1110xxxx 10xxxxxx 10xxxxxx
 152                     n = 3;
 153                     V = B & 0x0F;
 154                     minV = 0x800;
 155                 } else if ((B & 0x08) == 0) {
 156                     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 157                     n = 4;
 158                     V = B & 0x07;
 159                     minV = 0x10000;
 160                 } else if ((B & 0x04) == 0) {
 161                     // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 162                     n = 5;
 163                     V =  B & 0x03;
 164                     minV = 0x200000;
 165                 } else if ((B & 0x02) == 0) {
 166                     // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 167                     n = 6;
 168                     V = B & 0x01;
 169                     minV = 0x4000000;
 170                 } else {
 171                     return error(string, k);
 172                 }
 173 
 174                 // check bound for sufficient chars
 175                 if (k + (3*(n-1)) >= len) {
 176                     return error(string, k);
 177                 }
 178 
 179                 for (int j = 1; j < n; j++) {
 180                     k++;
 181                     if (string.charAt(k) != '%') {
 182                         return error(string, k);
 183                     }
 184 
 185                     B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
 186                     if (B < 0 || (B & 0xC0) != 0x80) {
 187                         return error(string, k + 1);
 188                     }
 189 
 190                     V = (V << 6) | (B & 0x3F);
 191                     k += 2;
 192                 }
 193 
 194                 // Check for overlongs and invalid codepoints.
 195                 // The high and low surrogate halves used by UTF-16
 196                 // (U+D800 through U+DFFF) are not legal Unicode values.
 197                 if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) {
 198                     V = Integer.MAX_VALUE;
 199                 }
 200 
 201                 if (V < 0x10000) {
 202                     C = (char) V;
 203                     if (!component && URI_RESERVED.indexOf(C) >= 0) {
 204                         for (int j = start; j != k; j++) {
 205                             sb.append(string.charAt(j));
 206                         }
 207                     } else {
 208                         sb.append(C);
 209                     }
 210                 } else { // V >= 0x10000
 211                     if (V > 0x10FFFF) {
 212                         return error(string, k);
 213                     }
 214                     final int L = ((V - 0x10000) & 0x3FF) + 0xDC00;
 215                     final int H = (((V - 0x10000) >> 10) & 0x3FF) + 0xD800;
 216                     sb.append((char) H);
 217                     sb.append((char) L);
 218                 }
 219             }
 220         }
 221 
 222         return sb.toString();
 223     }
 224 
 225     private static int hexDigit(final char ch) {
 226         final char chu = Character.toUpperCase(ch);
 227         if (chu >= '0' && chu <= '9') {
 228             return (chu - '0');
 229         } else if (chu >= 'A' && chu <= 'F') {
 230             return (chu - 'A' + 10);
 231         } else {
 232             return -1;
 233         }
 234     }
 235 
 236     private static int toHexByte(final char ch1, final char ch2) {
 237         final int i1 = hexDigit(ch1);
 238         final int i2 = hexDigit(ch2);
 239         if (i1 >= 0 && i2 >= 0) {
 240             return (i1 << 4) | i2;
 241         }
 242         return -1;
 243     }
 244 
 245     private static String toHexEscape(final int u0) {
 246         int u = u0;
 247         int len;
 248         final byte[] b = new byte[6];
 249 
 250         if (u <= 0x7f) {
 251             b[0] = (byte) u;
 252             len = 1;
 253         } else {
 254             // > 0x7ff -> length 2
 255             // > 0xffff -> length 3
 256             // and so on. each new length is an additional 5 bits from the
 257             // original 11
 258             // the final mask is 8-len zeros in the low part.
 259             len = 2;
 260             for (int mask = u >>> 11; mask != 0; mask >>>= 5) {
 261                 len++;
 262             }
 263             for (int i = len - 1; i > 0; i--) {
 264                 b[i] = (byte) (0x80 | (u & 0x3f));
 265                 u >>>= 6; // 64 bits per octet.
 266             }
 267 
 268             b[0] = (byte) (~((1 << (8 - len)) - 1) | u);
 269         }
 270 
 271         final StringBuilder sb = new StringBuilder();
 272         for (int i = 0; i < len; i++) {
 273             sb.append('%');
 274             if ((b[i] & 0xff) < 0x10) {
 275                 sb.append('0');
 276             }
 277             sb.append(Integer.toHexString(b[i] & 0xff).toUpperCase());
 278         }
 279 
 280         return sb.toString();
 281     }
 282 
 283     private static String error(final String string, final int index) {
 284         throw uriError("bad.uri", string, Integer.toString(index));
 285     }
 286 
 287     // 'uriEscaped' except for alphanumeric chars
 288     private static final String URI_UNESCAPED_NONALPHANUMERIC = "-_.!~*'()";
 289     // 'uriReserved' + '#'
 290     private static final String URI_RESERVED = ";/?:@&=+$,#";
 291 
 292     private static boolean isUnescaped(final char ch, final boolean component) {
 293         if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')
 294                 || ('0' <= ch && ch <= '9')) {
 295             return true;
 296         }
 297 
 298         if (URI_UNESCAPED_NONALPHANUMERIC.indexOf(ch) >= 0) {
 299             return true;
 300         }
 301 
 302         if (!component) {
 303             return URI_RESERVED.indexOf(ch) >= 0;
 304         }
 305 
 306         return false;
 307     }
 308 }