1 /*
   2  * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdio.h>
  27 #include <string.h>
  28 #include "jni.h"
  29 
  30 #ifndef max
  31 #define max(a,b) ( (a>b) ? a : b )
  32 #endif
  33 #ifndef min
  34 #define min(a,b) ( (a<b) ? a : b )
  35 #endif
  36 
/*
 * Validates that a URI path component does not contain any illegal
 * characters. Ported from src/share/classes/java/net/URI.java.
 */
  41 
  42 static jlong L_HEX;
  43 static jlong H_HEX;
  44 static jlong L_PATH;
  45 static jlong H_PATH;
  46 
  47 /* Compute the low-order mask for the characters in the given string */
  48 static jlong lowMask(char* s) {
  49     size_t n = strlen(s);
  50     jlong m = 0;
  51     size_t i;
  52     for (i = 0; i < n; i++) {
  53         int c = (int)s[i];
  54         if (c < 64)
  55             m |= ((jlong)1 << c);
  56     }
  57     return m;
  58 }
  59 
  60 /* Compute the high-order mask for the characters in the given string */
  61 static jlong highMask(char* s) {
  62     size_t n = strlen(s);
  63     jlong m = 0;
  64     size_t i;
  65     for (i = 0; i < n; i++) {
  66         int c = (int)s[i];
  67         if ((c >= 64) && (c < 128))
  68             m |= ((jlong)1 << (c - 64));
  69     }
  70     return m;
  71 }
  72 
  73 /*
  74  * Compute a low-order mask for the characters
  75  * between first and last, inclusive
  76  */
  77 static jlong lowMaskRange(char first, char last) {
  78     jlong m = 0;
  79     int f = max(min(first, 63), 0);
  80     int l = max(min(last, 63), 0);
  81     int i;
  82 
  83     for (i = f; i <= l; i++)  {
  84         m |= (jlong)1 << i;
  85     }
  86     return m;
  87 }
  88 
  89 /*
  90  * Compute a high-order mask for the characters
  91  * between first and last, inclusive
  92  */
  93 static jlong highMaskRange(char first, char last) {
  94     jlong m = 0;
  95     int f = max(min(first, 127), 64) - 64;
  96     int l = max(min(last, 127), 64) - 64;
  97     int i;
  98     for (i = f; i <= l; i++) {
  99         m |= (jlong)1 << i;
 100     }
 101     return m;
 102 }
 103 
 104 /*
 105  * Tell whether the given character is permitted by the given mask pair
 106  */
 107 static int match(int c, jlong lowMask, jlong highMask) {
 108     if (c >= 0 && c < 64)
 109         if ((((jlong)1 << c) & lowMask) != 0) return 1;
 110     if (c >= 64 && c < 128)
 111         if ((((jlong)1 << (c - 64)) & highMask) != 0) return 1;
 112     return 0;
 113 }
 114 
 115 static void initialize() {
 116     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 117     //            "8" | "9"
 118     jlong L_DIGIT = lowMaskRange('0', '9');
 119     jlong H_DIGIT = 0;
 120 
 121     // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
 122     //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
 123     //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
 124     jlong L_UPALPHA = 0;
 125     jlong H_UPALPHA = highMaskRange('A', 'Z');
 126 
 127     // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
 128     //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
 129     //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
 130     jlong L_LOWALPHA = 0;
 131     jlong H_LOWALPHA = highMaskRange('a', 'z');
 132 
 133     // alpha         = lowalpha | upalpha
 134     jlong L_ALPHA = L_LOWALPHA | L_UPALPHA;
 135     jlong H_ALPHA = H_LOWALPHA | H_UPALPHA;
 136 
 137     // alphanum      = alpha | digit
 138     jlong L_ALPHANUM = L_DIGIT | L_ALPHA;
 139     jlong H_ALPHANUM = H_DIGIT | H_ALPHA;
 140 
 141     // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 142     //                 "(" | ")"
 143     jlong L_MARK = lowMask("-_.!~*'()");
 144     jlong H_MARK = highMask("-_.!~*'()");
 145 
 146     // unreserved    = alphanum | mark
 147     jlong L_UNRESERVED = L_ALPHANUM | L_MARK;
 148     jlong H_UNRESERVED = H_ALPHANUM | H_MARK;
 149 
 150     // pchar         = unreserved |
 151     //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
 152     jlong L_PCHAR = L_UNRESERVED | lowMask(":@&=+$,");
 153     jlong H_PCHAR = H_UNRESERVED | highMask(":@&=+$,");
 154 
 155     // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 156     //                         "a" | "b" | "c" | "d" | "e" | "f"
 157     L_HEX = L_DIGIT;
 158     H_HEX = highMaskRange('A', 'F') | highMaskRange('a', 'f');
 159 
 160     // All valid path characters
 161     L_PATH = L_PCHAR | lowMask(";/");
 162     H_PATH = H_PCHAR | highMask(";/");
 163 }
 164 
 165 
 166 /*
 167  * Validates that the given URI path component does not contain any
 168  * illegal characters. Returns 0 if only validate characters are present.
 169  */
 170 int validatePathChars(const char* path) {
 171     size_t i, n;
 172 
 173     /* initialize on first usage */
 174     if (L_HEX == 0) {
 175         initialize();
 176     }
 177 
 178     i=0;
 179     n = strlen(path);
 180     while (i < n) {
 181         int c = (int)(signed char)path[i];
 182 
 183         /* definitely not us-ascii */
 184         if (c < 0) return -1;
 185 
 186         /* start of an escapted character */
 187         if (c == '%') {
 188             if (i + 3 <= n) {
 189                 int h1 = (int)(signed char)path[i+1];
 190                 int h2 = (int)(signed char)path[i+2];
 191                 if (h1 < 0 || h2 < 0) return -1;
 192                 if (!match(h1, L_HEX, H_HEX)) return -1;
 193                 if (!match(h2, L_HEX, H_HEX)) return -1;
 194                 i += 3;
 195             } else {
 196                /* malformed escape pair */
 197                return -1;
 198             }
 199         } else {
 200             if (!match(c, L_PATH, H_PATH)) return -1;
 201             i++;
 202         }
 203     }
 204 
 205     return 0;
 206 }