1 /*
   2  * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * Pathname canonicalization for Win32 file systems
  28  */
  29 
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <assert.h>
  35 #include <sys/stat.h>
  36 
  37 #include <windows.h>
  38 #include <winbase.h>
  39 #include <errno.h>
  40 
/* We should also include jdk_util.h here, for the prototype of JDK_Canonicalize.
   This isn't possible, though, because canonicalize_md.c is also used in
   other contexts within Oracle.
 */
  45 #include "io_util_md.h"
  46 
  47 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
  48    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
  49    before copying bytes from src to send - 1. */
  50 static WCHAR*
  51 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
  52 {
  53     WCHAR *p = src, *q = dst;
  54     if (first != L'\0') {
  55         if (q < dend) {
  56             *q++ = first;
  57         } else {
  58             errno = ENAMETOOLONG;
  59             return NULL;
  60         }
  61     }
  62     if (send - p > dend - q) {
  63         errno = ENAMETOOLONG;
  64         return NULL;
  65     }
  66     while (p < send)
  67         *q++ = *p++;
  68     return q;
  69 }
  70 
  71 /* Find first instance of '\\' at or following start.  Return the address of
  72    that byte or the address of the null terminator if '\\' is not found. */
  73 static WCHAR *
  74 wnextsep(WCHAR *start)
  75 {
  76     WCHAR *p = start;
  77     int c;
  78     while ((c = *p) && (c != L'\\'))
  79         p++;
  80     return p;
  81 }
  82 
  83 /* Tell whether the given string contains any wildcard characters */
  84 static int
  85 wwild(WCHAR *start)
  86 {
  87     WCHAR *p = start;
  88     int c;
  89     while (c = *p) {
  90         if ((c == L'*') || (c == L'?'))
  91             return 1;
  92         p++;
  93     }
  94     return 0;
  95 }
  96 
  97 /* Tell whether the given string contains prohibited combinations of dots.
  98    In the canonicalized form no path element may have dots at its end.
  99    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
 100    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 101 */
 102 static int
 103 wdots(WCHAR *start)
 104 {
 105     WCHAR *p = start;
 106     // Skip "\\.\" prefix
 107     if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
 108         p = p + 4;
 109 
 110     while (*p) {
 111         if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
 112             return 0; // no more dots
 113         p++; // next char
 114         while ((*p) == L'.') // go to the end of dots
 115             p++;
 116         if (*p && (*p != L'\\')) // path element does not end with a dot
 117             p++; // go to the next char
 118         else
 119             return 1; // path element does end with a dot - prohibited
 120     }
 121     return 0; // no prohibited combinations of dots found
 122 }
 123 
 124 /* If the lookup of a particular prefix fails because the file does not exist,
 125    because it is of the wrong type, because access is denied, or because the
 126    network is unreachable then canonicalization does not fail, it terminates
 127    successfully after copying the rest of the original path to the result path.
 128    Other I/O errors cause an error return.
 129 */
 130 int
 131 lastErrorReportable()
 132 {
 133     DWORD errval = GetLastError();
 134     if ((errval == ERROR_FILE_NOT_FOUND)
 135         || (errval == ERROR_DIRECTORY)
 136         || (errval == ERROR_PATH_NOT_FOUND)
 137         || (errval == ERROR_BAD_NETPATH)
 138         || (errval == ERROR_BAD_NET_NAME)
 139         || (errval == ERROR_ACCESS_DENIED)
 140         || (errval == ERROR_NETWORK_UNREACHABLE)
 141         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
 142         return 0;
 143     }
 144     return 1;
 145 }
 146 
 147 /* Convert a pathname to canonical form.  The input orig_path is assumed to
 148    have been converted to native form already, via JVM_NativePath().  This is
 149    necessary because _fullpath() rejects duplicate separator characters on
 150    Win95, though it accepts them on NT. */
 151 int
 152 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 153 {
 154     WIN32_FIND_DATAW fd;
 155     HANDLE h;
 156     WCHAR *path;    /* Working copy of path */
 157     WCHAR *src, *dst, *dend, c;
 158 
 159     /* Reject paths that contain wildcards */
 160     if (wwild(orig_path)) {
 161         errno = EINVAL;
 162         return -1;
 163     }
 164 
 165     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 166         return -1;
 167 
 168     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 169        contrary to the documentation, the _fullpath procedure does not require
 170        the drive to be available.  */
 171     if(!_wfullpath(path, orig_path, size)) {
 172         goto err;
 173     }
 174 
 175     if (wdots(path)) /* Check for prohibited combinations of dots */
 176         goto err;
 177 
 178     src = path;            /* Start scanning here */
 179     dst = result;        /* Place results here */
 180     dend = dst + size;        /* Don't go to or past here */
 181 
 182     /* Copy prefix, assuming path is absolute */
 183     c = src[0];
 184     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
 185        && (src[1] == L':') && (src[2] == L'\\')) {
 186         /* Drive specifier */
 187         *src = towupper(*src);    /* Canonicalize drive letter */
 188         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
 189             goto err;
 190         }
 191 
 192         src += 2;
 193     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
 194         /* UNC pathname */
 195         WCHAR *p;
 196         p = wnextsep(src + 2);    /* Skip past host name */
 197         if (!*p) {
 198             /* A UNC pathname must begin with "\\\\host\\share",
 199                so reject this path as invalid if there is no share name */
 200             errno = EINVAL;
 201             goto err;
 202         }
 203         p = wnextsep(p + 1);    /* Skip past share name */
 204         if (!(dst = wcp(dst, dend, L'\0', src, p)))
 205             goto err;
 206         src = p;
 207     } else {
 208         /* Invalid path */
 209         errno = EINVAL;
 210         goto err;
 211     }
 212     /* At this point we have copied either a drive specifier ("z:") or a UNC
 213        prefix ("\\\\host\\share") to the result buffer, and src points to the
 214        first byte of the remainder of the path.  We now scan through the rest
 215        of the path, looking up each prefix in order to find the true name of
 216        the last element of each prefix, thereby computing the full true name of
 217        the original path. */
 218     while (*src) {
 219         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
 220         WCHAR c = *p;
 221         WCHAR *pathbuf;
 222         int pathlen;
 223 
 224         assert(*src == L'\\');        /* Invariant */
 225         *p = L'\0';            /* Temporarily clear separator */
 226 
 227         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
 228             pathbuf = getPrefixed(path, pathlen);
 229             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 230             free(pathbuf);
 231         } else
 232             h = FindFirstFileW(path, &fd);    /* Look up prefix */
 233 
 234         *p = c;                /* Restore separator */
 235         if (h != INVALID_HANDLE_VALUE) {
 236             /* Lookup succeeded; append true name to result and continue */
 237             FindClose(h);
 238             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
 239                             fd.cFileName + wcslen(fd.cFileName)))){
 240                 goto err;
 241             }
 242             src = p;
 243             continue;
 244         } else {
 245             if (!lastErrorReportable()) {
 246                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
 247                    goto err;
 248                }
 249                 break;
 250             } else {
 251                 goto err;
 252             }
 253         }
 254     }
 255 
 256     if (dst >= dend) {
 257     errno = ENAMETOOLONG;
 258         goto err;
 259     }
 260     *dst = L'\0';
 261     free(path);
 262     return 0;
 263 
 264  err:
 265     free(path);
 266     return -1;
 267 }
 268 
 269 /* Convert a pathname to canonical form.  The input prefix is assumed
 270    to be in canonical form already, and the trailing filename must not
 271    contain any wildcard, dot/double dot, or other "tricky" characters
 272    that are rejected by the canonicalize() routine above.  This
 273    routine is present to allow the canonicalization prefix cache to be
 274    used while still returning canonical names with the correct
 275    capitalization. */
 276 int
 277 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 278 {
 279     WIN32_FIND_DATAW fd;
 280     HANDLE h;
 281     WCHAR *src, *dst, *dend;
 282     WCHAR *pathbuf;
 283     int pathlen;
 284 
 285     src = pathWithCanonicalPrefix;
 286     dst = result;        /* Place results here */
 287     dend = dst + size;   /* Don't go to or past here */
 288 
 289 
 290     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
 291         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
 292         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 293         free(pathbuf);
 294     } else
 295         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
 296     if (h != INVALID_HANDLE_VALUE) {
 297         /* Lookup succeeded; append true name to result and continue */
 298         FindClose(h);
 299         if (!(dst = wcp(dst, dend, L'\0',
 300                         canonicalPrefix,
 301                         canonicalPrefix + wcslen(canonicalPrefix)))) {
 302             return -1;
 303         }
 304         if (!(dst = wcp(dst, dend, L'\\',
 305                         fd.cFileName,
 306                         fd.cFileName + wcslen(fd.cFileName)))) {
 307             return -1;
 308         }
 309     } else {
 310         if (!lastErrorReportable()) {
 311             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
 312                 return -1;
 313             }
 314         } else {
 315             return -1;
 316         }
 317     }
 318 
 319     if (dst >= dend) {
 320         errno = ENAMETOOLONG;
 321         return -1;
 322     }
 323     *dst = L'\0';
 324     return 0;
 325 }
 326 
 327 /* Non-Wide character version of canonicalize.
 328    Converts to wchar and delegates to wcanonicalize. */
 329 JNIEXPORT int
 330 JDK_Canonicalize(const char *orig, char *out, int len) {
 331     wchar_t* wpath = NULL;
 332     wchar_t* wresult = NULL;
 333     size_t conv;
 334     size_t path_len = strlen(orig);
 335     int ret = -1;
 336 
 337     if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * (path_len + 1))) == NULL) {
 338         goto finish;
 339     }
 340 
 341     if (mbstowcs_s(&conv, wpath, path_len + 1, orig, path_len) != 0) {
 342         goto finish;
 343     }
 344 
 345     if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * len)) == NULL) {
 346         goto finish;
 347     }
 348 
 349     if (wcanonicalize(wpath, wresult, len) != 0) {
 350         goto finish;
 351     }
 352 
 353     if (wcstombs_s(&conv, out, (size_t) len, wresult, (size_t) (len - 1)) != 0) {
 354         goto finish;
 355     }
 356 
 357     // Change return value to success.
 358     ret = 0;
 359 
 360 finish:
 361     free(wresult);
 362     free(wpath);
 363 
 364     return ret;
 365 }
 366 
 367 /* The appropriate location of getPrefixed() is io_util_md.c, but it is
 368    also used in a non-OpenJDK context within Oracle. There, canonicalize_md.c
 369    is already pulled in and compiled, so to avoid more complicated solutions
 370    we keep this method here.
 371  */
 372 
 373 /* copy \\?\ or \\?\UNC\ to the front of path */
 374 JNIEXPORT WCHAR*
 375 getPrefixed(const WCHAR* path, int pathlen) {
 376     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
 377     if (pathbuf != 0) {
 378         if (path[0] == L'\\' && path[1] == L'\\') {
 379             if (path[2] == L'?' && path[3] == L'\\'){
 380                 /* if it already has a \\?\ don't do the prefix */
 381                 wcscpy(pathbuf, path );
 382             } else {
 383                 /* only UNC pathname includes double slashes here */
 384                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
 385                 wcscat(pathbuf, path + 1);
 386             }
 387         } else {
 388             wcscpy(pathbuf, L"\\\\?\\\0");
 389             wcscat(pathbuf, path );
 390         }
 391     }
 392     return pathbuf;
 393 }