1 /*
   2  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * Pathname canonicalization for Win32 file systems
  28  */
  29 
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <assert.h>
  35 #include <sys/stat.h>
  36 
  37 #include <windows.h>
  38 #include <winbase.h>
  39 #include <errno.h>
  40 #include "io_util_md.h"
  41 
  42 #undef DEBUG_PATH        /* Define this to debug path code */
  43 
  44 #define isfilesep(c) ((c) == '/' || (c) == '\\')
  45 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
  46 #define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
  47 
  48 
  49 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
  50    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
  51    before copying bytes from src to send - 1. */
  52 
  53 static char *
  54 cp(char *dst, char *dend, char first, char *src, char *send)
  55 {
  56     char *p = src, *q = dst;
  57     if (first != '\0') {
  58         if (q < dend) {
  59             *q++ = first;
  60         } else {
  61             errno = ENAMETOOLONG;
  62             return NULL;
  63         }
  64     }
  65     if (send - p > dend - q) {
  66         errno = ENAMETOOLONG;
  67         return NULL;
  68     }
  69     while (p < send) {
  70         *q++ = *p++;
  71     }
  72     return q;
  73 }
  74 
  75 /* Wide character version of cp */
  76 
  77 static WCHAR*
  78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
  79 {
  80     WCHAR *p = src, *q = dst;
  81     if (first != L'\0') {
  82         if (q < dend) {
  83             *q++ = first;
  84         } else {
  85             errno = ENAMETOOLONG;
  86             return NULL;
  87         }
  88     }
  89     if (send - p > dend - q) {
  90         errno = ENAMETOOLONG;
  91         return NULL;
  92     }
  93     while (p < send)
  94         *q++ = *p++;
  95     return q;
  96 }
  97 
  98 
  99 /* Find first instance of '\\' at or following start.  Return the address of
 100    that byte or the address of the null terminator if '\\' is not found. */
 101 
 102 static char *
 103 nextsep(char *start)
 104 {
 105     char *p = start;
 106     int c;
 107     while ((c = *p) && (c != '\\')) {
 108         p += ((islb(c) && p[1]) ? 2 : 1);
 109     }
 110     return p;
 111 }
 112 
 113 /* Wide character version of nextsep */
 114 
 115 static WCHAR *
 116 wnextsep(WCHAR *start)
 117 {
 118     WCHAR *p = start;
 119     int c;
 120     while ((c = *p) && (c != L'\\'))
 121         p++;
 122     return p;
 123 }
 124 
 125 /* Tell whether the given string contains any wildcard characters */
 126 
 127 static int
 128 wild(char *start)
 129 {
 130     char *p = start;
 131     int c;
 132     while (c = *p) {
 133         if ((c == '*') || (c == '?')) return 1;
 134         p += ((islb(c) && p[1]) ? 2 : 1);
 135     }
 136     return 0;
 137 }
 138 
 139 /* Wide character version of wild */
 140 
 141 static int
 142 wwild(WCHAR *start)
 143 {
 144     WCHAR *p = start;
 145     int c;
 146     while (c = *p) {
 147         if ((c == L'*') || (c == L'?'))
 148             return 1;
 149         p++;
 150     }
 151     return 0;
 152 }
 153 
 154 /* Tell whether the given string contains prohibited combinations of dots.
 155    In the canonicalized form no path element may have dots at its end.
 156    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
 157    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 158 */
 159 static int
 160 dots(char *start)
 161 {
 162     char *p = start;
 163     while (*p) {
 164         if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.'
 165             return 0; // no more dots
 166         p++; // next char
 167         while ((*p) == '.') // go to the end of dots
 168             p++;
 169         if (*p && (*p != '\\')) // path element does not end with a dot
 170             p++; // go to the next char
 171         else
 172             return 1; // path element does end with a dot - prohibited
 173     }
 174     return 0; // no prohibited combinations of dots found
 175 }
 176 
 177 /* Wide character version of dots */
 178 static int
 179 wdots(WCHAR *start)
 180 {
 181     WCHAR *p = start;
 182     // Skip "\\.\" prefix
 183     if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
 184         p = p + 4;
 185 
 186     while (*p) {
 187         if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
 188             return 0; // no more dots
 189         p++; // next char
 190         while ((*p) == L'.') // go to the end of dots
 191             p++;
 192         if (*p && (*p != L'\\')) // path element does not end with a dot
 193             p++; // go to the next char
 194         else
 195             return 1; // path element does end with a dot - prohibited
 196     }
 197     return 0; // no prohibited combinations of dots found
 198 }
 199 
 200 /* If the lookup of a particular prefix fails because the file does not exist,
 201    because it is of the wrong type, because access is denied, or because the
 202    network is unreachable then canonicalization does not fail, it terminates
 203    successfully after copying the rest of the original path to the result path.
 204    Other I/O errors cause an error return.
 205 */
 206 
 207 int
 208 lastErrorReportable()
 209 {
 210     DWORD errval = GetLastError();
 211     if ((errval == ERROR_FILE_NOT_FOUND)
 212         || (errval == ERROR_DIRECTORY)
 213         || (errval == ERROR_PATH_NOT_FOUND)
 214         || (errval == ERROR_BAD_NETPATH)
 215         || (errval == ERROR_BAD_NET_NAME)
 216         || (errval == ERROR_ACCESS_DENIED)
 217         || (errval == ERROR_NETWORK_UNREACHABLE)
 218         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
 219         return 0;
 220     }
 221 
 222 #ifdef DEBUG_PATH
 223     jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
 224 #endif
 225     return 1;
 226 }
 227 
 228 /* Convert a pathname to canonical form.  The input orig_path is assumed to
 229    have been converted to native form already, via JVM_NativePath().  This is
 230    necessary because _fullpath() rejects duplicate separator characters on
 231    Win95, though it accepts them on NT. */
 232 
 233 int
 234 canonicalize(char *orig_path, char *result, int size)
 235 {
 236     WIN32_FIND_DATA fd;
 237     HANDLE h;
 238     char path[1024];    /* Working copy of path */
 239     char *src, *dst, *dend;
 240 
 241     /* Reject paths that contain wildcards */
 242     if (wild(orig_path)) {
 243         errno = EINVAL;
 244         return -1;
 245     }
 246 
 247     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 248        contrary to the documentation, the _fullpath procedure does not require
 249        the drive to be available.  It also does not reliably change all
 250        occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
 251     if(!_fullpath(path, orig_path, sizeof(path))) {
 252         return -1;
 253     }
 254 
 255     /* Correction for Win95: _fullpath may leave a trailing "\\"
 256        on a UNC pathname */
 257     if ((path[0] == '\\') && (path[1] == '\\')) {
 258         char *p = path + strlen(path);
 259         if ((p[-1] == '\\') && !islb(p[-2])) {
 260             p[-1] = '\0';
 261         }
 262     }
 263 
 264     if (dots(path)) /* Check for prohibited combinations of dots */
 265         return -1;
 266 
 267     src = path;            /* Start scanning here */
 268     dst = result;        /* Place results here */
 269     dend = dst + size;        /* Don't go to or past here */
 270 
 271     /* Copy prefix, assuming path is absolute */
 272     if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
 273         /* Drive specifier */
 274         *src = toupper(*src);    /* Canonicalize drive letter */
 275         if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
 276             return -1;
 277         }
 278         src += 2;
 279     } else if ((src[0] == '\\') && (src[1] == '\\')) {
 280         /* UNC pathname */
 281         char *p;
 282         p = nextsep(src + 2);    /* Skip past host name */
 283         if (!*p) {
 284         /* A UNC pathname must begin with "\\\\host\\share",
 285            so reject this path as invalid if there is no share name */
 286             errno = EINVAL;
 287             return -1;
 288     }
 289     p = nextsep(p + 1);    /* Skip past share name */
 290     if (!(dst = cp(dst, dend, '\0', src, p))) {
 291         return -1;
 292     }
 293     src = p;
 294     } else {
 295         /* Invalid path */
 296         errno = EINVAL;
 297         return -1;
 298     }
 299 
 300     /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
 301     /* for root pathes like "E:\" . If the path has this form, we should  */
 302     /* simply return it, it is already canonicalized. */
 303     if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
 304         /* At this point we have already copied the drive specifier ("z:")*/
 305         /* so we need to copy "\" and the null character. */
 306         result[2] = '\\';
 307         result[3] = '\0';
 308         return 0;
 309     }
 310 
 311     /* At this point we have copied either a drive specifier ("z:") or a UNC
 312        prefix ("\\\\host\\share") to the result buffer, and src points to the
 313        first byte of the remainder of the path.  We now scan through the rest
 314        of the path, looking up each prefix in order to find the true name of
 315        the last element of each prefix, thereby computing the full true name of
 316        the original path. */
 317     while (*src) {
 318         char *p = nextsep(src + 1);    /* Find next separator */
 319         char c = *p;
 320         assert(*src == '\\');        /* Invariant */
 321         *p = '\0';            /* Temporarily clear separator */
 322         h = FindFirstFile(path, &fd);    /* Look up prefix */
 323         *p = c;                /* Restore separator */
 324         if (h != INVALID_HANDLE_VALUE) {
 325             /* Lookup succeeded; append true name to result and continue */
 326             FindClose(h);
 327             if (!(dst = cp(dst, dend, '\\',
 328                            fd.cFileName,
 329                            fd.cFileName + strlen(fd.cFileName)))) {
 330                 return -1;
 331             }
 332             src = p;
 333             continue;
 334         } else {
 335             if (!lastErrorReportable()) {
 336                 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 337                     return -1;
 338                 }
 339                 break;
 340             } else {
 341                 return -1;
 342             }
 343         }
 344     }
 345 
 346     if (dst >= dend) {
 347     errno = ENAMETOOLONG;
 348     return -1;
 349     }
 350     *dst = '\0';
 351     return 0;
 352 
 353 }
 354 
 355 
 356 /* Convert a pathname to canonical form.  The input prefix is assumed
 357    to be in canonical form already, and the trailing filename must not
 358    contain any wildcard, dot/double dot, or other "tricky" characters
 359    that are rejected by the canonicalize() routine above.  This
 360    routine is present to allow the canonicalization prefix cache to be
 361    used while still returning canonical names with the correct
 362    capitalization. */
 363 
 364 int
 365 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
 366 {
 367     WIN32_FIND_DATA fd;
 368     HANDLE h;
 369     char *src, *dst, *dend;
 370 
 371     src = pathWithCanonicalPrefix;
 372     dst = result;        /* Place results here */
 373     dend = dst + size;   /* Don't go to or past here */
 374 
 375     h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
 376     if (h != INVALID_HANDLE_VALUE) {
 377         /* Lookup succeeded; concatenate true name to prefix */
 378         FindClose(h);
 379         if (!(dst = cp(dst, dend, '\0',
 380                        canonicalPrefix,
 381                        canonicalPrefix + strlen(canonicalPrefix)))) {
 382             return -1;
 383         }
 384         if (!(dst = cp(dst, dend, '\\',
 385                        fd.cFileName,
 386                        fd.cFileName + strlen(fd.cFileName)))) {
 387             return -1;
 388         }
 389     } else {
 390         if (!lastErrorReportable()) {
 391             if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 392                 return -1;
 393             }
 394         } else {
 395             return -1;
 396         }
 397     }
 398 
 399     if (dst >= dend) {
 400         errno = ENAMETOOLONG;
 401         return -1;
 402     }
 403     *dst = '\0';
 404     return 0;
 405 }
 406 
 407 
 408 /* Wide character version of canonicalize. Size is a wide-character size. */
 409 
 410 int
 411 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 412 {
 413     WIN32_FIND_DATAW fd;
 414     HANDLE h;
 415     WCHAR *path;    /* Working copy of path */
 416     WCHAR *src, *dst, *dend, c;
 417 
 418     /* Reject paths that contain wildcards */
 419     if (wwild(orig_path)) {
 420         errno = EINVAL;
 421         return -1;
 422     }
 423 
 424     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 425         return -1;
 426 
 427     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 428        contrary to the documentation, the _fullpath procedure does not require
 429        the drive to be available.  */
 430     if(!_wfullpath(path, orig_path, size)) {
 431         goto err;
 432     }
 433 
 434     if (wdots(path)) /* Check for prohibited combinations of dots */
 435         goto err;
 436 
 437     src = path;            /* Start scanning here */
 438     dst = result;        /* Place results here */
 439     dend = dst + size;        /* Don't go to or past here */
 440 
 441     /* Copy prefix, assuming path is absolute */
 442     c = src[0];
 443     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
 444        && (src[1] == L':') && (src[2] == L'\\')) {
 445         /* Drive specifier */
 446         *src = towupper(*src);    /* Canonicalize drive letter */
 447         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
 448             goto err;
 449         }
 450 
 451         src += 2;
 452     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
 453         /* UNC pathname */
 454         WCHAR *p;
 455         p = wnextsep(src + 2);    /* Skip past host name */
 456         if (!*p) {
 457             /* A UNC pathname must begin with "\\\\host\\share",
 458                so reject this path as invalid if there is no share name */
 459             errno = EINVAL;
 460             goto err;
 461         }
 462         p = wnextsep(p + 1);    /* Skip past share name */
 463         if (!(dst = wcp(dst, dend, L'\0', src, p)))
 464             goto err;
 465         src = p;
 466     } else {
 467         /* Invalid path */
 468         errno = EINVAL;
 469         goto err;
 470     }
 471     /* At this point we have copied either a drive specifier ("z:") or a UNC
 472        prefix ("\\\\host\\share") to the result buffer, and src points to the
 473        first byte of the remainder of the path.  We now scan through the rest
 474        of the path, looking up each prefix in order to find the true name of
 475        the last element of each prefix, thereby computing the full true name of
 476        the original path. */
 477     while (*src) {
 478         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
 479         WCHAR c = *p;
 480         WCHAR *pathbuf;
 481         int pathlen;
 482 
 483         assert(*src == L'\\');        /* Invariant */
 484         *p = L'\0';            /* Temporarily clear separator */
 485 
 486         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
 487             pathbuf = getPrefixed(path, pathlen);
 488             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 489             free(pathbuf);
 490         } else
 491             h = FindFirstFileW(path, &fd);    /* Look up prefix */
 492 
 493         *p = c;                /* Restore separator */
 494         if (h != INVALID_HANDLE_VALUE) {
 495             /* Lookup succeeded; append true name to result and continue */
 496             FindClose(h);
 497             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
 498                             fd.cFileName + wcslen(fd.cFileName)))){
 499                 goto err;
 500             }
 501             src = p;
 502             continue;
 503         } else {
 504             if (!lastErrorReportable()) {
 505                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
 506                    goto err;
 507                }
 508                 break;
 509             } else {
 510                 goto err;
 511             }
 512         }
 513     }
 514 
 515     if (dst >= dend) {
 516     errno = ENAMETOOLONG;
 517         goto err;
 518     }
 519     *dst = L'\0';
 520     free(path);
 521     return 0;
 522 
 523  err:
 524     free(path);
 525     return -1;
 526 }
 527 
 528 
 529 /* Wide character version of canonicalizeWithPrefix. */
 530 
 531 int
 532 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 533 {
 534     WIN32_FIND_DATAW fd;
 535     HANDLE h;
 536     WCHAR *src, *dst, *dend;
 537     WCHAR *pathbuf;
 538     int pathlen;
 539 
 540     src = pathWithCanonicalPrefix;
 541     dst = result;        /* Place results here */
 542     dend = dst + size;   /* Don't go to or past here */
 543 
 544 
 545     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
 546         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
 547         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 548         free(pathbuf);
 549     } else
 550         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
 551     if (h != INVALID_HANDLE_VALUE) {
 552         /* Lookup succeeded; append true name to result and continue */
 553         FindClose(h);
 554         if (!(dst = wcp(dst, dend, L'\0',
 555                         canonicalPrefix,
 556                         canonicalPrefix + wcslen(canonicalPrefix)))) {
 557             return -1;
 558         }
 559         if (!(dst = wcp(dst, dend, L'\\',
 560                         fd.cFileName,
 561                         fd.cFileName + wcslen(fd.cFileName)))) {
 562             return -1;
 563         }
 564     } else {
 565         if (!lastErrorReportable()) {
 566             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
 567                 return -1;
 568             }
 569         } else {
 570             return -1;
 571         }
 572     }
 573 
 574     if (dst >= dend) {
 575         errno = ENAMETOOLONG;
 576         return -1;
 577     }
 578     *dst = L'\0';
 579     return 0;
 580 }
 581 
 582 
 583 /* The appropriate location of getPrefixed() should be io_util_md.c, but
 584    java.lang.instrument package has hardwired canonicalize_md.c into their
 585    dll, to avoid complicate solution such as including io_util_md.c into
 586    that package, as a workaround we put this method here.
 587  */
 588 
 589 /* copy \\?\ or \\?\UNC\ to the front of path*/
 590 WCHAR*
 591 getPrefixed(const WCHAR* path, int pathlen) {
 592     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
 593     if (pathbuf != 0) {
 594         if (path[0] == L'\\' && path[1] == L'\\') {
 595             if (path[2] == L'?' && path[3] == L'\\'){
 596                 /* if it already has a \\?\ don't do the prefix */
 597                 wcscpy(pathbuf, path );
 598             } else {
 599                 /* only UNC pathname includes double slashes here */
 600                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
 601                 wcscat(pathbuf, path + 1);
 602             }
 603         } else {
 604             wcscpy(pathbuf, L"\\\\?\\\0");
 605             wcscat(pathbuf, path );
 606         }
 607     }
 608     return pathbuf;
 609 }