1 /*
   2  * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * Pathname canonicalization for Win32 file systems
  28  */
  29 
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <assert.h>
  35 #include <sys/stat.h>
  36 
  37 #include <windows.h>
  38 #include <winbase.h>
  39 #include <errno.h>
  40 #include "io_util_md.h"
  41 
  42 #undef DEBUG_PATH        /* Define this to debug path code */
  43 
  44 #define isfilesep(c) ((c) == '/' || (c) == '\\')
  45 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
  46 #define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
  47 
  48 
  49 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
  50    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
  51    before copying bytes from src to send - 1. */
  52 
  53 static char *
  54 cp(char *dst, char *dend, char first, char *src, char *send)
  55 {
  56     char *p = src, *q = dst;
  57     if (first != '\0') {
  58         if (q < dend) {
  59             *q++ = first;
  60         } else {
  61             errno = ENAMETOOLONG;
  62             return NULL;
  63         }
  64     }
  65     if (send - p > dend - q) {
  66         errno = ENAMETOOLONG;
  67         return NULL;
  68     }
  69     while (p < send) {
  70         *q++ = *p++;
  71     }
  72     return q;
  73 }
  74 
  75 /* Wide character version of cp */
  76 
  77 static WCHAR*
  78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
  79 {
  80     WCHAR *p = src, *q = dst;
  81     if (first != L'\0') {
  82         if (q < dend) {
  83             *q++ = first;
  84         } else {
  85             errno = ENAMETOOLONG;
  86             return NULL;
  87         }
  88     }
  89     if (send - p > dend - q) {
  90         errno = ENAMETOOLONG;
  91         return NULL;
  92     }
  93     while (p < send)
  94         *q++ = *p++;
  95     return q;
  96 }
  97 
  98 
  99 /* Find first instance of '\\' at or following start.  Return the address of
 100    that byte or the address of the null terminator if '\\' is not found. */
 101 
 102 static char *
 103 nextsep(char *start)
 104 {
 105     char *p = start;
 106     int c;
 107     while ((c = *p) && (c != '\\')) {
 108         p += ((islb(c) && p[1]) ? 2 : 1);
 109     }
 110     return p;
 111 }
 112 
 113 /* Wide character version of nextsep */
 114 
 115 static WCHAR *
 116 wnextsep(WCHAR *start)
 117 {
 118     WCHAR *p = start;
 119     int c;
 120     while ((c = *p) && (c != L'\\'))
 121         p++;
 122     return p;
 123 }
 124 
 125 /* Tell whether the given string contains any wildcard characters */
 126 
 127 static int
 128 wild(char *start)
 129 {
 130     char *p = start;
 131     int c;
 132     while (c = *p) {
 133         if ((c == '*') || (c == '?')) return 1;
 134         p += ((islb(c) && p[1]) ? 2 : 1);
 135     }
 136     return 0;
 137 }
 138 
 139 /* Wide character version of wild */
 140 
 141 static int
 142 wwild(WCHAR *start)
 143 {
 144     WCHAR *p = start;
 145     int c;
 146     while (c = *p) {
 147         if ((c == L'*') || (c == L'?'))
 148             return 1;
 149         p++;
 150     }
 151     return 0;
 152 }
 153 
 154 /* Tell whether the given string contains prohibited combinations of dots.
 155    In the canonicalized form no path element may have dots at its end.
 156    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
 157    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 158 */
 159 static int
 160 dots(char *start)
 161 {
 162     char *p = start;
 163     while (*p) {
 164         if ((p = strchr(p, '.')) == NULL) // find next occurence of '.'
 165             return 0; // no more dots
 166         p++; // next char
 167         while ((*p) == '.') // go to the end of dots
 168             p++;
 169         if (*p && (*p != '\\')) // path element does not end with a dot
 170             p++; // go to the next char
 171         else
 172             return 1; // path element does end with a dot - prohibited
 173     }
 174     return 0; // no prohibited combinations of dots found
 175 }
 176 
 177 /* Wide character version of dots */
 178 static int
 179 wdots(WCHAR *start)
 180 {
 181     WCHAR *p = start;
 182     while (*p) {
 183         if ((p = wcschr(p, L'.')) == NULL) // find next occurence of '.'
 184             return 0; // no more dots
 185         p++; // next char
 186         while ((*p) == L'.') // go to the end of dots
 187             p++;
 188         if (*p && (*p != L'\\')) // path element does not end with a dot
 189             p++; // go to the next char
 190         else
 191             return 1; // path element does end with a dot - prohibited
 192     }
 193     return 0; // no prohibited combinations of dots found
 194 }
 195 
 196 /* If the lookup of a particular prefix fails because the file does not exist,
 197    because it is of the wrong type, because access is denied, or because the
 198    network is unreachable then canonicalization does not fail, it terminates
 199    successfully after copying the rest of the original path to the result path.
 200    Other I/O errors cause an error return.
 201 */
 202 
 203 int
 204 lastErrorReportable()
 205 {
 206     DWORD errval = GetLastError();
 207     if ((errval == ERROR_FILE_NOT_FOUND)
 208         || (errval == ERROR_DIRECTORY)
 209         || (errval == ERROR_PATH_NOT_FOUND)
 210         || (errval == ERROR_BAD_NETPATH)
 211         || (errval == ERROR_BAD_NET_NAME)
 212         || (errval == ERROR_ACCESS_DENIED)
 213         || (errval == ERROR_NETWORK_UNREACHABLE)
 214         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
 215         return 0;
 216     }
 217 
 218 #ifdef DEBUG_PATH
 219     jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
 220 #endif
 221     return 1;
 222 }
 223 
 224 /* Convert a pathname to canonical form.  The input orig_path is assumed to
 225    have been converted to native form already, via JVM_NativePath().  This is
 226    necessary because _fullpath() rejects duplicate separator characters on
 227    Win95, though it accepts them on NT. */
 228 
 229 int
 230 canonicalize(char *orig_path, char *result, int size)
 231 {
 232     WIN32_FIND_DATA fd;
 233     HANDLE h;
 234     char path[1024];    /* Working copy of path */
 235     char *src, *dst, *dend;
 236 
 237     /* Reject paths that contain wildcards */
 238     if (wild(orig_path)) {
 239         errno = EINVAL;
 240         return -1;
 241     }
 242 
 243     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 244        contrary to the documentation, the _fullpath procedure does not require
 245        the drive to be available.  It also does not reliably change all
 246        occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
 247     if(!_fullpath(path, orig_path, sizeof(path))) {
 248         return -1;
 249     }
 250 
 251     /* Correction for Win95: _fullpath may leave a trailing "\\"
 252        on a UNC pathname */
 253     if ((path[0] == '\\') && (path[1] == '\\')) {
 254         char *p = path + strlen(path);
 255         if ((p[-1] == '\\') && !islb(p[-2])) {
 256             p[-1] = '\0';
 257         }
 258     }
 259 
 260     if (dots(path)) /* Check for prohibited combinations of dots */
 261         return -1;
 262 
 263     src = path;            /* Start scanning here */
 264     dst = result;        /* Place results here */
 265     dend = dst + size;        /* Don't go to or past here */
 266 
 267     /* Copy prefix, assuming path is absolute */
 268     if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
 269         /* Drive specifier */
 270         *src = toupper(*src);    /* Canonicalize drive letter */
 271         if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
 272             return -1;
 273         }
 274         src += 2;
 275     } else if ((src[0] == '\\') && (src[1] == '\\')) {
 276         /* UNC pathname */
 277         char *p;
 278         p = nextsep(src + 2);    /* Skip past host name */
 279         if (!*p) {
 280         /* A UNC pathname must begin with "\\\\host\\share",
 281            so reject this path as invalid if there is no share name */
 282             errno = EINVAL;
 283             return -1;
 284     }
 285     p = nextsep(p + 1);    /* Skip past share name */
 286     if (!(dst = cp(dst, dend, '\0', src, p))) {
 287         return -1;
 288     }
 289     src = p;
 290     } else {
 291         /* Invalid path */
 292         errno = EINVAL;
 293         return -1;
 294     }
 295 
 296     /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
 297     /* for root pathes like "E:\" . If the path has this form, we should  */
 298     /* simply return it, it is already canonicalized. */
 299     if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
 300         /* At this point we have already copied the drive specifier ("z:")*/
 301         /* so we need to copy "\" and the null character. */
 302         result[2] = '\\';
 303         result[3] = '\0';
 304         return 0;
 305     }
 306 
 307     /* At this point we have copied either a drive specifier ("z:") or a UNC
 308        prefix ("\\\\host\\share") to the result buffer, and src points to the
 309        first byte of the remainder of the path.  We now scan through the rest
 310        of the path, looking up each prefix in order to find the true name of
 311        the last element of each prefix, thereby computing the full true name of
 312        the original path. */
 313     while (*src) {
 314         char *p = nextsep(src + 1);    /* Find next separator */
 315         char c = *p;
 316         assert(*src == '\\');        /* Invariant */
 317         *p = '\0';            /* Temporarily clear separator */
 318         h = FindFirstFile(path, &fd);    /* Look up prefix */
 319         *p = c;                /* Restore separator */
 320         if (h != INVALID_HANDLE_VALUE) {
 321             /* Lookup succeeded; append true name to result and continue */
 322             FindClose(h);
 323             if (!(dst = cp(dst, dend, '\\',
 324                            fd.cFileName,
 325                            fd.cFileName + strlen(fd.cFileName)))) {
 326                 return -1;
 327             }
 328             src = p;
 329             continue;
 330         } else {
 331             if (!lastErrorReportable()) {
 332                 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 333                     return -1;
 334                 }
 335                 break;
 336             } else {
 337                 return -1;
 338             }
 339         }
 340     }
 341 
 342     if (dst >= dend) {
 343     errno = ENAMETOOLONG;
 344     return -1;
 345     }
 346     *dst = '\0';
 347     return 0;
 348 
 349 }
 350 
 351 
 352 /* Convert a pathname to canonical form.  The input prefix is assumed
 353    to be in canonical form already, and the trailing filename must not
 354    contain any wildcard, dot/double dot, or other "tricky" characters
 355    that are rejected by the canonicalize() routine above.  This
 356    routine is present to allow the canonicalization prefix cache to be
 357    used while still returning canonical names with the correct
 358    capitalization. */
 359 
 360 int
 361 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
 362 {
 363     WIN32_FIND_DATA fd;
 364     HANDLE h;
 365     char *src, *dst, *dend;
 366 
 367     src = pathWithCanonicalPrefix;
 368     dst = result;        /* Place results here */
 369     dend = dst + size;   /* Don't go to or past here */
 370 
 371     h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
 372     if (h != INVALID_HANDLE_VALUE) {
 373         /* Lookup succeeded; concatenate true name to prefix */
 374         FindClose(h);
 375         if (!(dst = cp(dst, dend, '\0',
 376                        canonicalPrefix,
 377                        canonicalPrefix + strlen(canonicalPrefix)))) {
 378             return -1;
 379         }
 380         if (!(dst = cp(dst, dend, '\\',
 381                        fd.cFileName,
 382                        fd.cFileName + strlen(fd.cFileName)))) {
 383             return -1;
 384         }
 385     } else {
 386         if (!lastErrorReportable()) {
 387             if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 388                 return -1;
 389             }
 390         } else {
 391             return -1;
 392         }
 393     }
 394 
 395     if (dst >= dend) {
 396         errno = ENAMETOOLONG;
 397         return -1;
 398     }
 399     *dst = '\0';
 400     return 0;
 401 }
 402 
 403 
 404 /* Wide character version of canonicalize. Size is a wide-character size. */
 405 
 406 int
 407 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 408 {
 409     WIN32_FIND_DATAW fd;
 410     HANDLE h;
 411     WCHAR *path;    /* Working copy of path */
 412     WCHAR *src, *dst, *dend, c;
 413 
 414     /* Reject paths that contain wildcards */
 415     if (wwild(orig_path)) {
 416         errno = EINVAL;
 417         return -1;
 418     }
 419 
 420     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 421         return -1;
 422 
 423     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 424        contrary to the documentation, the _fullpath procedure does not require
 425        the drive to be available.  */
 426     if(!_wfullpath(path, orig_path, size)) {
 427         goto err;
 428     }
 429 
 430     if (wdots(path)) /* Check for prohibited combinations of dots */
 431         goto err;
 432 
 433     src = path;            /* Start scanning here */
 434     dst = result;        /* Place results here */
 435     dend = dst + size;        /* Don't go to or past here */
 436 
 437     /* Copy prefix, assuming path is absolute */
 438     c = src[0];
 439     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
 440        && (src[1] == L':') && (src[2] == L'\\')) {
 441         /* Drive specifier */
 442         *src = towupper(*src);    /* Canonicalize drive letter */
 443         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
 444             goto err;
 445         }
 446 
 447         src += 2;
 448     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
 449         /* UNC pathname */
 450         WCHAR *p;
 451         p = wnextsep(src + 2);    /* Skip past host name */
 452         if (!*p) {
 453             /* A UNC pathname must begin with "\\\\host\\share",
 454                so reject this path as invalid if there is no share name */
 455             errno = EINVAL;
 456             goto err;
 457         }
 458         p = wnextsep(p + 1);    /* Skip past share name */
 459         if (!(dst = wcp(dst, dend, L'\0', src, p)))
 460             goto err;
 461         src = p;
 462     } else {
 463         /* Invalid path */
 464         errno = EINVAL;
 465         goto err;
 466     }
 467     /* At this point we have copied either a drive specifier ("z:") or a UNC
 468        prefix ("\\\\host\\share") to the result buffer, and src points to the
 469        first byte of the remainder of the path.  We now scan through the rest
 470        of the path, looking up each prefix in order to find the true name of
 471        the last element of each prefix, thereby computing the full true name of
 472        the original path. */
 473     while (*src) {
 474         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
 475         WCHAR c = *p;
 476         WCHAR *pathbuf;
 477         int pathlen;
 478 
 479         assert(*src == L'\\');        /* Invariant */
 480         *p = L'\0';            /* Temporarily clear separator */
 481 
 482         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
 483             pathbuf = getPrefixed(path, pathlen);
 484             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 485             free(pathbuf);
 486         } else
 487             h = FindFirstFileW(path, &fd);    /* Look up prefix */
 488 
 489         *p = c;                /* Restore separator */
 490         if (h != INVALID_HANDLE_VALUE) {
 491             /* Lookup succeeded; append true name to result and continue */
 492             FindClose(h);
 493             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
 494                             fd.cFileName + wcslen(fd.cFileName)))){
 495                 goto err;
 496             }
 497             src = p;
 498             continue;
 499         } else {
 500             if (!lastErrorReportable()) {
 501                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
 502                    goto err;
 503                }
 504                 break;
 505             } else {
 506                 goto err;
 507             }
 508         }
 509     }
 510 
 511     if (dst >= dend) {
 512     errno = ENAMETOOLONG;
 513         goto err;
 514     }
 515     *dst = L'\0';
 516     free(path);
 517     return 0;
 518 
 519  err:
 520     free(path);
 521     return -1;
 522 }
 523 
 524 
 525 /* Wide character version of canonicalizeWithPrefix. */
 526 
 527 int
 528 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 529 {
 530     WIN32_FIND_DATAW fd;
 531     HANDLE h;
 532     WCHAR *src, *dst, *dend;
 533     WCHAR *pathbuf;
 534     int pathlen;
 535 
 536     src = pathWithCanonicalPrefix;
 537     dst = result;        /* Place results here */
 538     dend = dst + size;   /* Don't go to or past here */
 539 
 540 
 541     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
 542         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
 543         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 544         free(pathbuf);
 545     } else
 546         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
 547     if (h != INVALID_HANDLE_VALUE) {
 548         /* Lookup succeeded; append true name to result and continue */
 549         FindClose(h);
 550         if (!(dst = wcp(dst, dend, L'\0',
 551                         canonicalPrefix,
 552                         canonicalPrefix + wcslen(canonicalPrefix)))) {
 553             return -1;
 554         }
 555         if (!(dst = wcp(dst, dend, L'\\',
 556                         fd.cFileName,
 557                         fd.cFileName + wcslen(fd.cFileName)))) {
 558             return -1;
 559         }
 560     } else {
 561         if (!lastErrorReportable()) {
 562             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
 563                 return -1;
 564             }
 565         } else {
 566             return -1;
 567         }
 568     }
 569 
 570     if (dst >= dend) {
 571         errno = ENAMETOOLONG;
 572         return -1;
 573     }
 574     *dst = L'\0';
 575     return 0;
 576 }
 577 
 578 
 579 /* The appropriate location of getPrefixed() should be io_util_md.c, but
 580    java.lang.instrument package has hardwired canonicalize_md.c into their
 581    dll, to avoid complicate solution such as including io_util_md.c into
 582    that package, as a workaround we put this method here.
 583  */
 584 
 585 /* copy \\?\ or \\?\UNC\ to the front of path*/
 586 WCHAR*
 587 getPrefixed(const WCHAR* path, int pathlen) {
 588     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
 589     if (pathbuf != 0) {
 590         if (path[0] == L'\\' && path[1] == L'\\') {
 591             if (path[2] == L'?' && path[3] == L'\\'){
 592                 /* if it already has a \\?\ don't do the prefix */
 593                 wcscpy(pathbuf, path );
 594             } else {
 595                 /* only UNC pathname includes double slashes here */
 596                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
 597                 wcscat(pathbuf, path + 1);
 598             }
 599         } else {
 600             wcscpy(pathbuf, L"\\\\?\\\0");
 601             wcscat(pathbuf, path );
 602         }
 603     }
 604     return pathbuf;
 605 }