New src/java.base/windows/native/libjava/canonicalize

   1 /*
   2  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * Pathname canonicalization for Win32 file systems
  28  */
  29 
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <assert.h>
  35 #include <sys/stat.h>
  36 
  37 #include <windows.h>
  38 #include <winbase.h>
  39 #include <errno.h>
  40 #include "io_util_md.h"
  41 
  #undef DEBUG_PATH        /* Define this to debug path code */

  /* True when the narrow character c is '/' or '\\'.  The argument can be
     evaluated twice, so it should not have side effects. */
  #define isfilesep(c) ((c) == '/' || (c) == '\\')
  /* Wide character counterpart of isfilesep; same double-evaluation caveat. */
  #define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
  /* Passes c, converted to BYTE, to IsDBCSLeadByte() and yields its result. */
  #define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
  47 
  48 
  /* Append bytes to a bounded output buffer.

     dst   - where writing starts
     dend  - one past the last writable byte; nothing is stored at or after it
     first - when not '\0', this single byte is written before the range
     src   - first byte of the range to copy
     send  - one past the last byte of the range to copy

     Returns the position just after the last byte written.  If either the
     leading byte or the range does not fit, errno is set to ENAMETOOLONG and
     NULL is returned (the leading byte may already have been stored when the
     range is the part that does not fit).  No terminator is appended. */

  static char *
  cp(char *dst, char *dend, char first, char *src, char *send)
  {
      char *out = dst;
      char *in;

      if (first != '\0') {
          if (out >= dend) {
              errno = ENAMETOOLONG;
              return NULL;
          }
          *out++ = first;
      }

      /* Refuse up front when the range is larger than the room that is left */
      if ((send - src) > (dend - out)) {
          errno = ENAMETOOLONG;
          return NULL;
      }

      for (in = src; in < send; in++) {
          *out++ = *in;
      }
      return out;
  }
  74 
  75 /* Wide character version of cp */
  76 
  77 static WCHAR*
  78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
  79 {
  80     WCHAR *p = src, *q = dst;
  81     if (first != L'\0') {
  82         if (q < dend) {
  83             *q++ = first;
  84         } else {
  85             errno = ENAMETOOLONG;
  86             return NULL;
  87         }
  88     }
  89     if (send - p > dend - q) {
  90         errno = ENAMETOOLONG;
  91         return NULL;
  92     }
  93     while (p < send)
  94         *q++ = *p++;
  95     return q;
  96 }
  97 
  98 
  /* Locate the first '\\' at or after start.  Returns a pointer to that byte,
     or to the terminating null when the string holds no '\\'.  A byte that
     islb() reports as a lead byte is skipped together with the byte after it
     (provided that byte is not the terminator), so the second byte of such a
     pair is never tested against '\\'. */

  static char *
  nextsep(char *start)
  {
      char *cur = start;
      for (;;) {
          int ch = *cur;
          if (ch == '\0' || ch == '\\') {
              return cur;
          }
          if (islb(ch) && cur[1] != '\0') {
              cur += 2;
          } else {
              cur += 1;
          }
      }
  }
 112 
 113 /* Wide character version of nextsep */
 114 
 115 static WCHAR *
 116 wnextsep(WCHAR *start)
 117 {
 118     WCHAR *p = start;
 119     int c;
 120     while ((c = *p) && (c != L'\\'))
 121         p++;
 122     return p;
 123 }
 124 
 /* Report whether the string contains a wildcard character ('*' or '?').
    Returns 1 if one is found and 0 otherwise.  A byte that islb() reports as
    a lead byte is skipped together with the byte after it (unless that byte
    is the terminator), so the second byte of such a pair is never examined. */

 static int
 wild(char *start)
 {
     char *cur = start;
     while (*cur != '\0') {
         int ch = *cur;
         if (ch == '*' || ch == '?') {
             return 1;
         }
         if (islb(ch) && cur[1] != '\0') {
             cur += 2;
         } else {
             cur += 1;
         }
     }
     return 0;
 }
 138 
 139 /* Wide character version of wild */
 140 
 141 static int
 142 wwild(WCHAR *start)
 143 {
 144     WCHAR *p = start;
 145     int c;
 146     while (c = *p) {
 147         if ((c == L'*') || (c == L'?'))
 148             return 1;
 149         p++;
 150     }
 151     return 0;
 152 }
 153 
 /* Report whether any path element ends in a dot, which is not allowed in a
    canonical path.  A run of one or more dots is prohibited when it is
    followed directly by '\\' or by the end of the string.
    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
    Returns 1 when a prohibited combination is present, 0 otherwise.
 */
 static int
 dots(char *start)
 {
     char *cursor = strchr(start, '.');
     while (cursor != NULL) {
         /* step over the whole run of consecutive dots */
         do {
             cursor++;
         } while (*cursor == '.');

         if (*cursor == '\0' || *cursor == '\\') {
             return 1; /* the element ends with a dot */
         }
         /* the character after the run is an ordinary one; search beyond it */
         cursor = strchr(cursor + 1, '.');
     }
     return 0; /* no element ends with a dot */
 }
 176 
 177 /* Wide character version of dots */
 178 static int
 179 wdots(WCHAR *start)
 180 {
 181     WCHAR *p = start;
 182     // Skip "\\.\" prefix
 183     if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
 184         p = p + 4;
 185 
 186     while (*p) {
 187         if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
 188             return 0; // no more dots
 189         p++; // next char
 190         while ((*p) == L'.') // go to the end of dots
 191             p++;
 192         if (*p && (*p != L'\\')) // path element does not end with a dot
 193             p++; // go to the next char
 194         else
 195             return 1; // path element does end with a dot - prohibited
 196     }
 197     return 0; // no prohibited combinations of dots found
 198 }
 199 
 200 /* If the lookup of a particular prefix fails because the file does not exist,
 201    because it is of the wrong type, because access is denied, or because the
 202    network is unreachable then canonicalization does not fail, it terminates
 203    successfully after copying the rest of the original path to the result path.
 204    Other I/O errors cause an error return.
 205 */
 206 
 207 int
 208 lastErrorReportable()
 209 {
 210     DWORD errval = GetLastError();
 211     if ((errval == ERROR_FILE_NOT_FOUND)
 212         || (errval == ERROR_DIRECTORY)
 213         || (errval == ERROR_PATH_NOT_FOUND)
 214         || (errval == ERROR_BAD_NETPATH)
 215         || (errval == ERROR_BAD_NET_NAME)
 216         || (errval == ERROR_ACCESS_DENIED)
 217         || (errval == ERROR_NETWORK_UNREACHABLE)
 218         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
 219         return 0;
 220     }
 221 
 222 #ifdef DEBUG_PATH
 223     jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
 224 #endif
 225     return 1;
 226 }
 227 
 228 int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size);
 229 
 230 /* Convert a pathname to canonical form.  The input orig_path is assumed to
 231    have been converted to native form already, via JVM_NativePath().  This is
 232    necessary because _fullpath() rejects duplicate separator characters on
 233    Win95, though it accepts them on NT. */
 234 
 235 int
 236 canonicalize(char *orig_path, char *result, int size)
 237 {
 238     WIN32_FIND_DATA fd;
 239     HANDLE h;
 240     char path[1024];    /* Working copy of path */
 241     char *src, *dst, *dend;
 242     wchar_t *worig_path, *wresult;
 243     size_t converted_chars = 0;
 244 
 245     /* handle long path with length >= MAX_PATH */
 246     if (strlen(orig_path) >= MAX_PATH) {
 247         if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 248             return -1;
 249 
 250         if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) {
 251             free(worig_path);
 252             return -1;
 253         }
 254 
 255         if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 256             return -1;
 257 
 258         if (wcanonicalize(worig_path, wresult, size) != 0) {
 259             free(worig_path);
 260             free(wresult);
 261             return -1;
 262         }
 263 
 264         if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) {
 265             free(worig_path);
 266             free(wresult);
 267             return -1;
 268         }
 269 
 270         free(worig_path);
 271         free(wresult);
 272         return 0;
 273     }
 274 
 275     /* Reject paths that contain wildcards */
 276     if (wild(orig_path)) {
 277         errno = EINVAL;
 278         return -1;
 279     }
 280 
 281     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 282       contrary to the documentation, the _fullpath procedure does not require
 283       the drive to be available.  It also does not reliably change all
 284       occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
 285     if (!_fullpath(path, orig_path, sizeof(path))) {
 286         return -1;
 287     }
 288 
 289     /* Correction for Win95: _fullpath may leave a trailing "\\"
 290       on a UNC pathname */
 291     if ((path[0] == '\\') && (path[1] == '\\')) {
 292         char *p = path + strlen(path);
 293         if ((p[-1] == '\\') && !islb(p[-2])) {
 294             p[-1] = '\0';
 295         }
 296     }
 297 
 298     if (dots(path)) /* Check for prohibited combinations of dots */
 299         return -1;
 300 
 301     src = path;            /* Start scanning here */
 302     dst = result;        /* Place results here */
 303     dend = dst + size;        /* Don't go to or past here */
 304 
 305     /* Copy prefix, assuming path is absolute */
 306     if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
 307         /* Drive specifier */
 308         *src = toupper(*src);    /* Canonicalize drive letter */
 309         if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
 310             return -1;
 311         }
 312         src += 2;
 313     } else if ((src[0] == '\\') && (src[1] == '\\')) {
 314         /* UNC pathname */
 315         char *p;
 316         p = nextsep(src + 2);    /* Skip past host name */
 317         if (!*p) {
 318             /* A UNC pathname must begin with "\\\\host\\share",
 319             so reject this path as invalid if there is no share name */
 320             errno = EINVAL;
 321             return -1;
 322         }
 323         p = nextsep(p + 1);    /* Skip past share name */
 324         if (!(dst = cp(dst, dend, '\0', src, p))) {
 325             return -1;
 326         }
 327         src = p;
 328     } else {
 329         /* Invalid path */
 330         errno = EINVAL;
 331         return -1;
 332     }
 333 
 334     /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
 335     /* for root pathes like "E:\" . If the path has this form, we should  */
 336     /* simply return it, it is already canonicalized. */
 337     if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
 338         /* At this point we have already copied the drive specifier ("z:")*/
 339         /* so we need to copy "\" and the null character. */
 340         result[2] = '\\';
 341         result[3] = '\0';
 342         return 0;
 343     }
 344 
 345     /* At this point we have copied either a drive specifier ("z:") or a UNC
 346     prefix ("\\\\host\\share") to the result buffer, and src points to the
 347     first byte of the remainder of the path.  We now scan through the rest
 348     of the path, looking up each prefix in order to find the true name of
 349     the last element of each prefix, thereby computing the full true name of
 350     the original path. */
 351     while (*src) {
 352         char *p = nextsep(src + 1);    /* Find next separator */
 353         char c = *p;
 354         assert(*src == '\\');        /* Invariant */
 355         *p = '\0';            /* Temporarily clear separator */
 356         h = FindFirstFile(path, &fd);    /* Look up prefix */
 357         *p = c;                /* Restore separator */
 358         if (h != INVALID_HANDLE_VALUE) {
 359             /* Lookup succeeded; append true name to result and continue */
 360             FindClose(h);
 361             if (!(dst = cp(dst, dend, '\\',
 362                 fd.cFileName,
 363                 fd.cFileName + strlen(fd.cFileName)))) {
 364                 return -1;
 365             }
 366             src = p;
 367             continue;
 368         } else {
 369             if (!lastErrorReportable()) {
 370                 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 371                     return -1;
 372                 }
 373                 break;
 374             } else {
 375                 return -1;
 376             }
 377         }
 378     }
 379 
 380     if (dst >= dend) {
 381         errno = ENAMETOOLONG;
 382         return -1;
 383     }
 384     *dst = '\0';
 385     return 0;
 386 
 387 }
 388 
 389 
 390 /* Convert a pathname to canonical form.  The input prefix is assumed
 391    to be in canonical form already, and the trailing filename must not
 392    contain any wildcard, dot/double dot, or other "tricky" characters
 393    that are rejected by the canonicalize() routine above.  This
 394    routine is present to allow the canonicalization prefix cache to be
 395    used while still returning canonical names with the correct
 396    capitalization. */
 397 
 398 int
 399 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
 400 {
 401     WIN32_FIND_DATA fd;
 402     HANDLE h;
 403     char *src, *dst, *dend;
 404 
 405     src = pathWithCanonicalPrefix;
 406     dst = result;        /* Place results here */
 407     dend = dst + size;   /* Don't go to or past here */
 408 
 409     h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
 410     if (h != INVALID_HANDLE_VALUE) {
 411         /* Lookup succeeded; concatenate true name to prefix */
 412         FindClose(h);
 413         if (!(dst = cp(dst, dend, '\0',
 414                        canonicalPrefix,
 415                        canonicalPrefix + strlen(canonicalPrefix)))) {
 416             return -1;
 417         }
 418         if (!(dst = cp(dst, dend, '\\',
 419                        fd.cFileName,
 420                        fd.cFileName + strlen(fd.cFileName)))) {
 421             return -1;
 422         }
 423     } else {
 424         if (!lastErrorReportable()) {
 425             if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
 426                 return -1;
 427             }
 428         } else {
 429             return -1;
 430         }
 431     }
 432 
 433     if (dst >= dend) {
 434         errno = ENAMETOOLONG;
 435         return -1;
 436     }
 437     *dst = '\0';
 438     return 0;
 439 }
 440 
 441 
 442 /* Wide character version of canonicalize. Size is a wide-character size. */
 443 
 444 int
 445 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 446 {
 447     WIN32_FIND_DATAW fd;
 448     HANDLE h;
 449     WCHAR *path;    /* Working copy of path */
 450     WCHAR *src, *dst, *dend, c;
 451 
 452     /* Reject paths that contain wildcards */
 453     if (wwild(orig_path)) {
 454         errno = EINVAL;
 455         return -1;
 456     }
 457 
 458     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
 459         return -1;
 460 
 461     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
 462        contrary to the documentation, the _fullpath procedure does not require
 463        the drive to be available.  */
 464     if(!_wfullpath(path, orig_path, size)) {
 465         goto err;
 466     }
 467 
 468     if (wdots(path)) /* Check for prohibited combinations of dots */
 469         goto err;
 470 
 471     src = path;            /* Start scanning here */
 472     dst = result;        /* Place results here */
 473     dend = dst + size;        /* Don't go to or past here */
 474 
 475     /* Copy prefix, assuming path is absolute */
 476     c = src[0];
 477     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
 478        && (src[1] == L':') && (src[2] == L'\\')) {
 479         /* Drive specifier */
 480         *src = towupper(*src);    /* Canonicalize drive letter */
 481         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
 482             goto err;
 483         }
 484 
 485         src += 2;
 486     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
 487         /* UNC pathname */
 488         WCHAR *p;
 489         p = wnextsep(src + 2);    /* Skip past host name */
 490         if (!*p) {
 491             /* A UNC pathname must begin with "\\\\host\\share",
 492                so reject this path as invalid if there is no share name */
 493             errno = EINVAL;
 494             goto err;
 495         }
 496         p = wnextsep(p + 1);    /* Skip past share name */
 497         if (!(dst = wcp(dst, dend, L'\0', src, p)))
 498             goto err;
 499         src = p;
 500     } else {
 501         /* Invalid path */
 502         errno = EINVAL;
 503         goto err;
 504     }
 505     /* At this point we have copied either a drive specifier ("z:") or a UNC
 506        prefix ("\\\\host\\share") to the result buffer, and src points to the
 507        first byte of the remainder of the path.  We now scan through the rest
 508        of the path, looking up each prefix in order to find the true name of
 509        the last element of each prefix, thereby computing the full true name of
 510        the original path. */
 511     while (*src) {
 512         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
 513         WCHAR c = *p;
 514         WCHAR *pathbuf;
 515         int pathlen;
 516 
 517         assert(*src == L'\\');        /* Invariant */
 518         *p = L'\0';            /* Temporarily clear separator */
 519 
 520         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
 521             pathbuf = getPrefixed(path, pathlen);
 522             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 523             free(pathbuf);
 524         } else
 525             h = FindFirstFileW(path, &fd);    /* Look up prefix */
 526 
 527         *p = c;                /* Restore separator */
 528         if (h != INVALID_HANDLE_VALUE) {
 529             /* Lookup succeeded; append true name to result and continue */
 530             FindClose(h);
 531             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
 532                             fd.cFileName + wcslen(fd.cFileName)))){
 533                 goto err;
 534             }
 535             src = p;
 536             continue;
 537         } else {
 538             if (!lastErrorReportable()) {
 539                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
 540                    goto err;
 541                }
 542                 break;
 543             } else {
 544                 goto err;
 545             }
 546         }
 547     }
 548 
 549     if (dst >= dend) {
 550     errno = ENAMETOOLONG;
 551         goto err;
 552     }
 553     *dst = L'\0';
 554     free(path);
 555     return 0;
 556 
 557  err:
 558     free(path);
 559     return -1;
 560 }
 561 
 562 
 563 /* Wide character version of canonicalizeWithPrefix. */
 564 
 565 int
 566 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 567 {
 568     WIN32_FIND_DATAW fd;
 569     HANDLE h;
 570     WCHAR *src, *dst, *dend;
 571     WCHAR *pathbuf;
 572     int pathlen;
 573 
 574     src = pathWithCanonicalPrefix;
 575     dst = result;        /* Place results here */
 576     dend = dst + size;   /* Don't go to or past here */
 577 
 578 
 579     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
 580         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
 581         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
 582         free(pathbuf);
 583     } else
 584         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
 585     if (h != INVALID_HANDLE_VALUE) {
 586         /* Lookup succeeded; append true name to result and continue */
 587         FindClose(h);
 588         if (!(dst = wcp(dst, dend, L'\0',
 589                         canonicalPrefix,
 590                         canonicalPrefix + wcslen(canonicalPrefix)))) {
 591             return -1;
 592         }
 593         if (!(dst = wcp(dst, dend, L'\\',
 594                         fd.cFileName,
 595                         fd.cFileName + wcslen(fd.cFileName)))) {
 596             return -1;
 597         }
 598     } else {
 599         if (!lastErrorReportable()) {
 600             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
 601                 return -1;
 602             }
 603         } else {
 604             return -1;
 605         }
 606     }
 607 
 608     if (dst >= dend) {
 609         errno = ENAMETOOLONG;
 610         return -1;
 611     }
 612     *dst = L'\0';
 613     return 0;
 614 }
 615 
 616 
 617 /* The appropriate location of getPrefixed() should be io_util_md.c, but
 618    java.lang.instrument package has hardwired canonicalize_md.c into their
 619    dll, to avoid complicate solution such as including io_util_md.c into
 620    that package, as a workaround we put this method here.
 621  */
 622 
 623 /* copy \\?\ or \\?\UNC\ to the front of path*/
 624 __declspec(dllexport) WCHAR*
 625 getPrefixed(const WCHAR* path, int pathlen) {
 626     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
 627     if (pathbuf != 0) {
 628         if (path[0] == L'\\' && path[1] == L'\\') {
 629             if (path[2] == L'?' && path[3] == L'\\'){
 630                 /* if it already has a \\?\ don't do the prefix */
 631                 wcscpy(pathbuf, path );
 632             } else {
 633                 /* only UNC pathname includes double slashes here */
 634                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
 635                 wcscat(pathbuf, path + 1);
 636             }
 637         } else {
 638             wcscpy(pathbuf, L"\\\\?\\\0");
 639             wcscat(pathbuf, path );
 640         }
 641     }
 642     return pathbuf;
 643 }