1 /* 2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * Pathname canonicalization for Win32 file systems 28 */ 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <ctype.h> 34 #include <assert.h> 35 #include <sys/stat.h> 36 37 #include <windows.h> 38 #include <winbase.h> 39 #include <errno.h> 40 #include "io_util_md.h" 41 42 #undef DEBUG_PATH /* Define this to debug path code */ 43 44 #define isfilesep(c) ((c) == '/' || (c) == '\\') 45 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\') 46 #define islb(c) (IsDBCSLeadByte((BYTE)(c))) 47 48 49 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied, 50 or NULL if dend would have been exceeded. If first != '\0', copy that byte 51 before copying bytes from src to send - 1. */ 52 53 static char * 54 cp(char *dst, char *dend, char first, char *src, char *send) 55 { 56 char *p = src, *q = dst; 57 if (first != '\0') { 58 if (q < dend) { 59 *q++ = first; 60 } else { 61 errno = ENAMETOOLONG; 62 return NULL; 63 } 64 } 65 if (send - p > dend - q) { 66 errno = ENAMETOOLONG; 67 return NULL; 68 } 69 while (p < send) { 70 *q++ = *p++; 71 } 72 return q; 73 } 74 75 /* Wide character version of cp */ 76 77 static WCHAR* 78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send) 79 { 80 WCHAR *p = src, *q = dst; 81 if (first != L'\0') { 82 if (q < dend) { 83 *q++ = first; 84 } else { 85 errno = ENAMETOOLONG; 86 return NULL; 87 } 88 } 89 if (send - p > dend - q) { 90 errno = ENAMETOOLONG; 91 return NULL; 92 } 93 while (p < send) 94 *q++ = *p++; 95 return q; 96 } 97 98 99 /* Find first instance of '\\' at or following start. Return the address of 100 that byte or the address of the null terminator if '\\' is not found. */ 101 102 static char * 103 nextsep(char *start) 104 { 105 char *p = start; 106 int c; 107 while ((c = *p) && (c != '\\')) { 108 p += ((islb(c) && p[1]) ? 2 : 1); 109 } 110 return p; 111 } 112 113 /* Wide character version of nextsep */ 114 115 static WCHAR * 116 wnextsep(WCHAR *start) 117 { 118 WCHAR *p = start; 119 int c; 120 while ((c = *p) && (c != L'\\')) 121 p++; 122 return p; 123 } 124 125 /* Tell whether the given string contains any wildcard characters */ 126 127 static int 128 wild(char *start) 129 { 130 char *p = start; 131 int c; 132 while (c = *p) { 133 if ((c == '*') || (c == '?')) return 1; 134 p += ((islb(c) && p[1]) ? 2 : 1); 135 } 136 return 0; 137 } 138 139 /* Wide character version of wild */ 140 141 static int 142 wwild(WCHAR *start) 143 { 144 WCHAR *p = start; 145 int c; 146 while (c = *p) { 147 if ((c == L'*') || (c == L'?')) 148 return 1; 149 p++; 150 } 151 return 0; 152 } 153 154 /* Tell whether the given string contains prohibited combinations of dots. 155 In the canonicalized form no path element may have dots at its end. 156 Allowed canonical paths: c:\xa...dksd\..ksa\.lk c:\...a\.b\cd..x.x 157 Prohibited canonical paths: c:\..\x c:\x.\d c:\... 158 */ 159 static int 160 dots(char *start) 161 { 162 char *p = start; 163 while (*p) { 164 if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.' 165 return 0; // no more dots 166 p++; // next char 167 while ((*p) == '.') // go to the end of dots 168 p++; 169 if (*p && (*p != '\\')) // path element does not end with a dot 170 p++; // go to the next char 171 else 172 return 1; // path element does end with a dot - prohibited 173 } 174 return 0; // no prohibited combinations of dots found 175 } 176 177 /* Wide character version of dots */ 178 static int 179 wdots(WCHAR *start) 180 { 181 WCHAR *p = start; 182 // Skip "\\.\" prefix 183 if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4)) 184 p = p + 4; 185 186 while (*p) { 187 if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.' 188 return 0; // no more dots 189 p++; // next char 190 while ((*p) == L'.') // go to the end of dots 191 p++; 192 if (*p && (*p != L'\\')) // path element does not end with a dot 193 p++; // go to the next char 194 else 195 return 1; // path element does end with a dot - prohibited 196 } 197 return 0; // no prohibited combinations of dots found 198 } 199 200 /* If the lookup of a particular prefix fails because the file does not exist, 201 because it is of the wrong type, because access is denied, or because the 202 network is unreachable then canonicalization does not fail, it terminates 203 successfully after copying the rest of the original path to the result path. 204 Other I/O errors cause an error return. 205 */ 206 207 int 208 lastErrorReportable() 209 { 210 DWORD errval = GetLastError(); 211 if ((errval == ERROR_FILE_NOT_FOUND) 212 || (errval == ERROR_DIRECTORY) 213 || (errval == ERROR_PATH_NOT_FOUND) 214 || (errval == ERROR_BAD_NETPATH) 215 || (errval == ERROR_BAD_NET_NAME) 216 || (errval == ERROR_ACCESS_DENIED) 217 || (errval == ERROR_NETWORK_UNREACHABLE) 218 || (errval == ERROR_NETWORK_ACCESS_DENIED)) { 219 return 0; 220 } 221 222 #ifdef DEBUG_PATH 223 jio_fprintf(stderr, "canonicalize: errval %d\n", errval); 224 #endif 225 return 1; 226 } 227 228 /* Convert a pathname to canonical form. The input orig_path is assumed to 229 have been converted to native form already, via JVM_NativePath(). This is 230 necessary because _fullpath() rejects duplicate separator characters on 231 Win95, though it accepts them on NT. */ 232 233 int 234 canonicalize(char *orig_path, char *result, int size) 235 { 236 WIN32_FIND_DATA fd; 237 HANDLE h; 238 char path[1024]; /* Working copy of path */ 239 char *src, *dst, *dend; 240 241 /* Reject paths that contain wildcards */ 242 if (wild(orig_path)) { 243 errno = EINVAL; 244 return -1; 245 } 246 247 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 248 contrary to the documentation, the _fullpath procedure does not require 249 the drive to be available. It also does not reliably change all 250 occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */ 251 if(!_fullpath(path, orig_path, sizeof(path))) { 252 return -1; 253 } 254 255 /* Correction for Win95: _fullpath may leave a trailing "\\" 256 on a UNC pathname */ 257 if ((path[0] == '\\') && (path[1] == '\\')) { 258 char *p = path + strlen(path); 259 if ((p[-1] == '\\') && !islb(p[-2])) { 260 p[-1] = '\0'; 261 } 262 } 263 264 if (dots(path)) /* Check for prohibited combinations of dots */ 265 return -1; 266 267 src = path; /* Start scanning here */ 268 dst = result; /* Place results here */ 269 dend = dst + size; /* Don't go to or past here */ 270 271 /* Copy prefix, assuming path is absolute */ 272 if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) { 273 /* Drive specifier */ 274 *src = toupper(*src); /* Canonicalize drive letter */ 275 if (!(dst = cp(dst, dend, '\0', src, src + 2))) { 276 return -1; 277 } 278 src += 2; 279 } else if ((src[0] == '\\') && (src[1] == '\\')) { 280 /* UNC pathname */ 281 char *p; 282 p = nextsep(src + 2); /* Skip past host name */ 283 if (!*p) { 284 /* A UNC pathname must begin with "\\\\host\\share", 285 so reject this path as invalid if there is no share name */ 286 errno = EINVAL; 287 return -1; 288 } 289 p = nextsep(p + 1); /* Skip past share name */ 290 if (!(dst = cp(dst, dend, '\0', src, p))) { 291 return -1; 292 } 293 src = p; 294 } else { 295 /* Invalid path */ 296 errno = EINVAL; 297 return -1; 298 } 299 300 /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */ 301 /* for root pathes like "E:\" . If the path has this form, we should */ 302 /* simply return it, it is already canonicalized. */ 303 if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') { 304 /* At this point we have already copied the drive specifier ("z:")*/ 305 /* so we need to copy "\" and the null character. */ 306 result[2] = '\\'; 307 result[3] = '\0'; 308 return 0; 309 } 310 311 /* At this point we have copied either a drive specifier ("z:") or a UNC 312 prefix ("\\\\host\\share") to the result buffer, and src points to the 313 first byte of the remainder of the path. We now scan through the rest 314 of the path, looking up each prefix in order to find the true name of 315 the last element of each prefix, thereby computing the full true name of 316 the original path. */ 317 while (*src) { 318 char *p = nextsep(src + 1); /* Find next separator */ 319 char c = *p; 320 assert(*src == '\\'); /* Invariant */ 321 *p = '\0'; /* Temporarily clear separator */ 322 h = FindFirstFile(path, &fd); /* Look up prefix */ 323 *p = c; /* Restore separator */ 324 if (h != INVALID_HANDLE_VALUE) { 325 /* Lookup succeeded; append true name to result and continue */ 326 FindClose(h); 327 if (!(dst = cp(dst, dend, '\\', 328 fd.cFileName, 329 fd.cFileName + strlen(fd.cFileName)))) { 330 return -1; 331 } 332 src = p; 333 continue; 334 } else { 335 if (!lastErrorReportable()) { 336 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 337 return -1; 338 } 339 break; 340 } else { 341 return -1; 342 } 343 } 344 } 345 346 if (dst >= dend) { 347 errno = ENAMETOOLONG; 348 return -1; 349 } 350 *dst = '\0'; 351 return 0; 352 353 } 354 355 356 /* Convert a pathname to canonical form. The input prefix is assumed 357 to be in canonical form already, and the trailing filename must not 358 contain any wildcard, dot/double dot, or other "tricky" characters 359 that are rejected by the canonicalize() routine above. This 360 routine is present to allow the canonicalization prefix cache to be 361 used while still returning canonical names with the correct 362 capitalization. */ 363 364 int 365 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size) 366 { 367 WIN32_FIND_DATA fd; 368 HANDLE h; 369 char *src, *dst, *dend; 370 371 src = pathWithCanonicalPrefix; 372 dst = result; /* Place results here */ 373 dend = dst + size; /* Don't go to or past here */ 374 375 h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */ 376 if (h != INVALID_HANDLE_VALUE) { 377 /* Lookup succeeded; concatenate true name to prefix */ 378 FindClose(h); 379 if (!(dst = cp(dst, dend, '\0', 380 canonicalPrefix, 381 canonicalPrefix + strlen(canonicalPrefix)))) { 382 return -1; 383 } 384 if (!(dst = cp(dst, dend, '\\', 385 fd.cFileName, 386 fd.cFileName + strlen(fd.cFileName)))) { 387 return -1; 388 } 389 } else { 390 if (!lastErrorReportable()) { 391 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 392 return -1; 393 } 394 } else { 395 return -1; 396 } 397 } 398 399 if (dst >= dend) { 400 errno = ENAMETOOLONG; 401 return -1; 402 } 403 *dst = '\0'; 404 return 0; 405 } 406 407 408 /* Wide character version of canonicalize. Size is a wide-character size. */ 409 410 int 411 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size) 412 { 413 WIN32_FIND_DATAW fd; 414 HANDLE h; 415 WCHAR *path; /* Working copy of path */ 416 WCHAR *src, *dst, *dend, c; 417 418 /* Reject paths that contain wildcards */ 419 if (wwild(orig_path)) { 420 errno = EINVAL; 421 return -1; 422 } 423 424 if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL) 425 return -1; 426 427 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 428 contrary to the documentation, the _fullpath procedure does not require 429 the drive to be available. */ 430 if(!_wfullpath(path, orig_path, size)) { 431 goto err; 432 } 433 434 if (wdots(path)) /* Check for prohibited combinations of dots */ 435 goto err; 436 437 src = path; /* Start scanning here */ 438 dst = result; /* Place results here */ 439 dend = dst + size; /* Don't go to or past here */ 440 441 /* Copy prefix, assuming path is absolute */ 442 c = src[0]; 443 if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A')) 444 && (src[1] == L':') && (src[2] == L'\\')) { 445 /* Drive specifier */ 446 *src = towupper(*src); /* Canonicalize drive letter */ 447 if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) { 448 goto err; 449 } 450 451 src += 2; 452 } else if ((src[0] == L'\\') && (src[1] == L'\\')) { 453 /* UNC pathname */ 454 WCHAR *p; 455 p = wnextsep(src + 2); /* Skip past host name */ 456 if (!*p) { 457 /* A UNC pathname must begin with "\\\\host\\share", 458 so reject this path as invalid if there is no share name */ 459 errno = EINVAL; 460 goto err; 461 } 462 p = wnextsep(p + 1); /* Skip past share name */ 463 if (!(dst = wcp(dst, dend, L'\0', src, p))) 464 goto err; 465 src = p; 466 } else { 467 /* Invalid path */ 468 errno = EINVAL; 469 goto err; 470 } 471 /* At this point we have copied either a drive specifier ("z:") or a UNC 472 prefix ("\\\\host\\share") to the result buffer, and src points to the 473 first byte of the remainder of the path. We now scan through the rest 474 of the path, looking up each prefix in order to find the true name of 475 the last element of each prefix, thereby computing the full true name of 476 the original path. */ 477 while (*src) { 478 WCHAR *p = wnextsep(src + 1); /* Find next separator */ 479 WCHAR c = *p; 480 WCHAR *pathbuf; 481 int pathlen; 482 483 assert(*src == L'\\'); /* Invariant */ 484 *p = L'\0'; /* Temporarily clear separator */ 485 486 if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) { 487 pathbuf = getPrefixed(path, pathlen); 488 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 489 free(pathbuf); 490 } else 491 h = FindFirstFileW(path, &fd); /* Look up prefix */ 492 493 *p = c; /* Restore separator */ 494 if (h != INVALID_HANDLE_VALUE) { 495 /* Lookup succeeded; append true name to result and continue */ 496 FindClose(h); 497 if (!(dst = wcp(dst, dend, L'\\', fd.cFileName, 498 fd.cFileName + wcslen(fd.cFileName)))){ 499 goto err; 500 } 501 src = p; 502 continue; 503 } else { 504 if (!lastErrorReportable()) { 505 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){ 506 goto err; 507 } 508 break; 509 } else { 510 goto err; 511 } 512 } 513 } 514 515 if (dst >= dend) { 516 errno = ENAMETOOLONG; 517 goto err; 518 } 519 *dst = L'\0'; 520 free(path); 521 return 0; 522 523 err: 524 free(path); 525 return -1; 526 } 527 528 529 /* Wide character version of canonicalizeWithPrefix. */ 530 531 int 532 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size) 533 { 534 WIN32_FIND_DATAW fd; 535 HANDLE h; 536 WCHAR *src, *dst, *dend; 537 WCHAR *pathbuf; 538 int pathlen; 539 540 src = pathWithCanonicalPrefix; 541 dst = result; /* Place results here */ 542 dend = dst + size; /* Don't go to or past here */ 543 544 545 if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) { 546 pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen); 547 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 548 free(pathbuf); 549 } else 550 h = FindFirstFileW(pathWithCanonicalPrefix, &fd); /* Look up prefix */ 551 if (h != INVALID_HANDLE_VALUE) { 552 /* Lookup succeeded; append true name to result and continue */ 553 FindClose(h); 554 if (!(dst = wcp(dst, dend, L'\0', 555 canonicalPrefix, 556 canonicalPrefix + wcslen(canonicalPrefix)))) { 557 return -1; 558 } 559 if (!(dst = wcp(dst, dend, L'\\', 560 fd.cFileName, 561 fd.cFileName + wcslen(fd.cFileName)))) { 562 return -1; 563 } 564 } else { 565 if (!lastErrorReportable()) { 566 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) { 567 return -1; 568 } 569 } else { 570 return -1; 571 } 572 } 573 574 if (dst >= dend) { 575 errno = ENAMETOOLONG; 576 return -1; 577 } 578 *dst = L'\0'; 579 return 0; 580 } 581 582 583 /* The appropriate location of getPrefixed() should be io_util_md.c, but 584 java.lang.instrument package has hardwired canonicalize_md.c into their 585 dll, to avoid complicate solution such as including io_util_md.c into 586 that package, as a workaround we put this method here. 587 */ 588 589 /* copy \\?\ or \\?\UNC\ to the front of path*/ 590 WCHAR* 591 getPrefixed(const WCHAR* path, int pathlen) { 592 WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR)); 593 if (pathbuf != 0) { 594 if (path[0] == L'\\' && path[1] == L'\\') { 595 if (path[2] == L'?' && path[3] == L'\\'){ 596 /* if it already has a \\?\ don't do the prefix */ 597 wcscpy(pathbuf, path ); 598 } else { 599 /* only UNC pathname includes double slashes here */ 600 wcscpy(pathbuf, L"\\\\?\\UNC\0"); 601 wcscat(pathbuf, path + 1); 602 } 603 } else { 604 wcscpy(pathbuf, L"\\\\?\\\0"); 605 wcscat(pathbuf, path ); 606 } 607 } 608 return pathbuf; 609 }