1 /* 2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * Pathname canonicalization for Win32 file systems 28 */ 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <ctype.h> 34 #include <assert.h> 35 #include <sys/stat.h> 36 37 #include <windows.h> 38 #include <winbase.h> 39 #include <errno.h> 40 #include "io_util_md.h" 41 42 #undef DEBUG_PATH /* Define this to debug path code */ 43 44 #define isfilesep(c) ((c) == '/' || (c) == '\\') 45 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\') 46 #define islb(c) (IsDBCSLeadByte((BYTE)(c))) 47 48 49 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied, 50 or NULL if dend would have been exceeded. If first != '\0', copy that byte 51 before copying bytes from src to send - 1. */ 52 53 static char * 54 cp(char *dst, char *dend, char first, char *src, char *send) 55 { 56 char *p = src, *q = dst; 57 if (first != '\0') { 58 if (q < dend) { 59 *q++ = first; 60 } else { 61 errno = ENAMETOOLONG; 62 return NULL; 63 } 64 } 65 if (send - p > dend - q) { 66 errno = ENAMETOOLONG; 67 return NULL; 68 } 69 while (p < send) { 70 *q++ = *p++; 71 } 72 return q; 73 } 74 75 /* Wide character version of cp */ 76 77 static WCHAR* 78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send) 79 { 80 WCHAR *p = src, *q = dst; 81 if (first != L'\0') { 82 if (q < dend) { 83 *q++ = first; 84 } else { 85 errno = ENAMETOOLONG; 86 return NULL; 87 } 88 } 89 if (send - p > dend - q) { 90 errno = ENAMETOOLONG; 91 return NULL; 92 } 93 while (p < send) 94 *q++ = *p++; 95 return q; 96 } 97 98 99 /* Find first instance of '\\' at or following start. Return the address of 100 that byte or the address of the null terminator if '\\' is not found. */ 101 102 static char * 103 nextsep(char *start) 104 { 105 char *p = start; 106 int c; 107 while ((c = *p) && (c != '\\')) { 108 p += ((islb(c) && p[1]) ? 2 : 1); 109 } 110 return p; 111 } 112 113 /* Wide character version of nextsep */ 114 115 static WCHAR * 116 wnextsep(WCHAR *start) 117 { 118 WCHAR *p = start; 119 int c; 120 while ((c = *p) && (c != L'\\')) 121 p++; 122 return p; 123 } 124 125 /* Tell whether the given string contains any wildcard characters */ 126 127 static int 128 wild(char *start) 129 { 130 char *p = start; 131 int c; 132 while (c = *p) { 133 if ((c == '*') || (c == '?')) return 1; 134 p += ((islb(c) && p[1]) ? 2 : 1); 135 } 136 return 0; 137 } 138 139 /* Wide character version of wild */ 140 141 static int 142 wwild(WCHAR *start) 143 { 144 WCHAR *p = start; 145 int c; 146 while (c = *p) { 147 if ((c == L'*') || (c == L'?')) 148 return 1; 149 p++; 150 } 151 return 0; 152 } 153 154 /* Tell whether the given string contains prohibited combinations of dots. 155 In the canonicalized form no path element may have dots at its end. 156 Allowed canonical paths: c:\xa...dksd\..ksa\.lk c:\...a\.b\cd..x.x 157 Prohibited canonical paths: c:\..\x c:\x.\d c:\... 158 */ 159 static int 160 dots(char *start) 161 { 162 char *p = start; 163 while (*p) { 164 if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.' 165 return 0; // no more dots 166 p++; // next char 167 while ((*p) == '.') // go to the end of dots 168 p++; 169 if (*p && (*p != '\\')) // path element does not end with a dot 170 p++; // go to the next char 171 else 172 return 1; // path element does end with a dot - prohibited 173 } 174 return 0; // no prohibited combinations of dots found 175 } 176 177 /* Wide character version of dots */ 178 static int 179 wdots(WCHAR *start) 180 { 181 WCHAR *p = start; 182 // Skip "\\.\" prefix 183 if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4)) 184 p = p + 4; 185 186 while (*p) { 187 if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.' 188 return 0; // no more dots 189 p++; // next char 190 while ((*p) == L'.') // go to the end of dots 191 p++; 192 if (*p && (*p != L'\\')) // path element does not end with a dot 193 p++; // go to the next char 194 else 195 return 1; // path element does end with a dot - prohibited 196 } 197 return 0; // no prohibited combinations of dots found 198 } 199 200 /* If the lookup of a particular prefix fails because the file does not exist, 201 because it is of the wrong type, because access is denied, or because the 202 network is unreachable then canonicalization does not fail, it terminates 203 successfully after copying the rest of the original path to the result path. 204 Other I/O errors cause an error return. 205 */ 206 207 int 208 lastErrorReportable() 209 { 210 DWORD errval = GetLastError(); 211 if ((errval == ERROR_FILE_NOT_FOUND) 212 || (errval == ERROR_DIRECTORY) 213 || (errval == ERROR_PATH_NOT_FOUND) 214 || (errval == ERROR_BAD_NETPATH) 215 || (errval == ERROR_BAD_NET_NAME) 216 || (errval == ERROR_ACCESS_DENIED) 217 || (errval == ERROR_NETWORK_UNREACHABLE) 218 || (errval == ERROR_NETWORK_ACCESS_DENIED)) { 219 return 0; 220 } 221 222 #ifdef DEBUG_PATH 223 jio_fprintf(stderr, "canonicalize: errval %d\n", errval); 224 #endif 225 return 1; 226 } 227 228 int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size); 229 230 /* Convert a pathname to canonical form. The input orig_path is assumed to 231 have been converted to native form already, via JVM_NativePath(). This is 232 necessary because _fullpath() rejects duplicate separator characters on 233 Win95, though it accepts them on NT. */ 234 235 int 236 canonicalize(char *orig_path, char *result, int size) 237 { 238 WIN32_FIND_DATA fd; 239 HANDLE h; 240 char path[1024]; /* Working copy of path */ 241 char *src, *dst, *dend; 242 wchar_t *worig_path, *wresult; 243 size_t converted_chars = 0; 244 245 /* handle long path with length >= MAX_PATH */ 246 if (strlen(orig_path) >= MAX_PATH) { 247 if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL) 248 return -1; 249 250 if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) { 251 free(worig_path); 252 return -1; 253 } 254 255 if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL) 256 return -1; 257 258 if (wcanonicalize(worig_path, wresult, size) != 0) { 259 free(worig_path); 260 free(wresult); 261 return -1; 262 } 263 264 if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) { 265 free(worig_path); 266 free(wresult); 267 return -1; 268 } 269 270 free(worig_path); 271 free(wresult); 272 return 0; 273 } 274 275 /* Reject paths that contain wildcards */ 276 if (wild(orig_path)) { 277 errno = EINVAL; 278 return -1; 279 } 280 281 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 282 contrary to the documentation, the _fullpath procedure does not require 283 the drive to be available. It also does not reliably change all 284 occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */ 285 if (!_fullpath(path, orig_path, sizeof(path))) { 286 return -1; 287 } 288 289 /* Correction for Win95: _fullpath may leave a trailing "\\" 290 on a UNC pathname */ 291 if ((path[0] == '\\') && (path[1] == '\\')) { 292 char *p = path + strlen(path); 293 if ((p[-1] == '\\') && !islb(p[-2])) { 294 p[-1] = '\0'; 295 } 296 } 297 298 if (dots(path)) /* Check for prohibited combinations of dots */ 299 return -1; 300 301 src = path; /* Start scanning here */ 302 dst = result; /* Place results here */ 303 dend = dst + size; /* Don't go to or past here */ 304 305 /* Copy prefix, assuming path is absolute */ 306 if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) { 307 /* Drive specifier */ 308 *src = toupper(*src); /* Canonicalize drive letter */ 309 if (!(dst = cp(dst, dend, '\0', src, src + 2))) { 310 return -1; 311 } 312 src += 2; 313 } else if ((src[0] == '\\') && (src[1] == '\\')) { 314 /* UNC pathname */ 315 char *p; 316 p = nextsep(src + 2); /* Skip past host name */ 317 if (!*p) { 318 /* A UNC pathname must begin with "\\\\host\\share", 319 so reject this path as invalid if there is no share name */ 320 errno = EINVAL; 321 return -1; 322 } 323 p = nextsep(p + 1); /* Skip past share name */ 324 if (!(dst = cp(dst, dend, '\0', src, p))) { 325 return -1; 326 } 327 src = p; 328 } else { 329 /* Invalid path */ 330 errno = EINVAL; 331 return -1; 332 } 333 334 /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */ 335 /* for root pathes like "E:\" . If the path has this form, we should */ 336 /* simply return it, it is already canonicalized. */ 337 if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') { 338 /* At this point we have already copied the drive specifier ("z:")*/ 339 /* so we need to copy "\" and the null character. */ 340 result[2] = '\\'; 341 result[3] = '\0'; 342 return 0; 343 } 344 345 /* At this point we have copied either a drive specifier ("z:") or a UNC 346 prefix ("\\\\host\\share") to the result buffer, and src points to the 347 first byte of the remainder of the path. We now scan through the rest 348 of the path, looking up each prefix in order to find the true name of 349 the last element of each prefix, thereby computing the full true name of 350 the original path. */ 351 while (*src) { 352 char *p = nextsep(src + 1); /* Find next separator */ 353 char c = *p; 354 assert(*src == '\\'); /* Invariant */ 355 *p = '\0'; /* Temporarily clear separator */ 356 h = FindFirstFile(path, &fd); /* Look up prefix */ 357 *p = c; /* Restore separator */ 358 if (h != INVALID_HANDLE_VALUE) { 359 /* Lookup succeeded; append true name to result and continue */ 360 FindClose(h); 361 if (!(dst = cp(dst, dend, '\\', 362 fd.cFileName, 363 fd.cFileName + strlen(fd.cFileName)))) { 364 return -1; 365 } 366 src = p; 367 continue; 368 } else { 369 if (!lastErrorReportable()) { 370 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 371 return -1; 372 } 373 break; 374 } else { 375 return -1; 376 } 377 } 378 } 379 380 if (dst >= dend) { 381 errno = ENAMETOOLONG; 382 return -1; 383 } 384 *dst = '\0'; 385 return 0; 386 387 } 388 389 390 /* Convert a pathname to canonical form. The input prefix is assumed 391 to be in canonical form already, and the trailing filename must not 392 contain any wildcard, dot/double dot, or other "tricky" characters 393 that are rejected by the canonicalize() routine above. This 394 routine is present to allow the canonicalization prefix cache to be 395 used while still returning canonical names with the correct 396 capitalization. */ 397 398 int 399 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size) 400 { 401 WIN32_FIND_DATA fd; 402 HANDLE h; 403 char *src, *dst, *dend; 404 405 src = pathWithCanonicalPrefix; 406 dst = result; /* Place results here */ 407 dend = dst + size; /* Don't go to or past here */ 408 409 h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */ 410 if (h != INVALID_HANDLE_VALUE) { 411 /* Lookup succeeded; concatenate true name to prefix */ 412 FindClose(h); 413 if (!(dst = cp(dst, dend, '\0', 414 canonicalPrefix, 415 canonicalPrefix + strlen(canonicalPrefix)))) { 416 return -1; 417 } 418 if (!(dst = cp(dst, dend, '\\', 419 fd.cFileName, 420 fd.cFileName + strlen(fd.cFileName)))) { 421 return -1; 422 } 423 } else { 424 if (!lastErrorReportable()) { 425 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 426 return -1; 427 } 428 } else { 429 return -1; 430 } 431 } 432 433 if (dst >= dend) { 434 errno = ENAMETOOLONG; 435 return -1; 436 } 437 *dst = '\0'; 438 return 0; 439 } 440 441 442 /* Wide character version of canonicalize. Size is a wide-character size. */ 443 444 int 445 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size) 446 { 447 WIN32_FIND_DATAW fd; 448 HANDLE h; 449 WCHAR *path; /* Working copy of path */ 450 WCHAR *src, *dst, *dend, c; 451 452 /* Reject paths that contain wildcards */ 453 if (wwild(orig_path)) { 454 errno = EINVAL; 455 return -1; 456 } 457 458 if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL) 459 return -1; 460 461 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 462 contrary to the documentation, the _fullpath procedure does not require 463 the drive to be available. */ 464 if(!_wfullpath(path, orig_path, size)) { 465 goto err; 466 } 467 468 if (wdots(path)) /* Check for prohibited combinations of dots */ 469 goto err; 470 471 src = path; /* Start scanning here */ 472 dst = result; /* Place results here */ 473 dend = dst + size; /* Don't go to or past here */ 474 475 /* Copy prefix, assuming path is absolute */ 476 c = src[0]; 477 if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A')) 478 && (src[1] == L':') && (src[2] == L'\\')) { 479 /* Drive specifier */ 480 *src = towupper(*src); /* Canonicalize drive letter */ 481 if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) { 482 goto err; 483 } 484 485 src += 2; 486 } else if ((src[0] == L'\\') && (src[1] == L'\\')) { 487 /* UNC pathname */ 488 WCHAR *p; 489 p = wnextsep(src + 2); /* Skip past host name */ 490 if (!*p) { 491 /* A UNC pathname must begin with "\\\\host\\share", 492 so reject this path as invalid if there is no share name */ 493 errno = EINVAL; 494 goto err; 495 } 496 p = wnextsep(p + 1); /* Skip past share name */ 497 if (!(dst = wcp(dst, dend, L'\0', src, p))) 498 goto err; 499 src = p; 500 } else { 501 /* Invalid path */ 502 errno = EINVAL; 503 goto err; 504 } 505 /* At this point we have copied either a drive specifier ("z:") or a UNC 506 prefix ("\\\\host\\share") to the result buffer, and src points to the 507 first byte of the remainder of the path. We now scan through the rest 508 of the path, looking up each prefix in order to find the true name of 509 the last element of each prefix, thereby computing the full true name of 510 the original path. */ 511 while (*src) { 512 WCHAR *p = wnextsep(src + 1); /* Find next separator */ 513 WCHAR c = *p; 514 WCHAR *pathbuf; 515 int pathlen; 516 517 assert(*src == L'\\'); /* Invariant */ 518 *p = L'\0'; /* Temporarily clear separator */ 519 520 if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) { 521 pathbuf = getPrefixed(path, pathlen); 522 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 523 free(pathbuf); 524 } else 525 h = FindFirstFileW(path, &fd); /* Look up prefix */ 526 527 *p = c; /* Restore separator */ 528 if (h != INVALID_HANDLE_VALUE) { 529 /* Lookup succeeded; append true name to result and continue */ 530 FindClose(h); 531 if (!(dst = wcp(dst, dend, L'\\', fd.cFileName, 532 fd.cFileName + wcslen(fd.cFileName)))){ 533 goto err; 534 } 535 src = p; 536 continue; 537 } else { 538 if (!lastErrorReportable()) { 539 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){ 540 goto err; 541 } 542 break; 543 } else { 544 goto err; 545 } 546 } 547 } 548 549 if (dst >= dend) { 550 errno = ENAMETOOLONG; 551 goto err; 552 } 553 *dst = L'\0'; 554 free(path); 555 return 0; 556 557 err: 558 free(path); 559 return -1; 560 } 561 562 563 /* Wide character version of canonicalizeWithPrefix. */ 564 565 int 566 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size) 567 { 568 WIN32_FIND_DATAW fd; 569 HANDLE h; 570 WCHAR *src, *dst, *dend; 571 WCHAR *pathbuf; 572 int pathlen; 573 574 src = pathWithCanonicalPrefix; 575 dst = result; /* Place results here */ 576 dend = dst + size; /* Don't go to or past here */ 577 578 579 if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) { 580 pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen); 581 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 582 free(pathbuf); 583 } else 584 h = FindFirstFileW(pathWithCanonicalPrefix, &fd); /* Look up prefix */ 585 if (h != INVALID_HANDLE_VALUE) { 586 /* Lookup succeeded; append true name to result and continue */ 587 FindClose(h); 588 if (!(dst = wcp(dst, dend, L'\0', 589 canonicalPrefix, 590 canonicalPrefix + wcslen(canonicalPrefix)))) { 591 return -1; 592 } 593 if (!(dst = wcp(dst, dend, L'\\', 594 fd.cFileName, 595 fd.cFileName + wcslen(fd.cFileName)))) { 596 return -1; 597 } 598 } else { 599 if (!lastErrorReportable()) { 600 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) { 601 return -1; 602 } 603 } else { 604 return -1; 605 } 606 } 607 608 if (dst >= dend) { 609 errno = ENAMETOOLONG; 610 return -1; 611 } 612 *dst = L'\0'; 613 return 0; 614 } 615 616 617 /* The appropriate location of getPrefixed() should be io_util_md.c, but 618 java.lang.instrument package has hardwired canonicalize_md.c into their 619 dll, to avoid complicate solution such as including io_util_md.c into 620 that package, as a workaround we put this method here. 621 */ 622 623 /* copy \\?\ or \\?\UNC\ to the front of path*/ 624 __declspec(dllexport) WCHAR* 625 getPrefixed(const WCHAR* path, int pathlen) { 626 WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR)); 627 if (pathbuf != 0) { 628 if (path[0] == L'\\' && path[1] == L'\\') { 629 if (path[2] == L'?' && path[3] == L'\\'){ 630 /* if it already has a \\?\ don't do the prefix */ 631 wcscpy(pathbuf, path ); 632 } else { 633 /* only UNC pathname includes double slashes here */ 634 wcscpy(pathbuf, L"\\\\?\\UNC\0"); 635 wcscat(pathbuf, path + 1); 636 } 637 } else { 638 wcscpy(pathbuf, L"\\\\?\\\0"); 639 wcscat(pathbuf, path ); 640 } 641 } 642 return pathbuf; 643 }