/*
 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * Pathname canonicalization for Win32 file systems
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <sys/stat.h>

#include <windows.h>
#include <winbase.h>
#include <errno.h>
#include "io_util_md.h"

#undef DEBUG_PATH        /* Define this to debug path code */

#define isfilesep(c) ((c) == '/' || (c) == '\\')
#define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
#define islb(c)       (IsDBCSLeadByte((BYTE)(c)))


/* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
   or NULL if dend would have been exceeded. If first != '\0', copy that byte
   before copying bytes from src to send - 1.
*/ 52 53 static char * 54 cp(char *dst, char *dend, char first, char *src, char *send) 55 { 56 char *p = src, *q = dst; 57 if (first != '\0') { 58 if (q < dend) { 59 *q++ = first; 60 } else { 61 errno = ENAMETOOLONG; 62 return NULL; 63 } 64 } 65 if (send - p > dend - q) { 66 errno = ENAMETOOLONG; 67 return NULL; 68 } 69 while (p < send) { 70 *q++ = *p++; 71 } 72 return q; 73 } 74 75 /* Wide character version of cp */ 76 77 static WCHAR* 78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send) 79 { 80 WCHAR *p = src, *q = dst; 81 if (first != L'\0') { 82 if (q < dend) { 83 *q++ = first; 84 } else { 85 errno = ENAMETOOLONG; 86 return NULL; 87 } 88 } 89 if (send - p > dend - q) { 90 errno = ENAMETOOLONG; 91 return NULL; 92 } 93 while (p < send) 94 *q++ = *p++; 95 return q; 96 } 97 98 99 /* Find first instance of '\\' at or following start. Return the address of 100 that byte or the address of the null terminator if '\\' is not found. */ 101 102 static char * 103 nextsep(char *start) 104 { 105 char *p = start; 106 int c; 107 while ((c = *p) && (c != '\\')) { 108 p += ((islb(c) && p[1]) ? 2 : 1); 109 } 110 return p; 111 } 112 113 /* Wide character version of nextsep */ 114 115 static WCHAR * 116 wnextsep(WCHAR *start) 117 { 118 WCHAR *p = start; 119 int c; 120 while ((c = *p) && (c != L'\\')) 121 p++; 122 return p; 123 } 124 125 /* Tell whether the given string contains any wildcard characters */ 126 127 static int 128 wild(char *start) 129 { 130 char *p = start; 131 int c; 132 while (c = *p) { 133 if ((c == '*') || (c == '?')) return 1; 134 p += ((islb(c) && p[1]) ? 2 : 1); 135 } 136 return 0; 137 } 138 139 /* Wide character version of wild */ 140 141 static int 142 wwild(WCHAR *start) 143 { 144 WCHAR *p = start; 145 int c; 146 while (c = *p) { 147 if ((c == L'*') || (c == L'?')) 148 return 1; 149 p++; 150 } 151 return 0; 152 } 153 154 /* Tell whether the given string contains prohibited combinations of dots. 
155 In the canonicalized form no path element may have dots at its end. 156 Allowed canonical paths: c:\xa...dksd\..ksa\.lk c:\...a\.b\cd..x.x 157 Prohibited canonical paths: c:\..\x c:\x.\d c:\... 158 */ 159 static int 160 dots(char *start) 161 { 162 char *p = start; 163 while (*p) { 164 if ((p = strchr(p, '.')) == NULL) // find next occurence of '.' 165 return 0; // no more dots 166 p++; // next char 167 while ((*p) == '.') // go to the end of dots 168 p++; 169 if (*p && (*p != '\\')) // path element does not end with a dot 170 p++; // go to the next char 171 else 172 return 1; // path element does end with a dot - prohibited 173 } 174 return 0; // no prohibited combinations of dots found 175 } 176 177 /* Wide character version of dots */ 178 static int 179 wdots(WCHAR *start) 180 { 181 WCHAR *p = start; 182 while (*p) { 183 if ((p = wcschr(p, L'.')) == NULL) // find next occurence of '.' 184 return 0; // no more dots 185 p++; // next char 186 while ((*p) == L'.') // go to the end of dots 187 p++; 188 if (*p && (*p != L'\\')) // path element does not end with a dot 189 p++; // go to the next char 190 else 191 return 1; // path element does end with a dot - prohibited 192 } 193 return 0; // no prohibited combinations of dots found 194 } 195 196 /* If the lookup of a particular prefix fails because the file does not exist, 197 because it is of the wrong type, because access is denied, or because the 198 network is unreachable then canonicalization does not fail, it terminates 199 successfully after copying the rest of the original path to the result path. 200 Other I/O errors cause an error return. 
201 */ 202 203 int 204 lastErrorReportable() 205 { 206 DWORD errval = GetLastError(); 207 if ((errval == ERROR_FILE_NOT_FOUND) 208 || (errval == ERROR_DIRECTORY) 209 || (errval == ERROR_PATH_NOT_FOUND) 210 || (errval == ERROR_BAD_NETPATH) 211 || (errval == ERROR_BAD_NET_NAME) 212 || (errval == ERROR_ACCESS_DENIED) 213 || (errval == ERROR_NETWORK_UNREACHABLE) 214 || (errval == ERROR_NETWORK_ACCESS_DENIED)) { 215 return 0; 216 } 217 218 #ifdef DEBUG_PATH 219 jio_fprintf(stderr, "canonicalize: errval %d\n", errval); 220 #endif 221 return 1; 222 } 223 224 /* Convert a pathname to canonical form. The input orig_path is assumed to 225 have been converted to native form already, via JVM_NativePath(). This is 226 necessary because _fullpath() rejects duplicate separator characters on 227 Win95, though it accepts them on NT. */ 228 229 int 230 canonicalize(char *orig_path, char *result, int size) 231 { 232 WIN32_FIND_DATA fd; 233 HANDLE h; 234 char path[1024]; /* Working copy of path */ 235 char *src, *dst, *dend; 236 237 /* Reject paths that contain wildcards */ 238 if (wild(orig_path)) { 239 errno = EINVAL; 240 return -1; 241 } 242 243 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 244 contrary to the documentation, the _fullpath procedure does not require 245 the drive to be available. It also does not reliably change all 246 occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. 
*/ 247 if(!_fullpath(path, orig_path, sizeof(path))) { 248 return -1; 249 } 250 251 /* Correction for Win95: _fullpath may leave a trailing "\\" 252 on a UNC pathname */ 253 if ((path[0] == '\\') && (path[1] == '\\')) { 254 char *p = path + strlen(path); 255 if ((p[-1] == '\\') && !islb(p[-2])) { 256 p[-1] = '\0'; 257 } 258 } 259 260 if (dots(path)) /* Check for prohibited combinations of dots */ 261 return -1; 262 263 src = path; /* Start scanning here */ 264 dst = result; /* Place results here */ 265 dend = dst + size; /* Don't go to or past here */ 266 267 /* Copy prefix, assuming path is absolute */ 268 if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) { 269 /* Drive specifier */ 270 *src = toupper(*src); /* Canonicalize drive letter */ 271 if (!(dst = cp(dst, dend, '\0', src, src + 2))) { 272 return -1; 273 } 274 src += 2; 275 } else if ((src[0] == '\\') && (src[1] == '\\')) { 276 /* UNC pathname */ 277 char *p; 278 p = nextsep(src + 2); /* Skip past host name */ 279 if (!*p) { 280 /* A UNC pathname must begin with "\\\\host\\share", 281 so reject this path as invalid if there is no share name */ 282 errno = EINVAL; 283 return -1; 284 } 285 p = nextsep(p + 1); /* Skip past share name */ 286 if (!(dst = cp(dst, dend, '\0', src, p))) { 287 return -1; 288 } 289 src = p; 290 } else { 291 /* Invalid path */ 292 errno = EINVAL; 293 return -1; 294 } 295 296 /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */ 297 /* for root pathes like "E:\" . If the path has this form, we should */ 298 /* simply return it, it is already canonicalized. */ 299 if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') { 300 /* At this point we have already copied the drive specifier ("z:")*/ 301 /* so we need to copy "\" and the null character. 
*/ 302 result[2] = '\\'; 303 result[3] = '\0'; 304 return 0; 305 } 306 307 /* At this point we have copied either a drive specifier ("z:") or a UNC 308 prefix ("\\\\host\\share") to the result buffer, and src points to the 309 first byte of the remainder of the path. We now scan through the rest 310 of the path, looking up each prefix in order to find the true name of 311 the last element of each prefix, thereby computing the full true name of 312 the original path. */ 313 while (*src) { 314 char *p = nextsep(src + 1); /* Find next separator */ 315 char c = *p; 316 assert(*src == '\\'); /* Invariant */ 317 *p = '\0'; /* Temporarily clear separator */ 318 h = FindFirstFile(path, &fd); /* Look up prefix */ 319 *p = c; /* Restore separator */ 320 if (h != INVALID_HANDLE_VALUE) { 321 /* Lookup succeeded; append true name to result and continue */ 322 FindClose(h); 323 if (!(dst = cp(dst, dend, '\\', 324 fd.cFileName, 325 fd.cFileName + strlen(fd.cFileName)))) { 326 return -1; 327 } 328 src = p; 329 continue; 330 } else { 331 if (!lastErrorReportable()) { 332 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 333 return -1; 334 } 335 break; 336 } else { 337 return -1; 338 } 339 } 340 } 341 342 if (dst >= dend) { 343 errno = ENAMETOOLONG; 344 return -1; 345 } 346 *dst = '\0'; 347 return 0; 348 349 } 350 351 352 /* Convert a pathname to canonical form. The input prefix is assumed 353 to be in canonical form already, and the trailing filename must not 354 contain any wildcard, dot/double dot, or other "tricky" characters 355 that are rejected by the canonicalize() routine above. This 356 routine is present to allow the canonicalization prefix cache to be 357 used while still returning canonical names with the correct 358 capitalization. 
*/ 359 360 int 361 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size) 362 { 363 WIN32_FIND_DATA fd; 364 HANDLE h; 365 char *src, *dst, *dend; 366 367 src = pathWithCanonicalPrefix; 368 dst = result; /* Place results here */ 369 dend = dst + size; /* Don't go to or past here */ 370 371 h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */ 372 if (h != INVALID_HANDLE_VALUE) { 373 /* Lookup succeeded; concatenate true name to prefix */ 374 FindClose(h); 375 if (!(dst = cp(dst, dend, '\0', 376 canonicalPrefix, 377 canonicalPrefix + strlen(canonicalPrefix)))) { 378 return -1; 379 } 380 if (!(dst = cp(dst, dend, '\\', 381 fd.cFileName, 382 fd.cFileName + strlen(fd.cFileName)))) { 383 return -1; 384 } 385 } else { 386 if (!lastErrorReportable()) { 387 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) { 388 return -1; 389 } 390 } else { 391 return -1; 392 } 393 } 394 395 if (dst >= dend) { 396 errno = ENAMETOOLONG; 397 return -1; 398 } 399 *dst = '\0'; 400 return 0; 401 } 402 403 404 /* Wide character version of canonicalize. Size is a wide-character size. */ 405 406 int 407 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size) 408 { 409 WIN32_FIND_DATAW fd; 410 HANDLE h; 411 WCHAR *path; /* Working copy of path */ 412 WCHAR *src, *dst, *dend, c; 413 414 /* Reject paths that contain wildcards */ 415 if (wwild(orig_path)) { 416 errno = EINVAL; 417 return -1; 418 } 419 420 if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL) 421 return -1; 422 423 /* Collapse instances of "foo\.." and ensure absoluteness. Note that 424 contrary to the documentation, the _fullpath procedure does not require 425 the drive to be available. 
*/ 426 if(!_wfullpath(path, orig_path, size)) { 427 goto err; 428 } 429 430 if (wdots(path)) /* Check for prohibited combinations of dots */ 431 goto err; 432 433 src = path; /* Start scanning here */ 434 dst = result; /* Place results here */ 435 dend = dst + size; /* Don't go to or past here */ 436 437 /* Copy prefix, assuming path is absolute */ 438 c = src[0]; 439 if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A')) 440 && (src[1] == L':') && (src[2] == L'\\')) { 441 /* Drive specifier */ 442 *src = towupper(*src); /* Canonicalize drive letter */ 443 if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) { 444 goto err; 445 } 446 447 src += 2; 448 } else if ((src[0] == L'\\') && (src[1] == L'\\')) { 449 /* UNC pathname */ 450 WCHAR *p; 451 p = wnextsep(src + 2); /* Skip past host name */ 452 if (!*p) { 453 /* A UNC pathname must begin with "\\\\host\\share", 454 so reject this path as invalid if there is no share name */ 455 errno = EINVAL; 456 goto err; 457 } 458 p = wnextsep(p + 1); /* Skip past share name */ 459 if (!(dst = wcp(dst, dend, L'\0', src, p))) 460 goto err; 461 src = p; 462 } else { 463 /* Invalid path */ 464 errno = EINVAL; 465 goto err; 466 } 467 /* At this point we have copied either a drive specifier ("z:") or a UNC 468 prefix ("\\\\host\\share") to the result buffer, and src points to the 469 first byte of the remainder of the path. We now scan through the rest 470 of the path, looking up each prefix in order to find the true name of 471 the last element of each prefix, thereby computing the full true name of 472 the original path. 
*/ 473 while (*src) { 474 WCHAR *p = wnextsep(src + 1); /* Find next separator */ 475 WCHAR c = *p; 476 WCHAR *pathbuf; 477 int pathlen; 478 479 assert(*src == L'\\'); /* Invariant */ 480 *p = L'\0'; /* Temporarily clear separator */ 481 482 if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) { 483 pathbuf = getPrefixed(path, pathlen); 484 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 485 free(pathbuf); 486 } else 487 h = FindFirstFileW(path, &fd); /* Look up prefix */ 488 489 *p = c; /* Restore separator */ 490 if (h != INVALID_HANDLE_VALUE) { 491 /* Lookup succeeded; append true name to result and continue */ 492 FindClose(h); 493 if (!(dst = wcp(dst, dend, L'\\', fd.cFileName, 494 fd.cFileName + wcslen(fd.cFileName)))){ 495 goto err; 496 } 497 src = p; 498 continue; 499 } else { 500 if (!lastErrorReportable()) { 501 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){ 502 goto err; 503 } 504 break; 505 } else { 506 goto err; 507 } 508 } 509 } 510 511 if (dst >= dend) { 512 errno = ENAMETOOLONG; 513 goto err; 514 } 515 *dst = L'\0'; 516 free(path); 517 return 0; 518 519 err: 520 free(path); 521 return -1; 522 } 523 524 525 /* Wide character version of canonicalizeWithPrefix. 
*/ 526 527 int 528 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size) 529 { 530 WIN32_FIND_DATAW fd; 531 HANDLE h; 532 WCHAR *src, *dst, *dend; 533 WCHAR *pathbuf; 534 int pathlen; 535 536 src = pathWithCanonicalPrefix; 537 dst = result; /* Place results here */ 538 dend = dst + size; /* Don't go to or past here */ 539 540 541 if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) { 542 pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen); 543 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */ 544 free(pathbuf); 545 } else 546 h = FindFirstFileW(pathWithCanonicalPrefix, &fd); /* Look up prefix */ 547 if (h != INVALID_HANDLE_VALUE) { 548 /* Lookup succeeded; append true name to result and continue */ 549 FindClose(h); 550 if (!(dst = wcp(dst, dend, L'\0', 551 canonicalPrefix, 552 canonicalPrefix + wcslen(canonicalPrefix)))) { 553 return -1; 554 } 555 if (!(dst = wcp(dst, dend, L'\\', 556 fd.cFileName, 557 fd.cFileName + wcslen(fd.cFileName)))) { 558 return -1; 559 } 560 } else { 561 if (!lastErrorReportable()) { 562 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) { 563 return -1; 564 } 565 } else { 566 return -1; 567 } 568 } 569 570 if (dst >= dend) { 571 errno = ENAMETOOLONG; 572 return -1; 573 } 574 *dst = L'\0'; 575 return 0; 576 } 577 578 579 /* The appropriate location of getPrefixed() should be io_util_md.c, but 580 java.lang.instrument package has hardwired canonicalize_md.c into their 581 dll, to avoid complicate solution such as including io_util_md.c into 582 that package, as a workaround we put this method here. 583 */ 584 585 /* copy \\?\ or \\?\UNC\ to the front of path*/ 586 WCHAR* 587 getPrefixed(const WCHAR* path, int pathlen) { 588 WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR)); 589 if (pathbuf != 0) { 590 if (path[0] == L'\\' && path[1] == L'\\') { 591 if (path[2] == L'?' 
&& path[3] == L'\\'){ 592 /* if it already has a \\?\ don't do the prefix */ 593 wcscpy(pathbuf, path ); 594 } else { 595 /* only UNC pathname includes double slashes here */ 596 wcscpy(pathbuf, L"\\\\?\\UNC\0"); 597 wcscat(pathbuf, path + 1); 598 } 599 } else { 600 wcscpy(pathbuf, L"\\\\?\\\0"); 601 wcscat(pathbuf, path ); 602 } 603 } 604 return pathbuf; 605 }