1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdio.h>
  27 #include <assert.h>
  28 #include <sys/stat.h>
  29 
  30 #ifdef DEBUG_ARGFILE
  31   #ifndef NO_JNI
  32     #define NO_JNI
  33   #endif
  34   #define JLI_ReportMessage(p1, p2) printf((p1), (p2))
  35 #else
  36   #include "java.h"
  37 #endif
  38 
  39 #include "jli_util.h"
  40 #include "emessages.h"
  41 
/* Upper bound on an argument file's st_size; expandArgFile rejects anything larger. */
#define MAX_ARGF_SIZE 0x7fffffffL
  43 
  44 static char* clone_substring(const char *begin, size_t len) {
  45     char *rv = (char *) JLI_MemAlloc(len + 1);
  46     memcpy(rv, begin, len);
  47     rv[len] = '\0';
  48     return rv;
  49 }
  50 
/*
 * States of the argument-file tokenizer implemented by nextToken.
 * The transition diagram above nextToken shows how they connect.
 */
enum STATE {
    /* between tokens: white space is skipped until a token starts */
    FIND_NEXT,
    /* after an unquoted '#': everything up to end of line is ignored */
    IN_COMMENT,
    /* inside a quoted section: white space and '#' are ordinary characters */
    IN_QUOTE,
    /* the previous character was a backslash seen inside a quote */
    IN_ESCAPE,
    /* after backslash + newline in a quote: skip white space, then resume IN_QUOTE */
    SKIP_LEAD_WS,
    /* inside the unquoted part of a token */
    IN_TOKEN
};
  59 
/*
 * Tokenizer context handed to nextToken. state, quote_char and parts carry
 * over from one call to the next; cptr and eob describe the current buffer.
 */
typedef struct {
    /* current tokenizer state */
    enum STATE state;
    /* next character to read in the current buffer */
    const char* cptr;
    /* end of the current buffer (first position that must not be read) */
    const char* eob;
    /* quote character that opened the current quoted section */
    char quote_char;
    /* pieces of the token being assembled, joined with JLI_List_combine */
    JLI_List parts;
} __ctx_args;
  67 
/* Value of firstAppArgIndex while the main class has not been seen yet. */
#define NOT_FOUND -1
/* Index recorded by checkArg once the main class is found; 0 for tools. */
static int firstAppArgIndex = NOT_FOUND;

/* Set when the previous option takes its value as a separate argument. */
static jboolean expectingNoDashArg = JNI_FALSE;
/* Number of arguments passed through checkArg since the counter was last reset. */
static size_t argsCount = 0;
/* When set, JLI_PreprocessArg no longer expands @argfiles. */
static jboolean stopExpansion = JNI_FALSE;
  74 
  75 void JLI_InitArgProcessing(jboolean isJava, jboolean disableArgFile) {
  76     // No expansion for relaunch
  77     if (argsCount != 0) {
  78         stopExpansion = JNI_TRUE;
  79         argsCount = 0;
  80     } else {
  81         stopExpansion = disableArgFile;
  82     }
  83 
  84     expectingNoDashArg = JNI_FALSE;
  85 
  86     // for tools, this value remains 0 all the time.
  87     firstAppArgIndex = isJava ? NOT_FOUND : 0;
  88 }
  89 
/*
 * Returns the index recorded for the first application argument:
 * NOT_FOUND (-1) while no main class has been seen, the value set by
 * checkArg afterwards, and always 0 for tools.
 */
int JLI_GetAppArgIndex() {
    // Will be 0 for tools
    return firstAppArgIndex;
}
  94 
  95 static void checkArg(const char *arg) {
  96     size_t idx = 0;
  97     argsCount++;
  98     if (argsCount == 1) {
  99         // ignore first argument, the application name
 100         return;
 101     }
 102 
 103     // All arguments arrive here must be a launcher argument,
 104     // ie. by now, all argfile expansions must have been performed.
 105     if (*arg == '-') {
 106         expectingNoDashArg = JNI_FALSE;
 107         if (IsWhiteSpaceOption(arg)) {
 108             // expect an argument
 109             expectingNoDashArg = JNI_TRUE;
 110 
 111             if (JLI_StrCmp(arg, "-jar") == 0 ||
 112                 JLI_StrCmp(arg, "-m") == 0) {
 113                 // This is tricky, we do expect NoDashArg
 114                 // But that is considered main class to stop expansion
 115                 expectingNoDashArg = JNI_FALSE;
 116                 // We can not just update the idx here because if -jar @file
 117                 // still need expansion of @file to get the argument for -jar
 118             }
 119         } else if (JLI_StrCmp(arg, "-Xdisable-@files") == 0) {
 120             stopExpansion = JNI_TRUE;
 121         }
 122     } else {
 123         if (!expectingNoDashArg) {
 124             // this is main class, argsCount is index to next arg
 125             idx = argsCount;
 126         }
 127         expectingNoDashArg = JNI_FALSE;
 128     }
 129     // only update on java mode and not yet found main class
 130     if (firstAppArgIndex == NOT_FOUND && idx != 0) {
 131         firstAppArgIndex = (int) idx;
 132     }
 133 }
 134 
 135 /*
 136        [\n\r]   +------------+                        +------------+ [\n\r]
 137       +---------+ IN_COMMENT +<------+                | IN_ESCAPE  +---------+
 138       |         +------------+       |                +------------+         |
 139       |    [#]       ^               |[#]                 ^     |            |
 140       |   +----------+               |                [\\]|     |[^\n\r]     |
 141       v   |                          |                    |     v            |
 142 +------------+ [^ \t\n\r\f]  +------------+['"]>      +------------+         |
 143 | FIND_NEXT  +-------------->+ IN_TOKEN   +-----------+ IN_QUOTE   +         |
 144 +------------+               +------------+   <[quote]+------------+         |
 145   |   ^                          |                       |  ^   ^            |
 146   |   |               [ \t\n\r\f]|                 [\n\r]|  |   |[^ \t\n\r\f]v
 147   |   +--------------------------+-----------------------+  |  +--------------+
 148   |                       ['"]                              |  | SKIP_LEAD_WS |
 149   +---------------------------------------------------------+  +--------------+
 150 */
 151 static char* nextToken(__ctx_args *pctx) {
 152     const char* nextc = pctx->cptr;
 153     const char* const eob = pctx->eob;
 154     const char* anchor = nextc;
 155     char *token;
 156 
 157     for (; nextc < eob; nextc++) {
 158         register char ch = *nextc;
 159 
 160         // Skip white space characters
 161         if (pctx->state == FIND_NEXT || pctx->state == SKIP_LEAD_WS) {
 162             while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\f') {
 163                 nextc++;
 164                 if (nextc >= eob) {
 165                     return NULL;
 166                 }
 167                 ch = *nextc;
 168             }
 169             pctx->state = (pctx->state == FIND_NEXT) ? IN_TOKEN : IN_QUOTE;
 170             anchor = nextc;
 171         // Deal with escape sequences
 172         } else if (pctx->state == IN_ESCAPE) {
 173             // concatenation directive
 174             if (ch == '\n' || ch == '\r') {
 175                 pctx->state = SKIP_LEAD_WS;
 176             } else {
 177             // escaped character
 178                 char* escaped = (char*) JLI_MemAlloc(2 * sizeof(char));
 179                 escaped[1] = '\0';
 180                 switch (ch) {
 181                     case 'n':
 182                         escaped[0] = '\n';
 183                         break;
 184                     case 'r':
 185                         escaped[0] = '\r';
 186                         break;
 187                     case 't':
 188                         escaped[0] = '\t';
 189                         break;
 190                     case 'f':
 191                         escaped[0] = '\f';
 192                         break;
 193                     default:
 194                         escaped[0] = ch;
 195                         break;
 196                 }
 197                 JLI_List_add(pctx->parts, escaped);
 198                 pctx->state = IN_QUOTE;
 199             }
 200             // anchor to next character
 201             anchor = nextc + 1;
 202             continue;
 203         // ignore comment to EOL
 204         } else if (pctx->state == IN_COMMENT) {
 205             while (ch != '\n' && ch != '\r') {
 206                 nextc++;
 207                 if (nextc > eob) {
 208                     return NULL;
 209                 }
 210                 ch = *nextc;
 211             }
 212             pctx->state = FIND_NEXT;
 213             continue;
 214         }
 215 
 216         assert(pctx->state != IN_ESCAPE);
 217         assert(pctx->state != FIND_NEXT);
 218         assert(pctx->state != SKIP_LEAD_WS);
 219         assert(pctx->state != IN_COMMENT);
 220 
 221         switch(ch) {
 222             case ' ':
 223             case '\t':
 224             case '\f':
 225                 if (pctx->state == IN_QUOTE) {
 226                     continue;
 227                 }
 228                 // fall through
 229             case '\n':
 230             case '\r':
 231                 if (pctx->parts->size == 0) {
 232                     token = clone_substring(anchor, nextc - anchor);
 233                 } else {
 234                     JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 235                     token = JLI_List_combine(pctx->parts);
 236                     JLI_List_free(pctx->parts);
 237                     pctx->parts = JLI_List_new(4);
 238                 }
 239                 pctx->cptr = nextc + 1;
 240                 pctx->state = FIND_NEXT;
 241                 return token;
 242             case '#':
 243                 if (pctx->state == IN_QUOTE) {
 244                     continue;
 245                 }
 246                 pctx->state = IN_COMMENT;
 247                 break;
 248             case '\\':
 249                 if (pctx->state != IN_QUOTE) {
 250                     continue;
 251                 }
 252                 JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 253                 pctx->state = IN_ESCAPE;
 254                 break;
 255             case '\'':
 256             case '"':
 257                 if (pctx->state == IN_QUOTE && pctx->quote_char != ch) {
 258                     // not matching quote
 259                     continue;
 260                 }
 261                 // partial before quote
 262                 if (anchor != nextc) {
 263                     JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 264                 }
 265                 // anchor after quote character
 266                 anchor = nextc + 1;
 267                 if (pctx->state == IN_TOKEN) {
 268                     pctx->quote_char = ch;
 269                     pctx->state = IN_QUOTE;
 270                 } else {
 271                     pctx->state = IN_TOKEN;
 272                 }
 273                 break;
 274             default:
 275                 break;
 276         }
 277     }
 278 
 279     assert(nextc == eob);
 280     if (anchor != nextc) {
 281         // not yet return until end of stream, we have part of a token.
 282         JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 283     }
 284     return NULL;
 285 }
 286 
 287 static JLI_List readArgFile(FILE *file) {
 288     char buf[4096];
 289     JLI_List rv;
 290     __ctx_args ctx;
 291     size_t size;
 292     char *token;
 293 
 294     ctx.state = FIND_NEXT;
 295     ctx.parts = JLI_List_new(4);
 296 
 297     /* arbitrarily pick 8, seems to be a reasonable number of arguments */
 298     rv = JLI_List_new(8);
 299 
 300     while (!feof(file)) {
 301         size = fread(buf, sizeof(char), sizeof(buf), file);
 302         if (ferror(file)) {
 303             JLI_List_free(rv);
 304             return NULL;
 305         }
 306 
 307         /* nextc is next character to read from the buffer
 308          * eob is the end of input
 309          * token is the copied token value, NULL if no a complete token
 310          */
 311         ctx.cptr = buf;
 312         ctx.eob = buf + size;
 313         token = nextToken(&ctx);
 314         while (token != NULL) {
 315             checkArg(token);
 316             JLI_List_add(rv, token);
 317             token = nextToken(&ctx);
 318         }
 319     }
 320 
 321     // remaining partial token
 322     if (ctx.state == IN_TOKEN || ctx.state == IN_QUOTE) {
 323         if (ctx.parts->size != 0) {
 324             JLI_List_add(rv, JLI_List_combine(ctx.parts));
 325         }
 326     }
 327     JLI_List_free(ctx.parts);
 328 
 329     return rv;
 330 }
 331 
 332 /*
 333  * if the arg represent a file, that is, prefix with a single '@',
 334  * return a list of arguments from the file.
 335  * otherwise, return NULL.
 336  */
 337 static JLI_List expandArgFile(const char *arg) {
 338     FILE *fptr;
 339     struct stat st;
 340     JLI_List rv;
 341 
 342     /* failed to access the file */
 343     if (stat(arg, &st) != 0) {
 344         JLI_ReportMessage(CFG_ERROR6, arg);
 345         exit(1);
 346     }
 347 
 348     if (st.st_size > MAX_ARGF_SIZE) {
 349         JLI_ReportMessage(CFG_ERROR10, MAX_ARGF_SIZE);
 350         exit(1);
 351     }
 352 
 353     fptr = fopen(arg, "r");
 354     /* arg file cannot be openned */
 355     if (fptr == NULL) {
 356         JLI_ReportMessage(CFG_ERROR6, arg);
 357         exit(1);
 358     }
 359 
 360     rv = readArgFile(fptr);
 361     fclose(fptr);
 362 
 363     /* error occurred reading the file */
 364     if (rv == NULL) {
 365         JLI_ReportMessage(DLL_ERROR4, arg);
 366         exit(1);
 367     }
 368 
 369     return rv;
 370 }
 371 
 372 JLI_List JLI_PreprocessArg(const char *arg)
 373 {
 374     JLI_List rv;
 375 
 376     if (firstAppArgIndex > 0) {
 377         // In user application arg, no more work.
 378         return NULL;
 379     }
 380 
 381     if (stopExpansion) {
 382         // still looking for user application arg
 383         checkArg(arg);
 384         return NULL;
 385     }
 386 
 387     if (arg[0] != '@') {
 388         checkArg(arg);
 389         return NULL;
 390     }
 391 
 392     if (arg[1] == '\0') {
 393         // @ by itself is an argument
 394         checkArg(arg);
 395         return NULL;
 396     }
 397 
 398     arg++;
 399     if (arg[0] == '@') {
 400         // escaped @argument
 401         rv = JLI_List_new(1);
 402         checkArg(arg);
 403         JLI_List_add(rv, JLI_StringDup(arg));
 404     } else {
 405         rv = expandArgFile(arg);
 406     }
 407     return rv;
 408 }
 409 
 410 #ifdef DEBUG_ARGFILE
 411 /*
 412  * Stand-alone sanity test, build with following command line
 413  * $ CC -DDEBUG_ARGFILE -DNO_JNI -g args.c jli_util.c
 414  */
 415 
 416 void fail(char *expected, char *actual, size_t idx) {
 417     printf("FAILED: Token[%lu] expected to be <%s>, got <%s>\n", idx, expected, actual);
 418     exit(1);
 419 }
 420 
 421 void test_case(char *case_data, char **tokens, size_t cnt_tokens) {
 422     size_t actual_cnt;
 423     char *token;
 424     __ctx_args ctx;
 425 
 426     actual_cnt = 0;
 427 
 428     ctx.state = FIND_NEXT;
 429     ctx.parts = JLI_List_new(4);
 430     ctx.cptr = case_data;
 431     ctx.eob = case_data + strlen(case_data);
 432 
 433     printf("Test case: <%s>, expected %lu tokens.\n", case_data, cnt_tokens);
 434 
 435     for (token = nextToken(&ctx); token != NULL; token = nextToken(&ctx)) {
 436         // should not have more tokens than expected
 437         if (actual_cnt >= cnt_tokens) {
 438             printf("FAILED: Extra token detected: <%s>\n", token);
 439             exit(2);
 440         }
 441         if (JLI_StrCmp(token, tokens[actual_cnt]) != 0) {
 442             fail(tokens[actual_cnt], token, actual_cnt);
 443         }
 444         actual_cnt++;
 445     }
 446 
 447     char* last = NULL;
 448     if (ctx.parts->size != 0) {
 449         last = JLI_List_combine(ctx.parts);
 450     }
 451     JLI_List_free(ctx.parts);
 452 
 453     if (actual_cnt >= cnt_tokens) {
 454         // same number of tokens, should have nothing left to parse
 455         if (last != NULL) {
 456             if (*last != '#') {
 457                 printf("Leftover detected: %s", last);
 458                 exit(2);
 459             }
 460         }
 461     } else {
 462         if (JLI_StrCmp(last, tokens[actual_cnt]) != 0) {
 463             fail(tokens[actual_cnt], last, actual_cnt);
 464         }
 465         actual_cnt++;
 466     }
 467     if (actual_cnt != cnt_tokens) {
 468         printf("FAILED: Number of tokens not match, expected %lu, got %lu\n",
 469             cnt_tokens, actual_cnt);
 470         exit(3);
 471     }
 472 
 473     printf("PASS\n");
 474 }
 475 
/*
 * Runs test_case on an array of strings whose first element is the input
 * text and whose remaining elements are the expected tokens. name must be
 * an actual array (not a pointer) so that sizeof yields its element count.
 */
#define DO_CASE(name) \
    test_case(name[0], name + 1, sizeof(name)/sizeof(char*) - 1)
 478 
/*
 * Stand-alone test driver: runs the built-in tokenizer cases, then, if any
 * command-line arguments were given, passes each argv entry through
 * JLI_PreprocessArg and prints the tokens it returns.
 */
int main(int argc, char** argv) {
    size_t i, j;

    // quoted value with escaped backslashes, line ends with white space
    char* case1[] = { "-version -cp \"c:\\\\java libs\\\\one.jar\" \n",
        "-version", "-cp", "c:\\java libs\\one.jar" };
    DO_CASE(case1);

    // note the open quote at the end
    char* case2[] = { "com.foo.Panda \"Furious 5\"\fand\t'Shi Fu' \"escape\tprison",
        "com.foo.Panda", "Furious 5", "and", "Shi Fu", "escape\tprison"};
    DO_CASE(case2);

    // only \f \n \r \t are translated; any other escaped character stands for itself
    char* escaped_chars[] = { "escaped chars testing \"\\a\\b\\c\\f\\n\\r\\t\\v\\9\\6\\23\\82\\28\\377\\477\\278\\287\"",
        "escaped", "chars", "testing", "abc\f\n\r\tv96238228377477278287"};
    DO_CASE(escaped_chars);

    // the other kind of quote is literal inside a quote; quotes may adjoin unquoted text
    char* mixed_quote[]  = { "\"mix 'single quote' in double\" 'mix \"double quote\" in single' partial\"quote me\"this",
        "mix 'single quote' in double", "mix \"double quote\" in single", "partialquote methis"};
    DO_CASE(mixed_quote);

    // '#' starts a comment outside quotes and is literal inside them
    char* comments[]  = { "line one #comment\n'line #2' #rest are comment\r\n#comment on line 3\nline 4 #comment to eof",
        "line", "one", "line #2", "line", "4"};
    DO_CASE(comments);

    // an unterminated quote is closed by the end of the line
    char* open_quote[] = { "This is an \"open quote \n    across line\n\t, note for WS.",
        "This", "is", "an", "open quote ", "across", "line", ",", "note", "for", "WS." };
    DO_CASE(open_quote);

    // escapes keep working inside a quote that is never closed
    char* escape_in_open_quote[] = { "Try \"this \\\\\\\\ escape\\n double quote \\\" in open quote",
        "Try", "this \\\\ escape\n double quote \" in open quote" };
    DO_CASE(escape_in_open_quote);

    // adjacent quoted and unquoted pieces form a single token
    char* quote[] = { "'-Dmy.quote.single'='Property in single quote. Here a double quote\" Add some slashes \\\\/'",
        "-Dmy.quote.single=Property in single quote. Here a double quote\" Add some slashes \\/" };
    DO_CASE(quote);

    // backslash + newline inside a quote continues the token on the next line
    char* multi[] = { "\"Open quote to \n  new \"line \\\n\r   third\\\n\r\\\tand\ffourth\"",
        "Open quote to ", "new", "line third\tand\ffourth" };
    DO_CASE(multi);

    // outside quotes a backslash is an ordinary character
    char* escape_quote[] = { "c:\\\"partial quote\"\\lib",
        "c:\\partial quote\\lib" };
    DO_CASE(escape_quote);

    if (argc > 1) {
        // argv[0] is included: checkArg treats the first argument it sees
        // as the application name
        for (i = 0; i < argc; i++) {
            JLI_List tokens = JLI_PreprocessArg(argv[i]);
            if (NULL != tokens) {
                for (j = 0; j < tokens->size; j++) {
                    printf("Token[%lu]: <%s>\n", (unsigned long) j, tokens->elements[j]);
                }
            }
        }
    }
}
 534 
 535 #endif // DEBUG_ARGFILE