1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdio.h>
  27 #include <assert.h>
  28 #include <sys/stat.h>
  29 
  30 #ifdef DEBUG_ARGFILE
  31   #ifndef NO_JNI
  32     #define NO_JNI
  33   #endif
  34   #define JLI_ReportMessage(p1, p2) printf((p1), (p2))
  35 #else
  36   #include "java.h"
  37 #endif
  38 
  39 #include "jli_util.h"
  40 #include "emessages.h"
  41 
  42 #define MAX_ARGF_SIZE 0x7fffffffL
  43 
  44 static char* clone_substring(const char *begin, size_t len) {
  45     char *rv = (char *) JLI_MemAlloc(len + 1);
  46     memcpy(rv, begin, len);
  47     rv[len] = '\0';
  48     return rv;
  49 }
  50 
  51 enum STATE {
  52     FIND_NEXT,
  53     IN_COMMENT,
  54     IN_QUOTE,
  55     IN_ESCAPE,
  56     SKIP_LEAD_WS,
  57     IN_TOKEN
  58 };
  59 
  60 typedef struct {
  61     enum STATE state;
  62     const char* cptr;
  63     const char* eob;
  64     char quote_char;
  65     JLI_List parts;
  66 } __ctx_args;
  67 
  68 #define NOT_FOUND -1
  69 static int firstAppArgIndex = NOT_FOUND;
  70 
  71 static jboolean expectingNoDashArg = JNI_FALSE;
  72 static size_t argsCount = 0;
  73 static jboolean stopExpansion = JNI_FALSE;
  74 
  75 void JLI_InitArgProcessing(jboolean isJava, jboolean disableArgFile) {
  76     // No expansion for relaunch
  77     if (argsCount != 0) {
  78         stopExpansion = JNI_TRUE;
  79         argsCount = 0;
  80     } else {
  81         stopExpansion = disableArgFile;
  82     }
  83 
  84     expectingNoDashArg = JNI_FALSE;
  85 
  86     // for tools, this value remains 0 all the time.
  87     firstAppArgIndex = isJava ? NOT_FOUND : 0;
  88 }
  89 
  90 int JLI_GetAppArgIndex() {
  91     // Will be 0 for tools
  92     return firstAppArgIndex;
  93 }
  94 
  95 static void checkArg(const char *arg) {
  96     size_t idx = 0;
  97     argsCount++;
  98     if (argsCount == 1) {
  99         // ignore first argument, the application name
 100         return;
 101     }
 102 
 103     // All arguments arrive here must be a launcher argument,
 104     // ie. by now, all argfile expansions must have been performed.
 105     if (*arg++ == '-') {
 106         expectingNoDashArg = JNI_FALSE;
 107         if (JLI_StrCmp(arg, "cp") == 0 ||
 108             JLI_StrCmp(arg, "classpath") == 0 ||
 109             JLI_StrCmp(arg, "addmods") == 0 ||
 110             JLI_StrCmp(arg, "limitmods") == 0 ||
 111             JLI_StrCmp(arg, "mp") == 0 ||
 112             JLI_StrCmp(arg, "modulepath") == 0 ||
 113             JLI_StrCmp(arg, "upgrademodulepath") == 0) {
 114             expectingNoDashArg = JNI_TRUE;
 115         } else if (JLI_StrCmp(arg, "jar") == 0 ||
 116                    JLI_StrCmp(arg, "m") == 0) {
 117             // This is tricky, we do expect NoDashArg
 118             // But that is considered main class to stop expansion
 119             expectingNoDashArg = JNI_FALSE;
 120             // We can not just update the idx here because if -jar @file
 121             // still need expansion of @file to get the argument for -jar
 122         } else if (JLI_StrCmp(arg, "Xdisable-@files") == 0) {
 123             stopExpansion = JNI_TRUE;
 124         }
 125     } else {
 126         if (!expectingNoDashArg) {
 127             // this is main class, argsCount is index to next arg
 128             idx = argsCount;
 129         }
 130         expectingNoDashArg = JNI_FALSE;
 131     }
 132     // only update on java mode and not yet found main class
 133     if (firstAppArgIndex == -1 && idx != 0) {
 134         firstAppArgIndex = (int) idx;
 135     }
 136 }
 137 
 138 /*
 139        [\n\r]   +------------+                        +------------+ [\n\r]
 140       +---------+ IN_COMMENT +<------+                | IN_ESCAPE  +---------+
 141       |         +------------+       |                +------------+         |
 142       |    [#]       ^               |[#]                 ^     |            |
 143       |   +----------+               |                [\\]|     |[^\n\r]     |
 144       v   |                          |                    |     v            |
 145 +------------+ [^ \t\n\r\f]  +------------+['"]>      +------------+         |
 146 | FIND_NEXT  +-------------->+ IN_TOKEN   +-----------+ IN_QUOTE   +         |
 147 +------------+               +------------+   <[quote]+------------+         |
 148   |   ^                          |                       |  ^   ^            |
 149   |   |               [ \t\n\r\f]|                 [\n\r]|  |   |[^ \t\n\r\f]v
 150   |   +--------------------------+-----------------------+  |  +--------------+
 151   |                       ['"]                              |  | SKIP_LEAD_WS |
 152   +---------------------------------------------------------+  +--------------+
 153 */
 154 static char* nextToken(__ctx_args *pctx) {
 155     const char* nextc = pctx->cptr;
 156     const char* const eob = pctx->eob;
 157     const char* anchor = nextc;
 158     char *token;
 159 
 160     for (; nextc < eob; nextc++) {
 161         register char ch = *nextc;
 162 
 163         // Skip white space characters
 164         if (pctx->state == FIND_NEXT || pctx->state == SKIP_LEAD_WS) {
 165             while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\f') {
 166                 nextc++;
 167                 if (nextc >= eob) {
 168                     return NULL;
 169                 }
 170                 ch = *nextc;
 171             }
 172             pctx->state = (pctx->state == FIND_NEXT) ? IN_TOKEN : IN_QUOTE;
 173             anchor = nextc;
 174         // Deal with escape sequences
 175         } else if (pctx->state == IN_ESCAPE) {
 176             // concatenation directive
 177             if (ch == '\n' || ch == '\r') {
 178                 pctx->state = SKIP_LEAD_WS;
 179             } else {
 180             // escaped character
 181                 char* escaped = (char*) JLI_MemAlloc(2 * sizeof(char));
 182                 escaped[1] = '\0';
 183                 switch (ch) {
 184                     case 'n':
 185                         escaped[0] = '\n';
 186                         break;
 187                     case 'r':
 188                         escaped[0] = '\r';
 189                         break;
 190                     case 't':
 191                         escaped[0] = '\t';
 192                         break;
 193                     case 'f':
 194                         escaped[0] = '\f';
 195                         break;
 196                     default:
 197                         escaped[0] = ch;
 198                         break;
 199                 }
 200                 JLI_List_add(pctx->parts, escaped);
 201                 pctx->state = IN_QUOTE;
 202             }
 203             // anchor to next character
 204             anchor = nextc + 1;
 205             continue;
 206         // ignore comment to EOL
 207         } else if (pctx->state == IN_COMMENT) {
 208             while (ch != '\n' && ch != '\r') {
 209                 nextc++;
 210                 if (nextc > eob) {
 211                     return NULL;
 212                 }
 213                 ch = *nextc;
 214             }
 215             pctx->state = FIND_NEXT;
 216             continue;
 217         }
 218 
 219         assert(pctx->state != IN_ESCAPE);
 220         assert(pctx->state != FIND_NEXT);
 221         assert(pctx->state != SKIP_LEAD_WS);
 222         assert(pctx->state != IN_COMMENT);
 223 
 224         switch(ch) {
 225             case ' ':
 226             case '\t':
 227             case '\f':
 228                 if (pctx->state == IN_QUOTE) {
 229                     continue;
 230                 }
 231                 // fall through
 232             case '\n':
 233             case '\r':
 234                 if (pctx->parts->size == 0) {
 235                     token = clone_substring(anchor, nextc - anchor);
 236                 } else {
 237                     JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 238                     token = JLI_List_combine(pctx->parts);
 239                     JLI_List_free(pctx->parts);
 240                     pctx->parts = JLI_List_new(4);
 241                 }
 242                 pctx->cptr = nextc + 1;
 243                 pctx->state = FIND_NEXT;
 244                 return token;
 245             case '#':
 246                 if (pctx->state == IN_QUOTE) {
 247                     continue;
 248                 }
 249                 pctx->state = IN_COMMENT;
 250                 break;
 251             case '\\':
 252                 if (pctx->state != IN_QUOTE) {
 253                     continue;
 254                 }
 255                 JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 256                 pctx->state = IN_ESCAPE;
 257                 break;
 258             case '\'':
 259             case '"':
 260                 if (pctx->state == IN_QUOTE && pctx->quote_char != ch) {
 261                     // not matching quote
 262                     continue;
 263                 }
 264                 // partial before quote
 265                 if (anchor != nextc) {
 266                     JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 267                 }
 268                 // anchor after quote character
 269                 anchor = nextc + 1;
 270                 if (pctx->state == IN_TOKEN) {
 271                     pctx->quote_char = ch;
 272                     pctx->state = IN_QUOTE;
 273                 } else {
 274                     pctx->state = IN_TOKEN;
 275                 }
 276                 break;
 277             default:
 278                 break;
 279         }
 280     }
 281 
 282     assert(nextc == eob);
 283     if (anchor != nextc) {
 284         // not yet return until end of stream, we have part of a token.
 285         JLI_List_addSubstring(pctx->parts, anchor, nextc - anchor);
 286     }
 287     return NULL;
 288 }
 289 
 290 static JLI_List readArgFile(FILE *file) {
 291     char buf[4096];
 292     JLI_List rv;
 293     __ctx_args ctx;
 294     size_t size;
 295     char *token;
 296 
 297     ctx.state = FIND_NEXT;
 298     ctx.parts = JLI_List_new(4);
 299 
 300     /* arbitrarily pick 8, seems to be a reasonable number of arguments */
 301     rv = JLI_List_new(8);
 302 
 303     while (!feof(file)) {
 304         size = fread(buf, sizeof(char), sizeof(buf), file);
 305         if (ferror(file)) {
 306             JLI_List_free(rv);
 307             return NULL;
 308         }
 309 
 310         /* nextc is next character to read from the buffer
 311          * eob is the end of input
 312          * token is the copied token value, NULL if no a complete token
 313          */
 314         ctx.cptr = buf;
 315         ctx.eob = buf + size;
 316         token = nextToken(&ctx);
 317         while (token != NULL) {
 318             checkArg(token);
 319             JLI_List_add(rv, token);
 320             token = nextToken(&ctx);
 321         }
 322     }
 323 
 324     // remaining partial token
 325     if (ctx.state == IN_TOKEN || ctx.state == IN_QUOTE) {
 326         if (ctx.parts->size != 0) {
 327             JLI_List_add(rv, JLI_List_combine(ctx.parts));
 328         }
 329     }
 330     JLI_List_free(ctx.parts);
 331 
 332     return rv;
 333 }
 334 
 335 /*
 336  * if the arg represent a file, that is, prefix with a single '@',
 337  * return a list of arguments from the file.
 338  * otherwise, return NULL.
 339  */
 340 static JLI_List expandArgFile(const char *arg) {
 341     FILE *fptr;
 342     struct stat st;
 343     JLI_List rv;
 344 
 345     /* failed to access the file */
 346     if (stat(arg, &st) != 0) {
 347         JLI_ReportMessage(CFG_ERROR6, arg);
 348         exit(1);
 349     }
 350 
 351     if (st.st_size > MAX_ARGF_SIZE) {
 352         JLI_ReportMessage(CFG_ERROR10, MAX_ARGF_SIZE);
 353         exit(1);
 354     }
 355 
 356     fptr = fopen(arg, "r");
 357     /* arg file cannot be openned */
 358     if (fptr == NULL) {
 359         JLI_ReportMessage(CFG_ERROR6, arg);
 360         exit(1);
 361     }
 362 
 363     rv = readArgFile(fptr);
 364     fclose(fptr);
 365 
 366     /* error occurred reading the file */
 367     if (rv == NULL) {
 368         JLI_ReportMessage(DLL_ERROR4, arg);
 369         exit(1);
 370     }
 371 
 372     return rv;
 373 }
 374 
 375 JLI_List JLI_PreprocessArg(const char *arg)
 376 {
 377     JLI_List rv;
 378 
 379     if (firstAppArgIndex > 0) {
 380         // In user application arg, no more work.
 381         return NULL;
 382     }
 383 
 384     if (stopExpansion) {
 385         // still looking for user application arg
 386         checkArg(arg);
 387         return NULL;
 388     }
 389 
 390     if (arg[0] != '@') {
 391         checkArg(arg);
 392         return NULL;
 393     }
 394 
 395     if (arg[1] == '\0') {
 396         // @ by itself is an argument
 397         checkArg(arg);
 398         return NULL;
 399     }
 400 
 401     arg++;
 402     if (arg[0] == '@') {
 403         // escaped @argument
 404         rv = JLI_List_new(1);
 405         checkArg(arg);
 406         JLI_List_add(rv, JLI_StringDup(arg));
 407     } else {
 408         rv = expandArgFile(arg);
 409     }
 410     return rv;
 411 }
 412 
 413 #ifdef DEBUG_ARGFILE
 414 /*
 415  * Stand-alone sanity test, build with following command line
 416  * $ CC -DDEBUG_ARGFILE -DNO_JNI -g args.c jli_util.c
 417  */
 418 
 419 void fail(char *expected, char *actual, size_t idx) {
 420     printf("FAILED: Token[%lu] expected to be <%s>, got <%s>\n", idx, expected, actual);
 421     exit(1);
 422 }
 423 
 424 void test_case(char *case_data, char **tokens, size_t cnt_tokens) {
 425     size_t actual_cnt;
 426     char *token;
 427     __ctx_args ctx;
 428 
 429     actual_cnt = 0;
 430 
 431     ctx.state = FIND_NEXT;
 432     ctx.parts = JLI_List_new(4);
 433     ctx.cptr = case_data;
 434     ctx.eob = case_data + strlen(case_data);
 435 
 436     printf("Test case: <%s>, expected %lu tokens.\n", case_data, cnt_tokens);
 437 
 438     for (token = nextToken(&ctx); token != NULL; token = nextToken(&ctx)) {
 439         // should not have more tokens than expected
 440         if (actual_cnt >= cnt_tokens) {
 441             printf("FAILED: Extra token detected: <%s>\n", token);
 442             exit(2);
 443         }
 444         if (JLI_StrCmp(token, tokens[actual_cnt]) != 0) {
 445             fail(tokens[actual_cnt], token, actual_cnt);
 446         }
 447         actual_cnt++;
 448     }
 449 
 450     char* last = NULL;
 451     if (ctx.parts->size != 0) {
 452         last = JLI_List_combine(ctx.parts);
 453     }
 454     JLI_List_free(ctx.parts);
 455 
 456     if (actual_cnt >= cnt_tokens) {
 457         // same number of tokens, should have nothing left to parse
 458         if (last != NULL) {
 459             if (*last != '#') {
 460                 printf("Leftover detected: %s", last);
 461                 exit(2);
 462             }
 463         }
 464     } else {
 465         if (JLI_StrCmp(last, tokens[actual_cnt]) != 0) {
 466             fail(tokens[actual_cnt], last, actual_cnt);
 467         }
 468         actual_cnt++;
 469     }
 470     if (actual_cnt != cnt_tokens) {
 471         printf("FAILED: Number of tokens not match, expected %lu, got %lu\n",
 472             cnt_tokens, actual_cnt);
 473         exit(3);
 474     }
 475 
 476     printf("PASS\n");
 477 }
 478 
 479 #define DO_CASE(name) \
 480     test_case(name[0], name + 1, sizeof(name)/sizeof(char*) - 1)
 481 
 482 int main(int argc, char** argv) {
 483     size_t i, j;
 484 
 485     char* case1[] = { "-version -cp \"c:\\\\java libs\\\\one.jar\" \n",
 486         "-version", "-cp", "c:\\java libs\\one.jar" };
 487     DO_CASE(case1);
 488 
 489     // note the open quote at the end
 490     char* case2[] = { "com.foo.Panda \"Furious 5\"\fand\t'Shi Fu' \"escape\tprison",
 491         "com.foo.Panda", "Furious 5", "and", "Shi Fu", "escape\tprison"};
 492     DO_CASE(case2);
 493 
 494     char* escaped_chars[] = { "escaped chars testing \"\\a\\b\\c\\f\\n\\r\\t\\v\\9\\6\\23\\82\\28\\377\\477\\278\\287\"",
 495         "escaped", "chars", "testing", "abc\f\n\r\tv96238228377477278287"};
 496     DO_CASE(escaped_chars);
 497 
 498     char* mixed_quote[]  = { "\"mix 'single quote' in double\" 'mix \"double quote\" in single' partial\"quote me\"this",
 499         "mix 'single quote' in double", "mix \"double quote\" in single", "partialquote methis"};
 500     DO_CASE(mixed_quote);
 501 
 502     char* comments[]  = { "line one #comment\n'line #2' #rest are comment\r\n#comment on line 3\nline 4 #comment to eof",
 503         "line", "one", "line #2", "line", "4"};
 504     DO_CASE(comments);
 505 
 506     char* open_quote[] = { "This is an \"open quote \n    across line\n\t, note for WS.",
 507         "This", "is", "an", "open quote ", "across", "line", ",", "note", "for", "WS." };
 508     DO_CASE(open_quote);
 509 
 510     char* escape_in_open_quote[] = { "Try \"this \\\\\\\\ escape\\n double quote \\\" in open quote",
 511         "Try", "this \\\\ escape\n double quote \" in open quote" };
 512     DO_CASE(escape_in_open_quote);
 513 
 514     char* quote[] = { "'-Dmy.quote.single'='Property in single quote. Here a double quote\" Add some slashes \\\\/'",
 515         "-Dmy.quote.single=Property in single quote. Here a double quote\" Add some slashes \\/" };
 516     DO_CASE(quote);
 517 
 518     char* multi[] = { "\"Open quote to \n  new \"line \\\n\r   third\\\n\r\\\tand\ffourth\"",
 519         "Open quote to ", "new", "line third\tand\ffourth" };
 520     DO_CASE(multi);
 521 
 522     char* escape_quote[] = { "c:\\\"partial quote\"\\lib",
 523         "c:\\partial quote\\lib" };
 524     DO_CASE(escape_quote);
 525 
 526     if (argc > 1) {
 527         for (i = 0; i < argc; i++) {
 528             JLI_List tokens = JLI_PreprocessArg(argv[i]);
 529             if (NULL != tokens) {
 530                 for (j = 0; j < tokens->size; j++) {
 531                     printf("Token[%lu]: <%s>\n", (unsigned long) j, tokens->elements[j]);
 532                 }
 533             }
 534         }
 535     }
 536 }
 537 
 538 #endif // DEBUG_ARGFILE