New src/java.base/share/native/libjli/args.c

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <stdio.h>
  27 
  28 #ifdef DEBUG_ARGFILE
  29   #ifndef NO_JNI
  30     #define NO_JNI
  31   #endif
  32   #define JLI_ReportMessage(p1, p2) printf((p1), (p2))
  33 #else
  34   #include "java.h"
  35 #endif
  36 
  37 #include "jli_util.h"
  38 #include "emessages.h"
  39 
  40 static char* clone_substring(char *begin, size_t len) {
  41     char *rv;
  42 
  43     rv = (char *) JLI_MemAlloc(len + 1);
  44     memcpy(rv, begin, len);
  45     rv[len] = '\0';
  46     return rv;
  47 }
  48 
  49 /*
  50  * get quoted token, *cptr points to the start of the token, the beginning
  51  * quote
  52  */
  53 static char* dequote(char** cptr, const char * const eob) {
  54     char *nextc = *cptr;
  55     char quote_char = *nextc;
  56     char *anchor = nextc + 1;
  57     char *c;
  58     char d, c2, first;
  59     /* arbitarily pick 4, hopefully in most cases no more than 4 escaped chars */
  60     JLI_List escaped = JLI_List_new(4);
  61 
  62 #ifdef DQ_READ
  63     #error "DQ_READ had been defined!"
  64 #endif
  65 #define DQ_READ(V) \
  66     nextc++; \
  67     if (nextc >= eob) { \
  68         JLI_List_free(escaped); \
  69         return NULL; \
  70     } \
  71     V = (*nextc)
  72 /* end DQ_READ */
  73 
  74     DQ_READ(d);
  75     while (d != quote_char && d != '\n' && d != '\r') {
  76         if (d == '\\') {
  77             /* string before '\\' */
  78             JLI_List_addSubstring(escaped, anchor, nextc - anchor);
  79             c = JLI_MemAlloc(2 * sizeof(char));
  80             c[1] = '\0';
  81             DQ_READ(c[0]);
  82             first = c[0];   /* To allow \377, but not \477 */
  83             if (c[0] >= '0' && c[0] <= '7') {
  84                 c[0] = c[0] - '0';
  85                 DQ_READ(c2);
  86                 if ('0' <= c2 && c2 <= '7') {
  87                     c[0] = (c[0] << 3) + (c2 - '0');
  88                     DQ_READ(c2);
  89                     if ('0' <= c2 && c2 <= '7' && first <= '3') {
  90                         c[0] = (c[0] << 3) + (c2 - '0');
  91                         DQ_READ(d);
  92                     } else {
  93                         d = c2;
  94                     }
  95                 } else {
  96                   d = c2;
  97                 }
  98             } else {
  99                 switch (c[0]) {
 100                 case 'a':
 101                     c[0] = 0x7;
 102                     break;
 103                 case 'b':
 104                     c[0] = '\b';
 105                     break;
 106                 case 'f':
 107                     c[0] = 0xC;
 108                     break;
 109                 case 'n':
 110                     c[0] = '\n';
 111                     break;
 112                 case 'r':
 113                     c[0] = '\r';
 114                     break;
 115                 case 't':
 116                     c[0] = '\t';
 117                     break;
 118                 case 'v':
 119                     c[0] = 0xB;
 120                     break;
 121                 }
 122                 DQ_READ(d);
 123             }
 124             JLI_List_add(escaped, c);
 125             anchor = nextc;
 126         } else {
 127             DQ_READ(d);
 128         }
 129     }
 130 #undef DQ_READ
 131 
 132     /* nextc points to end quote or eol */
 133     // assert(nextc < eob);
 134     *cptr = nextc + 1;
 135     if (escaped->size == 0) {
 136         return clone_substring(anchor, nextc - anchor);
 137     } else {
 138         JLI_List_addSubstring(escaped, anchor, nextc - anchor);
 139         return JLI_List_combine(escaped);
 140     }
 141 }
 142 
 143 /*
 144  * cptr: the output parameter of a char* points at next read location
 145  * eob: the pointer points to end of buffer, one after the last character
 146  * return the pointer to start of next token, NULL if the buffer contains no token.
 147  * *cptr will point to next character after the token.
 148  * if returns NULL and cptr != eob indicated partial token remains in the buffer
 149  */
 150 static char* nextToken(char** cptr, const char * const eob) {
 151     char *nextc = *cptr;
 152     char *anchor;
 153     size_t len;
 154     char *rv;
 155 
 156     /* whitespaceChars(0, ' ') */
 157     while (nextc < eob && *nextc <= ' ') nextc++;
 158     if (nextc == eob) {
 159         *cptr = nextc;
 160         return NULL;
 161     }
 162 
 163     /* start with non-whitespace character */
 164     anchor = nextc;
 165     switch (*nextc) {
 166         case '#': /* commentChar('#') */
 167             /* ignore until eol */
 168             for(nextc++; nextc < eob && *nextc != '\n'; nextc++);
 169             if (nextc < eob) {
 170                 /* skip comment, continue read for next token */
 171                 *cptr = nextc++;
 172                 return nextToken(cptr, eob);
 173             }
 174             break;
 175         case '"': /* quoteChar('"') */
 176         case '\'': /* quoteChar('\'') */
 177             *cptr = nextc;
 178             return dequote(cptr, eob);
 179         default:
 180             for (nextc++; nextc < eob && *nextc > ' '; nextc++) {
 181                 /* comment or quote break a token just like whitespace */
 182                 if (*nextc == '#' || *nextc == '"' || *nextc == '\'') {
 183                     break;
 184                 }
 185             }
 186             if (nextc < eob) {
 187                 len = nextc - anchor;
 188                 rv = clone_substring(anchor, len);
 189                 *cptr = nextc;
 190                 return rv;
 191             }
 192     }
 193 
 194     /* nextc == eob, partial of next token
 195      * need to combine with next read
 196      */
 197     *cptr = anchor;
 198     return NULL;
 199 }
 200 
 201 static JLI_List readArgFile(FILE *file) {
 202     char buf[4096];
 203     JLI_List rv;
 204     size_t size = 0;
 205     size_t buf_size = sizeof(buf);
 206     char *nextc = buf;
 207     char *token = NULL;
 208     char *eob = buf;
 209     char *extra_buf = NULL;
 210     size_t extra_size = sizeof(buf);
 211     char *active_buf = buf;
 212 
 213     /* arbitrarily pick 8, seems to be a reasonable number of arguments */
 214     rv = JLI_List_new(8);
 215 
 216     while (!feof(file)) {
 217         size = fread(eob, sizeof(char), buf_size - size, file);
 218         if (ferror(file)) {
 219             JLI_List_free(rv);
 220             if (extra_buf != NULL) {
 221                 JLI_MemFree(extra_buf);
 222             }
 223             return NULL;
 224         }
 225 
 226         /* nextc is next character to read from the buffer
 227          * eob is the end of input
 228          * token is the copied token value, NULL if no a complete token
 229          */
 230         nextc = active_buf;
 231         eob = eob + size;
 232         token = nextToken(&nextc, eob);
 233         while (token != NULL) {
 234             JLI_List_add(rv, token);
 235             token = nextToken(&nextc, eob);
 236         }
 237 
 238         /* remaining characters of a non-complete token,
 239          * need to combine with next read.
 240          * if size = 0, means the input is complete read and no token
 241          * available in this buffer
 242          */
 243         size = eob - nextc;
 244         if (size == 0) {
 245             active_buf = buf;
 246             buf_size = sizeof(buf);
 247         } else {
 248             if (size < sizeof(buf)) {
 249                 // use stack whenever possible
 250                 active_buf = buf;
 251                 buf_size = sizeof(buf);
 252                 memmove(active_buf, nextc, size);
 253             } else {
 254                 if (size >= extra_size) {
 255                     // increase 4K
 256                     extra_size += sizeof(buf);
 257                     extra_buf = (char*) JLI_MemRealloc(extra_buf, extra_size);
 258                     if (active_buf == buf) {
 259                         memmove(extra_buf, nextc, size);
 260                     } // else realloc takes care of copy
 261                 } else {
 262                     // size fit in extra but still to large for buf
 263                     memmove(extra_buf, nextc, size);
 264                 }
 265                 active_buf = extra_buf;
 266                 buf_size = extra_size;
 267             }
 268         }
 269         // assert(size < buf_size);
 270         eob = active_buf + size;
 271     }
 272 
 273     /* last token from remaining */
 274     if (active_buf < eob) {
 275         if (*active_buf == '"' || *active_buf == '\'') {
 276             nextc = active_buf + 1;
 277         } else {
 278             nextc = active_buf;
 279         }
 280         if (*nextc != '#') {
 281             token = clone_substring(nextc, eob - nextc);
 282             JLI_List_add(rv, token);
 283         }
 284     }
 285     if (extra_buf != NULL) {
 286         JLI_MemFree(extra_buf);
 287     }
 288     return rv;
 289 }
 290 
 291 /*
 292  * if the arg represent a file, that is, prefix with a single '@',
 293  * return a list of arguments from the file.
 294  * otherwise, return NULL.
 295  */
 296 JLI_List JLI_ExpandArgFile(const char *arg, jboolean not_java) {
 297     JLI_List rv;
 298     FILE *fptr;
 299 
 300     if (not_java) {
 301         if (JLI_StrNCmp("-J@", arg, 3) != 0) {
 302             return NULL;
 303         } else {
 304             arg += 3;
 305         }
 306     } else if (*arg != '@') {
 307         return NULL;
 308     } else {
 309         arg++;
 310     }
 311 
 312     if (*arg == '@' || *arg == '\0') {
 313         return NULL;
 314     }
 315 
 316     fptr = fopen(arg, "r");
 317     /* arg file cannot be openned */
 318     if (fptr == NULL) {
 319         JLI_ReportMessage(CFG_ERROR6, arg);
 320         exit(1);
 321     }
 322 
 323     rv = readArgFile(fptr);
 324     fclose(fptr);
 325 
 326     /* error occurred reading the file */
 327     if (rv == NULL) {
 328         JLI_ReportMessage(DLL_ERROR4, arg);
 329     }
 330 
 331     return rv;
 332 }
 333 
 334 #ifdef DEBUG_ARGFILE
 335 
 336 void fail(char *expected, char *actual, size_t idx) {
 337     printf("FAILED: Token[%lu] expected to be <%s>, got <%s>\n", idx, expected, actual);
 338     exit(1);
 339 }
 340 
 341 void test_case(char *case_data, char **tokens, size_t cnt_tokens) {
 342     char *b, *eob;
 343     size_t actual_cnt;
 344     char *token;
 345     //char **tokens = case_data + 1;
 346 
 347     b = case_data;
 348     eob = b + strlen(b);
 349     actual_cnt = 0;
 350 
 351     printf("Test case: <%s>, expected %lu tokens.\n", b, cnt_tokens);
 352 
 353     for (token = nextToken(&b, eob); token != NULL; token = nextToken(&b, eob)) {
 354         // should not have more tokens than expected
 355         if (actual_cnt >= cnt_tokens) {
 356             printf("FAILED: Extra token detected: <%s>\n", token);
 357             exit(2);
 358         }
 359         if (JLI_StrCmp(token, tokens[actual_cnt]) != 0) {
 360             fail(tokens[actual_cnt], token, actual_cnt);
 361         }
 362         actual_cnt++;
 363     }
 364 
 365     if (actual_cnt >= cnt_tokens) {
 366         // same number of tokens, should have nothing left to parse
 367         if (b != eob) {
 368             // remainder could be comment
 369             if (*b != '#') {
 370                 printf("FAILED: Leftover detected: <%s>\n", b);
 371                 exit(2);
 372             }
 373         }
 374     } else {
 375         if (JLI_StrCmp(b, tokens[actual_cnt]) != 0) {
 376             fail(tokens[actual_cnt], b, actual_cnt);
 377         }
 378         actual_cnt++;
 379     }
 380     if (actual_cnt != cnt_tokens) {
 381         printf("FAILED: Number of tokens not match, expected %lu, got %lu\n",
 382             cnt_tokens, actual_cnt);
 383         exit(3);
 384     }
 385 
 386     printf("PASS\n");
 387 }
 388 
 389 #define DO_CASE(name) \
 390     test_case(name[0], name + 1, sizeof(name)/sizeof(char*) - 1)
 391 
 392 int main(int argc, char** argv) {
 393     int i, j;
 394 
 395     char* case1[] = { "-version -cp \"c:\\\\java libs\\\\one.jar\" \n",
 396         "-version", "-cp", "c:\\java libs\\one.jar" };
 397     DO_CASE(case1);
 398 
 399     // note the open quote at the end
 400     char* case2[] = { "com.foo.Panda \"Furious 5\" 'Shi Fu' \"escape\tprison",
 401         "com.foo.Panda", "Furious 5", "Shi Fu", "\"escape\tprison"};
 402     DO_CASE(case2);
 403 
 404     char* escaped_chars[] = { "escaped chars testing \"\\a\\b\\c\\f\\n\\r\\t\\v\\9\\6\\23\\82\\28\\377\\477\\278\\287\"",
 405         "escaped", "chars", "testing", "\a\bc\f\n\r\t\v9\006\02382\0028\377\0477\0278\00287"};
 406     DO_CASE(escaped_chars);
 407 
 408     char* mixed_quote[]  = { "\"mix 'single quote' in double\" 'mix \"double quote\" in single'",
 409         "mix 'single quote' in double", "mix \"double quote\" in single"};
 410     DO_CASE(mixed_quote);
 411 
 412     char* comments[]  = { "line one #comment\n'line #2' #rest are comment\r#comment on line 3\nline 4 #comment to eof",
 413         "line", "one", "line #2", "line", "4"};
 414     DO_CASE(comments);
 415 
 416     if (argc > 1) {
 417         for (i = 0; i < argc; i++) {
 418             JLI_List tokens = JLI_ExpandArgFile(argv[i], JNI_FALSE);
 419             if (NULL != tokens) {
 420                 for (j = 0; j < tokens->size; j++) {
 421                     printf("Token[%d]: <%s>\n", j, tokens->elements[j]);
 422                 }
 423             }
 424         }
 425     }
 426 }
 427 
 428 #endif // DEBUG_ARGFILE