1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * The Universal Permissive License (UPL), Version 1.0
   6  *
   7  * Subject to the condition set forth below, permission is hereby granted to
   8  * any person obtaining a copy of this software, associated documentation
   9  * and/or data (collectively the "Software"), free of charge and under any
  10  * and all copyright rights in the Software, and any and all patent rights
  11  * owned or freely licensable by each licensor hereunder covering either (i)
  12  * the unmodified Software as contributed to or provided by such licensor,
  13  * or (ii) the Larger Works (as defined below), to deal in both
  14  *
  15  * (a) the Software, and
  16  *
  17  * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file
  18  * if one is included with the Software (each a "Larger Work" to which the
  19  * Software is contributed by such licensors),
  20  *
  21  * without restriction, including without limitation the rights to copy,
  22  * create derivative works of, display, perform, and distribute the Software
  23  * and make, use, sell, offer for sale, import, export, have made, and have
  24  * sold the Software and the Larger Work(s), and to sublicense the foregoing
  25  * rights on either these or other terms.
  26  *
  27  * This license is subject to the following condition:
  28  *
  29  * The above copyright notice and either this complete permission notice or
  30  * at a minimum a reference to the UPL must be included in all copies or
  31  * substantial portions of the Software.
  32  *
  33  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  34  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  35  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
  36  * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
  37  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  38  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  39  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  40  *
  41  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  42  * or visit www.oracle.com if you need additional information or have any
  43  * questions.
  44  *
  45  */
  46 
  47 /* hsdis.c -- dump a range of addresses as native instructions
  48    This implements the plugin protocol required by the
  49    HotSpot PrintAssembly option.
  50 */
  51 
  52 #include <config.h> /* required by bfd.h */
  53 #include <errno.h>
  54 #include <inttypes.h>
  55 #include <string.h>
  56 
  57 #include <libiberty.h>
  58 #include <bfd.h>
  59 #include <bfdver.h>
  60 #include <dis-asm.h>
  61 
  62 #include "hsdis.h"
  63 
  64 #ifndef bool
  65 #define bool int
  66 #define true 1
  67 #define false 0
  68 #endif /*bool*/
  69 
  70 /* short names for stuff in hsdis.h */
  71 typedef decode_instructions_event_callback_ftype  event_callback_t;
  72 typedef decode_instructions_printf_callback_ftype printf_callback_t;
  73 
  74 /* disassemble_info.application_data object */
  75 struct hsdis_app_data {
  76   /* virtual address of data */
  77   uintptr_t start_va, end_va;
  78   /* the instructions to be decoded */
  79   unsigned char* buffer;
  80   uintptr_t length;
  81   event_callback_t  event_callback;  void* event_stream;
  82   printf_callback_t printf_callback; void* printf_stream;
  83   bool losing;
  84   bool do_newline;
  85 
  86   /* the architecture being disassembled */
  87   const char* arch_name;
  88   const bfd_arch_info_type* arch_info;
  89 
  90   /* the disassembler we are going to use: */
  91   disassembler_ftype      dfn;
  92   struct disassemble_info dinfo; /* the actual struct! */
  93 
  94   char mach_option[64];
  95   char insn_options[256];
  96 };
  97 
  98 static void* decode(struct hsdis_app_data* app_data, const char* options);
  99 
 100 #define DECL_APP_DATA(dinfo) \
 101   struct hsdis_app_data* app_data = (struct hsdis_app_data*) (dinfo)->application_data
 102 
 103 #define DECL_EVENT_CALLBACK(app_data) \
 104   event_callback_t  event_callback = (app_data)->event_callback; \
 105   void*             event_stream   = (app_data)->event_stream
 106 
 107 #define DECL_PRINTF_CALLBACK(app_data) \
 108   printf_callback_t  printf_callback = (app_data)->printf_callback; \
 109   void*              printf_stream   = (app_data)->printf_stream
 110 
 111 
 112 static void print_help(struct hsdis_app_data* app_data,
 113                        const char* msg, const char* arg);
 114 static void setup_app_data(struct hsdis_app_data* app_data,
 115                            const char* options);
 116 static const char* format_insn_close(const char* close,
 117                                      disassemble_info* dinfo,
 118                                      char* buf, size_t bufsize);
 119 
 120 void*
 121 #ifdef DLL_ENTRY
 122   DLL_ENTRY
 123 #endif
 124 decode_instructions_virtual(uintptr_t start_va, uintptr_t end_va,
 125                             unsigned char* buffer, uintptr_t length,
 126                             event_callback_t  event_callback_arg,  void* event_stream_arg,
 127                             printf_callback_t printf_callback_arg, void* printf_stream_arg,
 128                             const char* options, int newline) {
 129   struct hsdis_app_data app_data;
 130   memset(&app_data, 0, sizeof(app_data));
 131   app_data.start_va    = start_va;
 132   app_data.end_va      = end_va;
 133   app_data.buffer = buffer;
 134   app_data.length = length;
 135   app_data.event_callback  = event_callback_arg;
 136   app_data.event_stream    = event_stream_arg;
 137   app_data.printf_callback = printf_callback_arg;
 138   app_data.printf_stream   = printf_stream_arg;
 139   app_data.do_newline = newline == 0 ? false : true;
 140 
 141   return decode(&app_data, options);
 142 }
 143 
 144 /* This is the compatability interface for older version of hotspot */
 145 void*
 146 #ifdef DLL_ENTRY
 147   DLL_ENTRY
 148 #endif
 149 decode_instructions(void* start_pv, void* end_pv,
 150                     event_callback_t  event_callback_arg,  void* event_stream_arg,
 151                     printf_callback_t printf_callback_arg, void* printf_stream_arg,
 152                     const char* options) {
 153   decode_instructions_virtual((uintptr_t)start_pv,
 154                              (uintptr_t)end_pv,
 155                              (unsigned char*)start_pv,
 156                              (uintptr_t)end_pv - (uintptr_t)start_pv,
 157                              event_callback_arg,
 158                              event_stream_arg,
 159                              printf_callback_arg,
 160                              printf_stream_arg,
 161                              options, false);
 162 }
 163 
 164 static void* decode(struct hsdis_app_data* app_data, const char* options) {
 165   setup_app_data(app_data, options);
 166   char buf[128];
 167 
 168   {
 169     /* now reload everything from app_data: */
 170     DECL_EVENT_CALLBACK(app_data);
 171     DECL_PRINTF_CALLBACK(app_data);
 172     uintptr_t start = app_data->start_va;
 173     uintptr_t end   = app_data->end_va;
 174     uintptr_t p     = start;
 175 
 176     (*event_callback)(event_stream, "insns", (void*)start);
 177 
 178     (*event_callback)(event_stream, "mach name='%s'",
 179                       (void*) app_data->arch_info->printable_name);
 180     if (app_data->dinfo.bytes_per_line != 0) {
 181       (*event_callback)(event_stream, "format bytes-per-line='%p'/",
 182                         (void*)(intptr_t) app_data->dinfo.bytes_per_line);
 183     }
 184 
 185     while (p < end && !app_data->losing) {
 186       (*event_callback)(event_stream, "insn", (void*) p);
 187 
 188       /* reset certain state, so we can read it with confidence */
 189       app_data->dinfo.insn_info_valid    = 0;
 190       app_data->dinfo.branch_delay_insns = 0;
 191       app_data->dinfo.data_size          = 0;
 192       app_data->dinfo.insn_type          = 0;
 193 
 194       int size = (*app_data->dfn)((bfd_vma) p, &app_data->dinfo);
 195 
 196       if (size > 0)  p += size;
 197       else           app_data->losing = true;
 198 
 199       if (!app_data->losing) {
 200         const char* insn_close = format_insn_close("/insn", &app_data->dinfo,
 201                                                    buf, sizeof(buf));
 202         (*event_callback)(event_stream, insn_close, (void*) p);
 203 
 204         if (app_data->do_newline) {
 205           /* follow each complete insn by a nice newline */
 206           (*printf_callback)(printf_stream, "\n");
 207         }
 208       }
 209     }
 210 
 211     if (app_data->losing) (*event_callback)(event_stream, "/insns", (void*) p);
 212     return (void*) p;
 213   }
 214 }
 215 
 216 /* take the address of the function, for luck, and also test the typedef: */
 217 const decode_func_vtype decode_func_virtual_address = &decode_instructions_virtual;
 218 const decode_func_stype decode_func_address = &decode_instructions;
 219 
 220 static const char* format_insn_close(const char* close,
 221                                      disassemble_info* dinfo,
 222                                      char* buf, size_t bufsize) {
 223   if (!dinfo->insn_info_valid)
 224     return close;
 225   enum dis_insn_type itype = dinfo->insn_type;
 226   int dsize = dinfo->data_size, delays = dinfo->branch_delay_insns;
 227   if ((itype == dis_nonbranch && (dsize | delays) == 0)
 228       || (strlen(close) + 3*20 > bufsize))
 229     return close;
 230 
 231   const char* type = "unknown";
 232   switch (itype) {
 233   case dis_nonbranch:   type = NULL;         break;
 234   case dis_branch:      type = "branch";     break;
 235   case dis_condbranch:  type = "condbranch"; break;
 236   case dis_jsr:         type = "jsr";        break;
 237   case dis_condjsr:     type = "condjsr";    break;
 238   case dis_dref:        type = "dref";       break;
 239   case dis_dref2:       type = "dref2";      break;
 240   }
 241 
 242   strcpy(buf, close);
 243   char* p = buf;
 244   if (type)    sprintf(p += strlen(p), " type='%s'", type);
 245   if (dsize)   sprintf(p += strlen(p), " dsize='%d'", dsize);
 246   if (delays)  sprintf(p += strlen(p), " delay='%d'", delays);
 247   return buf;
 248 }
 249 
 250 /* handler functions */
 251 
 252 static int
 253 hsdis_read_memory_func(bfd_vma memaddr,
 254                        bfd_byte* myaddr,
 255                        unsigned int length,
 256                        struct disassemble_info* dinfo) {
 257   DECL_APP_DATA(dinfo);
 258   /* convert the virtual address memaddr into an address within memory buffer */
 259   uintptr_t offset = ((uintptr_t) memaddr) - app_data->start_va;
 260   if (offset + length > app_data->length) {
 261     /* read is out of bounds */
 262     return EIO;
 263   } else {
 264     memcpy(myaddr, (bfd_byte*) (app_data->buffer + offset), length);
 265     return 0;
 266   }
 267 }
 268 
 269 static void
 270 hsdis_print_address_func(bfd_vma vma, struct disassemble_info* dinfo) {
 271   /* the actual value to print: */
 272   void* addr_value = (void*) (uintptr_t) vma;
 273   DECL_APP_DATA(dinfo);
 274   DECL_EVENT_CALLBACK(app_data);
 275 
 276   /* issue the event: */
 277   void* result =
 278     (*event_callback)(event_stream, "addr/", addr_value);
 279   if (result == NULL) {
 280     /* event declined */
 281     generic_print_address(vma, dinfo);
 282   }
 283 }
 284 
 285 
 286 /* configuration */
 287 
 288 static void set_optional_callbacks(struct hsdis_app_data* app_data);
 289 static void parse_caller_options(struct hsdis_app_data* app_data,
 290                                  const char* caller_options);
 291 static const char* native_arch_name();
 292 static enum bfd_endian native_endian();
 293 static const bfd_arch_info_type* find_arch_info(const char* arch_nane);
 294 static bfd* get_native_bfd(const bfd_arch_info_type* arch_info,
 295                            /* to avoid malloc: */
 296                            bfd* empty_bfd, bfd_target* empty_xvec);
 297 static void init_disassemble_info_from_bfd(struct disassemble_info* dinfo,
 298                                            void *stream,
 299                                            fprintf_ftype fprintf_func,
 300                                            bfd* bfd,
 301                                            char* disassembler_options);
 302 static void parse_fake_insn(disassembler_ftype dfn,
 303                             struct disassemble_info* dinfo);
 304 
 305 static void setup_app_data(struct hsdis_app_data* app_data,
 306                            const char* caller_options) {
 307   /* Make reasonable defaults for null callbacks.
 308      A non-null stream for a null callback is assumed to be a FILE* for output.
 309      Events are rendered as XML.
 310   */
 311   set_optional_callbacks(app_data);
 312 
 313   /* Look into caller_options for anything interesting. */
 314   if (caller_options != NULL)
 315     parse_caller_options(app_data, caller_options);
 316 
 317   /* Discover which architecture we are going to disassemble. */
 318   app_data->arch_name = &app_data->mach_option[0];
 319   if (app_data->arch_name[0] == '\0')
 320     app_data->arch_name = native_arch_name();
 321   app_data->arch_info = find_arch_info(app_data->arch_name);
 322 
 323   /* Make a fake bfd to hold the arch. and byteorder info. */
 324   struct {
 325     bfd_target empty_xvec;
 326     bfd        empty_bfd;
 327   } buf;
 328   bfd* native_bfd = get_native_bfd(app_data->arch_info,
 329                                    /* to avoid malloc: */
 330                                    &buf.empty_bfd, &buf.empty_xvec);
 331   init_disassemble_info_from_bfd(&app_data->dinfo,
 332                                  app_data->printf_stream,
 333                                  app_data->printf_callback,
 334                                  native_bfd,
 335                                  /* On PowerPC we get warnings, if we pass empty options */
 336                                  (caller_options == NULL) ? NULL : app_data->insn_options);
 337 
 338   /* Finish linking together the various callback blocks. */
 339   app_data->dinfo.application_data = (void*) app_data;
 340   app_data->dfn = disassembler(bfd_get_arch(native_bfd),
 341                                bfd_big_endian(native_bfd),
 342                                bfd_get_mach(native_bfd),
 343                                native_bfd);
 344   app_data->dinfo.print_address_func = hsdis_print_address_func;
 345   app_data->dinfo.read_memory_func = hsdis_read_memory_func;
 346 
 347   if (app_data->dfn == NULL) {
 348     const char* bad = app_data->arch_name;
 349     static bool complained;
 350     if (bad == &app_data->mach_option[0])
 351       print_help(app_data, "bad mach=%s", bad);
 352     else if (!complained)
 353       print_help(app_data, "bad native mach=%s; please port hsdis to this platform", bad);
 354     complained = true;
 355     /* must bail out */
 356     app_data->losing = true;
 357     return;
 358   }
 359 
 360   parse_fake_insn(app_data->dfn, &app_data->dinfo);
 361 }
 362 
 363 
 364 /* ignore all events, return a null */
 365 static void* null_event_callback(void* ignore_stream, const char* ignore_event, void* arg) {
 366   return NULL;
 367 }
 368 
 369 /* print all events as XML markup */
 370 static void* xml_event_callback(void* stream, const char* event, void* arg) {
 371   FILE* fp = (FILE*) stream;
 372 #define NS_PFX "dis:"
 373   if (event[0] != '/') {
 374     /* issue the tag, with or without a formatted argument */
 375     fprintf(fp, "<"NS_PFX);
 376     fprintf(fp, event, arg);
 377     fprintf(fp, ">");
 378   } else {
 379     ++event;                    /* skip slash */
 380     const char* argp = strchr(event, ' ');
 381     if (argp == NULL) {
 382       /* no arguments; just issue the closing tag */
 383       fprintf(fp, "</"NS_PFX"%s>", event);
 384     } else {
 385       /* split out the closing attributes as <dis:foo_done attr='val'/> */
 386       int event_prefix = (argp - event);
 387       fprintf(fp, "<"NS_PFX"%.*s_done", event_prefix, event);
 388       fprintf(fp, argp, arg);
 389       fprintf(fp, "/></"NS_PFX"%.*s>", event_prefix, event);
 390     }
 391   }
 392   return NULL;
 393 }
 394 
 395 static void set_optional_callbacks(struct hsdis_app_data* app_data) {
 396   if (app_data->printf_callback == NULL) {
 397     int (*fprintf_callback)(FILE*, const char*, ...) = &fprintf;
 398     FILE* fprintf_stream = stdout;
 399     app_data->printf_callback = (printf_callback_t) fprintf_callback;
 400     if (app_data->printf_stream == NULL)
 401       app_data->printf_stream   = (void*)           fprintf_stream;
 402   }
 403   if (app_data->event_callback == NULL) {
 404     if (app_data->event_stream == NULL)
 405       app_data->event_callback = &null_event_callback;
 406     else
 407       app_data->event_callback = &xml_event_callback;
 408   }
 409 
 410 }
 411 
 412 static void parse_caller_options(struct hsdis_app_data* app_data, const char* caller_options) {
 413   char* iop_base = app_data->insn_options;
 414   char* iop_limit = iop_base + sizeof(app_data->insn_options) - 1;
 415   char* iop = iop_base;
 416   const char* p;
 417   for (p = caller_options; p != NULL; ) {
 418     const char* q = strchr(p, ',');
 419     size_t plen = (q == NULL) ? strlen(p) : ((q++) - p);
 420     if (plen == 4 && strncmp(p, "help", plen) == 0) {
 421       print_help(app_data, NULL, NULL);
 422     } else if (plen >= 5 && strncmp(p, "mach=", 5) == 0) {
 423       char*  mach_option = app_data->mach_option;
 424       size_t mach_size   = sizeof(app_data->mach_option);
 425       mach_size -= 1;           /*leave room for the null*/
 426       if (plen > mach_size)  plen = mach_size;
 427       strncpy(mach_option, p, plen);
 428       mach_option[plen] = '\0';
 429     } else if (plen > 6 && strncmp(p, "hsdis-", 6) == 0) {
 430       // do not pass these to the next level
 431     } else {
 432       /* just copy it; {i386,sparc}-dis.c might like to see it  */
 433       if (iop > iop_base && iop < iop_limit)  (*iop++) = ',';
 434       if (iop + plen > iop_limit)
 435         plen = iop_limit - iop;
 436       strncpy(iop, p, plen);
 437       iop += plen;
 438     }
 439     p = q;
 440   }
 441   *iop = '\0';
 442 }
 443 
 444 static void print_help(struct hsdis_app_data* app_data,
 445                        const char* msg, const char* arg) {
 446   DECL_PRINTF_CALLBACK(app_data);
 447   if (msg != NULL) {
 448     (*printf_callback)(printf_stream, "hsdis: ");
 449     (*printf_callback)(printf_stream, msg, arg);
 450     (*printf_callback)(printf_stream, "\n");
 451   }
 452   (*printf_callback)(printf_stream, "hsdis output options:\n");
 453   if (printf_callback == (printf_callback_t) &fprintf)
 454     disassembler_usage((FILE*) printf_stream);
 455   else
 456     disassembler_usage(stderr); /* better than nothing */
 457   (*printf_callback)(printf_stream, "  mach=<arch>   select disassembly mode\n");
 458 #if defined(LIBARCH_i386) || defined(LIBARCH_amd64)
 459   (*printf_callback)(printf_stream, "  mach=i386     select 32-bit mode\n");
 460   (*printf_callback)(printf_stream, "  mach=x86-64   select 64-bit mode\n");
 461   (*printf_callback)(printf_stream, "  suffix        always print instruction suffix\n");
 462 #endif
 463   (*printf_callback)(printf_stream, "  help          print this message\n");
 464 }
 465 
 466 
 467 /* low-level bfd and arch stuff that binutils doesn't do for us */
 468 
 469 static const bfd_arch_info_type* find_arch_info(const char* arch_name) {
 470   const bfd_arch_info_type* arch_info = bfd_scan_arch(arch_name);
 471   if (arch_info == NULL) {
 472     extern const bfd_arch_info_type bfd_default_arch_struct;
 473     arch_info = &bfd_default_arch_struct;
 474   }
 475   return arch_info;
 476 }
 477 
 478 static const char* native_arch_name() {
 479   const char* res = NULL;
 480 #ifdef LIBARCH_i386
 481   res = "i386";
 482 #endif
 483 #ifdef LIBARCH_amd64
 484   res = "i386:x86-64";
 485 #endif
 486 #ifdef LIBARCH_sparc
 487   res = "sparc:v8plusb";
 488 #endif
 489 #ifdef LIBARCH_sparcv9
 490   res = "sparc:v9b";
 491 #endif
 492 #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le)
 493   res = "powerpc:common64";
 494 #endif
 495   if (res == NULL)
 496     res = "architecture not set in Makefile!";
 497   return res;
 498 }
 499 
 500 static enum bfd_endian native_endian() {
 501   int32_t endian_test = 'x';
 502   if (*(const char*) &endian_test == 'x')
 503     return BFD_ENDIAN_LITTLE;
 504   else
 505     return BFD_ENDIAN_BIG;
 506 }
 507 
 508 static bfd* get_native_bfd(const bfd_arch_info_type* arch_info,
 509                            bfd* empty_bfd, bfd_target* empty_xvec) {
 510   memset(empty_bfd,  0, sizeof(*empty_bfd));
 511   memset(empty_xvec, 0, sizeof(*empty_xvec));
 512   empty_xvec->flavour = bfd_target_unknown_flavour;
 513   empty_xvec->byteorder = native_endian();
 514   empty_bfd->xvec = empty_xvec;
 515   empty_bfd->arch_info = arch_info;
 516   return empty_bfd;
 517 }
 518 
 519 static int read_zero_data_only(bfd_vma ignore_p,
 520                                bfd_byte* myaddr, unsigned int length,
 521                                struct disassemble_info *ignore_info) {
 522   memset(myaddr, 0, length);
 523   return 0;
 524 }
 525 static int print_to_dev_null(void* ignore_stream, const char* ignore_format, ...) {
 526   return 0;
 527 }
 528 
 529 /* Prime the pump by running the selected disassembler on a null input.
 530    This forces the machine-specific disassembler to divulge invariant
 531    information like bytes_per_line.
 532  */
 533 static void parse_fake_insn(disassembler_ftype dfn,
 534                             struct disassemble_info* dinfo) {
 535   typedef int (*read_memory_ftype)
 536     (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
 537      struct disassemble_info *info);
 538   read_memory_ftype read_memory_func = dinfo->read_memory_func;
 539   fprintf_ftype     fprintf_func     = dinfo->fprintf_func;
 540 
 541   dinfo->read_memory_func = &read_zero_data_only;
 542   dinfo->fprintf_func     = &print_to_dev_null;
 543   (*dfn)(0, dinfo);
 544 
 545   /* put it back */
 546   dinfo->read_memory_func = read_memory_func;
 547   dinfo->fprintf_func     = fprintf_func;
 548 }
 549 
 550 static void init_disassemble_info_from_bfd(struct disassemble_info* dinfo,
 551                                            void *stream,
 552                                            fprintf_ftype fprintf_func,
 553                                            bfd* abfd,
 554                                            char* disassembler_options) {
 555   init_disassemble_info(dinfo, stream, fprintf_func);
 556 
 557   dinfo->flavour = bfd_get_flavour(abfd);
 558   dinfo->arch = bfd_get_arch(abfd);
 559   dinfo->mach = bfd_get_mach(abfd);
 560   dinfo->disassembler_options = disassembler_options;
 561   dinfo->octets_per_byte = bfd_octets_per_byte (abfd);
 562   dinfo->skip_zeroes = sizeof(void*) * 2;
 563   dinfo->skip_zeroes_at_end = sizeof(void*)-1;
 564   dinfo->disassembler_needs_relocs = FALSE;
 565 
 566   if (bfd_big_endian(abfd))
 567     dinfo->display_endian = dinfo->endian = BFD_ENDIAN_BIG;
 568   else if (bfd_little_endian(abfd))
 569     dinfo->display_endian = dinfo->endian = BFD_ENDIAN_LITTLE;
 570   else
 571     dinfo->endian = native_endian();
 572 
 573   disassemble_init_for_target(dinfo);
 574 }