1 /*
   2  * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2020, NTT DATA.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include <cstring>
  27 
  28 #include "dwarf.hpp"
  29 #include "libproc_impl.h"
  30 
  31 /* from read_leb128() in dwarf.c in binutils */
  32 uintptr_t DwarfParser::read_leb(bool sign) {
  33   uintptr_t result = 0L;
  34   unsigned char b;
  35   unsigned int shift = 0;
  36 
  37   while (true) {
  38     b = *_buf++;
  39     result |= static_cast<uintptr_t>(b & 0x7f) << shift;
  40     shift += 7;
  41     if ((b & 0x80) == 0) {
  42       break;
  43     }
  44   }
  45 
  46   if (sign && (shift < (8 * sizeof(result))) && (b & 0x40)) {
  47     result |= static_cast<uintptr_t>(-1L) << shift;
  48   }
  49 
  50   return result;
  51 }
  52 
  53 uint64_t DwarfParser::get_entry_length() {
  54   uint64_t length = *(reinterpret_cast<uint32_t *>(_buf));
  55   _buf += 4;
  56   if (length == 0xffffffff) {
  57     length = *(reinterpret_cast<uint64_t *>(_buf));
  58     _buf += 8;
  59   }
  60   return length;
  61 }
  62 
  63 bool DwarfParser::process_cie(unsigned char *start_of_entry, uint32_t id) {
  64   unsigned char *orig_pos = _buf;
  65   _buf = start_of_entry - id;
  66 
  67   uint64_t length = get_entry_length();
  68   if (length == 0L) {
  69     return false;
  70   }
  71   unsigned char *end = _buf + length;
  72 
  73   _buf += 4; // Skip ID (This value of CIE would be always 0)
  74   _buf++;    // Skip version (assume to be "1")
  75 
  76   char *augmentation_string = reinterpret_cast<char *>(_buf);
  77   bool has_ehdata = (strcmp("eh", augmentation_string) == 0);
  78   _buf += strlen(augmentation_string) + 1; // includes '\0'
  79   if (has_ehdata) {
  80     _buf += sizeof(void *); // Skip EH data
  81   }
  82 
  83   _code_factor = read_leb(false);
  84   _data_factor = static_cast<int>(read_leb(true));
  85   _return_address_reg = static_cast<enum DWARF_Register>(*_buf++);
  86 
  87   if (strpbrk(augmentation_string, "LP") != NULL) {
  88     // Language personality routine (P) and LSDA (L) are not supported
  89     // because we need compliant Unwind Library Interface,
  90     // but we want to unwind without it.
  91     //
  92     //   Unwind Library Interface (SysV ABI AMD64 6.2)
  93     //     https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
  94     return false;
  95   } else if (strchr(augmentation_string, 'R') != NULL) {
  96     read_leb(false); // augmentation length
  97     _encoding = *_buf++;
  98   }
  99 
 100   // Clear state
 101   _current_pc = 0L;
 102   _cfa_reg = RSP;
 103   _return_address_reg = RA;
 104   _cfa_offset = 0;
 105   _ra_cfa_offset = 0;
 106   _bp_cfa_offset = 0;
 107   _bp_offset_available = false;
 108 
 109   parse_dwarf_instructions(0L, static_cast<uintptr_t>(-1L), end);
 110 
 111   _buf = orig_pos;
 112   return true;
 113 }
 114 
 115 void DwarfParser::parse_dwarf_instructions(uintptr_t begin, uintptr_t pc, const unsigned char *end) {
 116   uintptr_t operand1;
 117   _current_pc = begin;
 118 
 119   /* for remember state */
 120   enum DWARF_Register rem_cfa_reg = MAX_VALUE;
 121   int rem_cfa_offset = 0;
 122   int rem_ra_cfa_offset = 0;
 123   int rem_bp_cfa_offset = 0;
 124 
 125   while ((_buf < end) && (_current_pc < pc)) {
 126     unsigned char op = *_buf++;
 127     unsigned char opa = op & 0x3f;
 128     if (op & 0xc0) {
 129       op &= 0xc0;
 130     }
 131 
 132     switch (op) {
 133       case 0x0:  // DW_CFA_nop
 134         return;
 135       case 0x01: // DW_CFA_set_loc
 136         operand1 = get_decoded_value();
 137         if (_current_pc != 0L) {
 138           _current_pc = operand1;
 139         }
 140         break;
 141       case 0x0c: // DW_CFA_def_cfa
 142         _cfa_reg = static_cast<enum DWARF_Register>(read_leb(false));
 143         _cfa_offset = read_leb(false);
 144         break;
 145       case 0x80: {// DW_CFA_offset
 146         operand1 = read_leb(false);
 147         enum DWARF_Register reg = static_cast<enum DWARF_Register>(opa);
 148         if (reg == RBP) {
 149           _bp_cfa_offset = operand1 * _data_factor;
 150           _bp_offset_available = true;
 151         } else if (reg == RA) {
 152           _ra_cfa_offset = operand1 * _data_factor;
 153         }
 154         break;
 155       }
 156       case 0xe:  // DW_CFA_def_cfa_offset
 157         _cfa_offset = read_leb(false);
 158         break;
 159       case 0x40: // DW_CFA_advance_loc
 160         if (_current_pc != 0L) {
 161           _current_pc += opa * _code_factor;
 162         }
 163         break;
 164       case 0x02: { // DW_CFA_advance_loc1
 165         unsigned char ofs = *_buf++;
 166         if (_current_pc != 0L) {
 167           _current_pc += ofs * _code_factor;
 168         }
 169         break;
 170       }
 171       case 0x03: { // DW_CFA_advance_loc2
 172         unsigned short ofs = *(reinterpret_cast<unsigned short *>(_buf));
 173         _buf += 2;
 174         if (_current_pc != 0L) {
 175           _current_pc += ofs * _code_factor;
 176         }
 177         break;
 178       }
 179       case 0x04: { // DW_CFA_advance_loc4
 180         unsigned int ofs = *(reinterpret_cast<unsigned int *>(_buf));
 181         _buf += 4;
 182         if (_current_pc != 0L) {
 183           _current_pc += ofs * _code_factor;
 184         }
 185         break;
 186       }
 187       case 0x0d: {// DW_CFA_def_cfa_register
 188         _cfa_reg = static_cast<enum DWARF_Register>(read_leb(false));
 189         break;
 190       }
 191       case 0x0a: // DW_CFA_remember_state
 192         rem_cfa_reg = _cfa_reg;
 193         rem_cfa_offset = _cfa_offset;
 194         rem_ra_cfa_offset = _ra_cfa_offset;
 195         rem_bp_cfa_offset = _bp_cfa_offset;
 196         break;
 197       case 0x0b: // DW_CFA_restore_state
 198         _cfa_reg = rem_cfa_reg;
 199         _cfa_offset = rem_cfa_offset;
 200         _ra_cfa_offset = rem_ra_cfa_offset;
 201         _bp_cfa_offset = rem_bp_cfa_offset;
 202         break;
 203       default:
 204         print_debug("DWARF: Unknown opcode: 0x%x\n", op);
 205         return;
 206     }
 207   }
 208 }
 209 
 210 /* from dwarf.c in binutils */
 211 uint32_t DwarfParser::get_decoded_value() {
 212   int size;
 213   uintptr_t result;
 214 
 215   switch (_encoding & 0x7) {
 216     case 0:  // DW_EH_PE_absptr
 217       size = sizeof(void *);
 218       result = *(reinterpret_cast<uintptr_t *>(_buf));
 219       break;
 220     case 2:  // DW_EH_PE_udata2
 221       size = 2;
 222       result = *(reinterpret_cast<unsigned int *>(_buf));
 223       break;
 224     case 3:  // DW_EH_PE_udata4
 225       size = 4;
 226       result = *(reinterpret_cast<uint32_t *>(_buf));
 227       break;
 228     case 4:  // DW_EH_PE_udata8
 229       size = 8;
 230       result = *(reinterpret_cast<uint64_t *>(_buf));
 231       break;
 232     default:
 233       return 0;
 234   }
 235 
 236   // On x86-64, we have to handle it as 32 bit value, and it is PC relative.
 237   //   https://gcc.gnu.org/ml/gcc-help/2010-09/msg00166.html
 238 #if defined(_LP64)
 239   if (size == 8) {
 240     result += _lib->eh_frame.v_addr + static_cast<uintptr_t>(_buf - _lib->eh_frame.data);
 241     size = 4;
 242   } else
 243 #endif
 244   if ((_encoding & 0x70) == 0x10) { // 0x10 = DW_EH_PE_pcrel
 245     result += _lib->eh_frame.v_addr + static_cast<uintptr_t>(_buf - _lib->eh_frame.data);
 246   } else  if (size == 2) {
 247     result = static_cast<int>(result) + _lib->eh_frame.v_addr + static_cast<uintptr_t>(_buf - _lib->eh_frame.data);
 248     size = 4;
 249   }
 250 
 251   _buf += size;
 252   return static_cast<uint32_t>(result);
 253 }
 254 
 255 unsigned int DwarfParser::get_pc_range() {
 256   int size;
 257   uintptr_t result;
 258 
 259   switch (_encoding & 0x7) {
 260     case 0:  // DW_EH_PE_absptr
 261       size = sizeof(void *);
 262       result = *(reinterpret_cast<uintptr_t *>(_buf));
 263       break;
 264     case 2:  // DW_EH_PE_udata2
 265       size = 2;
 266       result = *(reinterpret_cast<unsigned int *>(_buf));
 267       break;
 268     case 3:  // DW_EH_PE_udata4
 269       size = 4;
 270       result = *(reinterpret_cast<uint32_t *>(_buf));
 271       break;
 272     case 4:  // DW_EH_PE_udata8
 273       size = 8;
 274       result = *(reinterpret_cast<uint64_t *>(_buf));
 275       break;
 276     default:
 277       return 0;
 278   }
 279 
 280   // On x86-64, we have to handle it as 32 bit value, and it is PC relative.
 281   //   https://gcc.gnu.org/ml/gcc-help/2010-09/msg00166.html
 282 #if defined(_LP64)
 283   if ((size == 8) || (size == 2)) {
 284     size = 4;
 285   }
 286 #endif
 287 
 288   _buf += size;
 289   return static_cast<unsigned int>(result);
 290 }
 291 
 292 bool DwarfParser::process_dwarf(const uintptr_t pc) {
 293   // https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
 294   _buf = _lib->eh_frame.data;
 295   unsigned char *end = _lib->eh_frame.data + _lib->eh_frame.size;
 296   while (_buf <= end) {
 297     uint64_t length = get_entry_length();
 298     if (length == 0L) {
 299       return false;
 300     }
 301     unsigned char *next_entry = _buf + length;
 302     unsigned char *start_of_entry = _buf;
 303     uint32_t id = *(reinterpret_cast<uint32_t *>(_buf));
 304     _buf += 4;
 305     if (id != 0) { // FDE
 306       uintptr_t pc_begin = get_decoded_value() + _lib->eh_frame.library_base_addr;
 307       uintptr_t pc_end = pc_begin + get_pc_range();
 308 
 309       if ((pc >= pc_begin) && (pc < pc_end)) {
 310         // Process CIE
 311         if (!process_cie(start_of_entry, id)) {
 312           return false;
 313         }
 314 
 315         // Skip Augumenation
 316         uintptr_t augmentation_length = read_leb(false);
 317         _buf += augmentation_length; // skip
 318 
 319         // Process FDE
 320         parse_dwarf_instructions(pc_begin, pc, next_entry);
 321         return true;
 322       }
 323     }
 324 
 325     _buf = next_entry;
 326   }
 327 
 328   return false;
 329 }