blob: 98ab048e93f61b816d1729f613d131c0cfb18da1 [file] [log] [blame]
/*--------------------------------------------------------------------*/
/*--- Reading of syms & debug info from Mach-O files. ---*/
/*--- readmacho.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2005-2015 Apple Inc.
Greg Parker gparker@apple.com
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#if defined(VGO_darwin)
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcproc.h"
#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
#include "pub_core_machine.h" /* VG_ELF_CLASS */
#include "pub_core_options.h"
#include "pub_core_oset.h"
#include "pub_core_tooliface.h" /* VG_(needs) */
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h"
#include "pub_core_debuginfo.h"
#include "priv_misc.h"
#include "priv_image.h"
#include "priv_d3basics.h"
#include "priv_tytypes.h"
#include "priv_storage.h"
#include "priv_readmacho.h"
#include "priv_readdwarf.h"
#include "priv_readdwarf3.h"
/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <mach-o/fat.h>
/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
#if VG_WORDSIZE == 4
# define MAGIC MH_MAGIC
# define MACH_HEADER mach_header
# define LC_SEGMENT_CMD LC_SEGMENT
# define SEGMENT_COMMAND segment_command
# define SECTION section
# define NLIST nlist
#else
# define MAGIC MH_MAGIC_64
# define MACH_HEADER mach_header_64
# define LC_SEGMENT_CMD LC_SEGMENT_64
# define SEGMENT_COMMAND segment_command_64
# define SECTION section_64
# define NLIST nlist_64
#endif
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Mach-O file mapping/unmapping helpers ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* A DiSlice is used to handle the thin/fat distinction for MachO images.
(1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
whatnot: the entire file. This is the DiImage* that is the backing
for the DiSlice.
(2) the Mach-O object of interest, which is presumably somewhere inside
the primary image. map_image_aboard() below, which generates this
info, will carefully check that the macho_ fields denote a section of
memory that falls entirely inside the primary image.
*/
Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
{
/* (JRS: the Mach-O headers might not be in this mapped data,
because we only mapped a page for this initial check,
or at least not very much, and what's at the start of the file
is in general a so-called fat header. The Mach-O object we're
interested in could be arbitrarily far along the image, and so
we can't assume its header will fall within this page.) */
/* But we can say that either it's a fat object, in which case it
begins with a fat header, or it's unadorned Mach-O, in which
case it starts with a normal header. At least do what checks we
can to establish whether or not we're looking at something
sane. */
const struct fat_header* fh_be = buf;
const struct MACH_HEADER* mh = buf;
vg_assert(buf);
if (szB < sizeof(struct fat_header))
return False;
if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
return True;
if (szB < sizeof(struct MACH_HEADER))
return False;
if (mh->magic == MAGIC)
return True;
return False;
}
/* Unmap an image mapped in by map_image_aboard. */
static void unmap_image ( /*MOD*/DiSlice* sli )
{
vg_assert(sli);
if (ML_(sli_is_valid)(*sli)) {
ML_(img_done)(sli->img);
*sli = DiSlice_INVALID;
}
}
/* Open the given file, find the thin part if necessary, do some
checks, and return a DiSlice containing details of both the thin
part and (implicitly, via the contained DiImage*) the fat part.
returns DiSlice_INVALID if it fails. If it succeeds, the returned
slice is guaranteed to refer to a valid(ish) Mach-O image. */
static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
const HChar* filename )
{
DiSlice sli = DiSlice_INVALID;
/* First off, try to map the thing in. */
DiImage* mimg = ML_(img_from_local_file)(filename);
if (mimg == NULL) {
VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
filename );
VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
return DiSlice_INVALID;
}
/* Now we have a viable DiImage* for it. Look for the embedded
Mach-O object. If not findable, close the image and fail. */
DiOffT fh_be_ioff = 0;
struct fat_header fh_be;
struct fat_header fh;
// Assume initially that we have a thin image, and narrow
// the bounds if it turns out to be fat. This stores |mimg| as
// |sli.img|, so NULL out |mimg| after this point, for the sake of
// clarity.
sli = ML_(sli_from_img)(mimg);
mimg = NULL;
// Check for fat header.
if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
goto close_and_fail;
}
// Fat header is always BIG-ENDIAN
ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
VG_(memset)(&fh, 0, sizeof(fh));
fh.magic = VG_(ntohl)(fh_be.magic);
fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
if (fh.magic == FAT_MAGIC) {
// Look for a good architecture.
if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
+ fh.nfat_arch * sizeof(struct fat_arch)) {
ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
goto close_and_fail;
}
DiOffT arch_be_ioff;
Int f;
for (f = 0, arch_be_ioff = sizeof(struct fat_header);
f < fh.nfat_arch;
f++, arch_be_ioff += sizeof(struct fat_arch)) {
# if defined(VGA_ppc)
Int cputype = CPU_TYPE_POWERPC;
# elif defined(VGA_ppc64be)
Int cputype = CPU_TYPE_POWERPC64BE;
# elif defined(VGA_ppc64le)
Int cputype = CPU_TYPE_POWERPC64LE;
# elif defined(VGA_x86)
Int cputype = CPU_TYPE_X86;
# elif defined(VGA_amd64)
Int cputype = CPU_TYPE_X86_64;
# else
# error "unknown architecture"
# endif
struct fat_arch arch_be;
struct fat_arch arch;
ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
VG_(memset)(&arch, 0, sizeof(arch));
arch.cputype = VG_(ntohl)(arch_be.cputype);
arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
arch.offset = VG_(ntohl)(arch_be.offset);
arch.size = VG_(ntohl)(arch_be.size);
if (arch.cputype == cputype) {
if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
goto close_and_fail;
}
/* Found a suitable arch. Narrow down the slice accordingly. */
sli.ioff = arch.offset;
sli.szB = arch.size;
break;
}
}
if (f == fh.nfat_arch) {
ML_(symerr)(di, True,
"No acceptable architecture found in fat file.");
goto close_and_fail;
}
}
/* Sanity check what we found. */
/* assured by logic above */
vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
if (sli.szB < sizeof(struct MACH_HEADER)) {
ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
goto close_and_fail;
}
if (sli.szB > ML_(img_size)(sli.img)) {
ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
goto close_and_fail;
}
if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
/* thin entirely within fat, as expected */
} else {
ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
goto close_and_fail;
}
/* Peer at the Mach header for the thin object, starting at the
beginning of the slice, to check it's at least marginally
sane. */
struct MACH_HEADER mh;
ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
if (mh.magic != MAGIC) {
ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
goto close_and_fail;
}
if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
goto close_and_fail;
}
/* "main image is plausible" */
vg_assert(sli.img);
vg_assert(ML_(img_size)(sli.img) > 0);
/* "thin image exists and is a sub-part (or all) of main image" */
vg_assert(sli.ioff >= 0);
vg_assert(sli.szB > 0);
vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
return sli; /* success */
/*NOTREACHED*/
close_and_fail:
unmap_image(&sli);
return DiSlice_INVALID; /* bah! */
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Mach-O symbol table reading ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* Read a symbol table (nlist). Add the resulting candidate symbols
to 'syms'; the caller will post-process them and hand them off to
ML_(addSym) itself. */
static
void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
struct _DebugInfo* di,
DiCursor symtab_cur, UInt symtab_count,
DiCursor strtab_cur, UInt strtab_sz )
{
Int i;
DiSym disym;
// "start_according_to_valgrind"
static const HChar* s_a_t_v = NULL; /* do not make non-static */
for (i = 0; i < symtab_count; i++) {
struct NLIST nl;
ML_(cur_read_get)(&nl,
ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
sizeof(nl));
Addr sym_addr = 0;
if ((nl.n_type & N_TYPE) == N_SECT) {
sym_addr = di->text_bias + nl.n_value;
/*} else if ((nl.n_type & N_TYPE) == N_ABS) {
GrP fixme don't ignore absolute symbols?
sym_addr = nl.n_value; */
} else {
continue;
}
if (di->trace_symtab) {
HChar* str = ML_(cur_read_strdup)(
ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
"di.read_symtab.1");
VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str );
ML_(dinfo_free)(str);
}
/* If no part of the symbol falls within the mapped range,
ignore it. */
if (sym_addr <= di->text_avma
|| sym_addr >= di->text_avma+di->text_size) {
continue;
}
/* skip names which point outside the string table;
following these risks segfaulting Valgrind */
if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
continue;
}
HChar* name
= ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
"di.read_symtab.2");
/* skip nameless symbols; these appear to be common, but
useless */
if (*name == 0) {
ML_(dinfo_free)(name);
continue;
}
VG_(bzero_inline)(&disym, sizeof(disym));
disym.avmas.main = sym_addr;
SET_TOCPTR_AVMA(disym, 0);
SET_LOCAL_EP_AVMA(disym, 0);
disym.pri_name = ML_(addStr)(di, name, -1);
disym.sec_names = NULL;
disym.size = // let canonicalize fix it
di->text_avma+di->text_size - sym_addr;
disym.isText = True;
disym.isIFunc = False;
// Lots of user function names get prepended with an underscore. Eg. the
// function 'f' becomes the symbol '_f'. And the "below main"
// function is called "start". So we skip the leading underscore, and
// if we see 'start' and --show-below-main=no, we rename it as
// "start_according_to_valgrind", which makes it easy to spot later
// and display as "(below main)".
if (disym.pri_name[0] == '_') {
disym.pri_name++;
}
else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
if (s_a_t_v == NULL)
s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
vg_assert(s_a_t_v);
disym.pri_name = s_a_t_v;
}
vg_assert(disym.pri_name);
VG_(addToXA)( syms, &disym );
ML_(dinfo_free)(name);
}
}
/* Compare DiSyms by their start address, and for equal addresses, use
the primary name as a secondary sort key. */
static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
{
const DiSym* s1 = (const DiSym*)v1;
const DiSym* s2 = (const DiSym*)v2;
if (s1->avmas.main < s2->avmas.main) return -1;
if (s1->avmas.main > s2->avmas.main) return 1;
return VG_(strcmp)(s1->pri_name, s2->pri_name);
}
/* 'cand' is a bunch of candidate symbols obtained by reading
nlist-style symbol table entries. Their ends may overlap, so sort
them and truncate them accordingly. The code in this routine is
copied almost verbatim from read_symbol_table() in readxcoff.c. */
static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
Bool trace_symtab )
{
Word nsyms, i, j, k, m;
nsyms = VG_(sizeXA)(syms);
VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
VG_(sortXA)(syms);
/* We only know for sure the start addresses (actual VMAs) of
symbols, and an overestimation of their end addresses. So sort
by start address, then clip each symbol so that its end address
does not overlap with the next one along.
There is a small refinement: if a group of symbols have the same
address, treat them as a group: find the next symbol along that
has a higher start address, and clip all of the group
accordingly. This clips the group as a whole so as not to
overlap following symbols. This leaves prefersym() in
storage.c, which is not nlist-specific, to later decide which of
the symbols in the group to keep.
Another refinement is that we need to get rid of symbols which,
after clipping, have identical starts, ends, and names. So the
sorting uses the name as a secondary key.
*/
for (i = 0; i < nsyms; i++) {
for (k = i+1;
k < nsyms
&& ((DiSym*)VG_(indexXA)(syms,i))->avmas.main
== ((DiSym*)VG_(indexXA)(syms,k))->avmas.main;
k++)
;
/* So now [i .. k-1] is a group all with the same start address.
Clip their ending addresses so they don't overlap [k]. In
the normal case (no overlaps), k == i+1. */
if (k < nsyms) {
DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
for (m = i; m < k; m++) {
DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
vg_assert(here->avmas.main < next->avmas.main);
if (here->avmas.main + here->size > next->avmas.main)
here->size = next->avmas.main - here->avmas.main;
}
}
i = k-1;
vg_assert(i <= nsyms);
}
j = 0;
if (nsyms > 0) {
j = 1;
for (i = 1; i < nsyms; i++) {
DiSym *s_j1, *s_j, *s_i;
vg_assert(j <= i);
s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
s_j = (DiSym*)VG_(indexXA)(syms, j);
s_i = (DiSym*)VG_(indexXA)(syms, i);
if (s_i->avmas.main != s_j1->avmas.main
|| s_i->size != s_j1->size
|| 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
*s_j = *s_i;
j++;
} else {
if (trace_symtab)
VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
s_i->avmas.main, s_i->pri_name );
}
}
}
vg_assert(j >= 0 && j <= nsyms);
VG_(dropTailXA)(syms, nsyms - j);
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Mach-O top-level processing ---*/
/*--- ---*/
/*------------------------------------------------------------*/
#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
static Bool file_exists_p(const HChar *path)
{
struct vg_stat sbuf;
SysRes res = VG_(stat)(path, &sbuf);
return sr_isError(res) ? False : True;
}
/* Search for an existing dSYM file as a possible separate debug file.
Adapted from gdb. */
static HChar *
find_separate_debug_file (const HChar *executable_name)
{
const HChar *basename_str;
HChar *dot_ptr;
HChar *slash_ptr;
HChar *dsymfile;
/* Make sure the object file name itself doesn't contain ".dSYM" in it or we
will end up with an infinite loop where after we add a dSYM symbol file,
it will then enter this function asking if there is a debug file for the
dSYM file itself. */
if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
{
/* Check for the existence of a .dSYM file for a given executable. */
basename_str = VG_(basename) (executable_name);
dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
VG_(strlen) (executable_name)
+ VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
+ VG_(strlen) (basename_str)
+ 1
);
/* First try for the dSYM in the same directory as the original file. */
VG_(strcpy) (dsymfile, executable_name);
VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
VG_(strcat) (dsymfile, basename_str);
if (file_exists_p (dsymfile))
return dsymfile;
/* Now search for any parent directory that has a '.' in it so we can find
Mac OS X applications, bundles, plugins, and any other kinds of files.
Mac OS X application bundles wil have their program in
"/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
".bundle" or ".plugin" for other types of bundles). So we look for any
prior '.' character and try appending the apple dSYM extension and
subdirectory and see if we find an existing dSYM file (in the above
MyApp example the dSYM would be at either:
"/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
"/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
{
/* Find the directory delimiter that follows the '.' character since
we now look for a .dSYM that follows any bundle extension. */
slash_ptr = VG_(strchr) (dot_ptr, '/');
if (slash_ptr)
{
/* NULL terminate the string at the '/' character and append
the path down to the dSYM file. */
*slash_ptr = '\0';
VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
VG_(strcat) (slash_ptr, basename_str);
if (file_exists_p (dsymfile))
return dsymfile;
}
/* NULL terminate the string at the '.' character and append
the path down to the dSYM file. */
*dot_ptr = '\0';
VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
VG_(strcat) (dot_ptr, basename_str);
if (file_exists_p (dsymfile))
return dsymfile;
/* NULL terminate the string at the '.' locatated by the strrchr()
function again. */
*dot_ptr = '\0';
/* We found a previous extension '.' character and did not find a
dSYM file so now find previous directory delimiter so we don't
try multiple times on a file name that may have a version number
in it such as "/some/path/MyApp.6.0.4.app". */
slash_ptr = VG_(strrchr) (dsymfile, '/');
if (!slash_ptr)
break;
/* NULL terminate the string at the previous directory character
and search again. */
*slash_ptr = '\0';
}
}
return NULL;
}
/* Given a DiSlice covering the entire Mach-O thin image, find the
DiSlice for the specified (segname, sectname) pairing, if
possible. Also return the section's .addr field in *svma if
svma is non-NULL. */
static DiSlice getsectdata ( DiSlice img,
const HChar *segname, const HChar *sectname,
/*OUT*/Addr* svma )
{
DiCursor cur = ML_(cur_from_sli)(img);
struct MACH_HEADER mh;
ML_(cur_step_get)(&mh, &cur, sizeof(mh));
Int c;
for (c = 0; c < mh.ncmds; c++) {
struct load_command cmd;
ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
if (cmd.cmd == LC_SEGMENT_CMD) {
struct SEGMENT_COMMAND seg;
ML_(cur_read_get)(&seg, cur, sizeof(seg));
if (0 == VG_(strncmp(&seg.segname[0],
segname, sizeof(seg.segname)))) {
DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
Int s;
for (s = 0; s < seg.nsects; s++) {
struct SECTION sect;
ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
if (0 == VG_(strncmp(sect.sectname, sectname,
sizeof(sect.sectname)))) {
DiSlice res = img;
res.ioff = sect.offset;
res.szB = sect.size;
if (svma) *svma = (Addr)sect.addr;
return res;
}
}
}
}
cur = ML_(cur_plus)(cur, cmd.cmdsize);
}
return DiSlice_INVALID;
}
/* Brute force just simply search for uuid[0..15] in |sli| */
static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
{
if (sli.szB < 16)
return False;
/* Work through the slice in 1 KB chunks. */
UChar first = uuid[0];
DiOffT min_off = sli.ioff;
DiOffT max1_off = sli.ioff + sli.szB;
DiOffT curr_off = min_off;
vg_assert(min_off < max1_off);
while (1) {
vg_assert(curr_off >= min_off && curr_off <= max1_off);
if (curr_off == max1_off) break;
DiOffT avail = max1_off - curr_off;
vg_assert(avail > 0 && avail <= max1_off);
if (avail > 1024) avail = 1024;
UChar buf[1024];
SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
vg_assert(nGot >= 1 && nGot <= avail);
UInt i;
/* Scan through the 1K chunk we got, looking for the start char. */
for (i = 0; i < (UInt)nGot; i++) {
if (LIKELY(buf[i] != first))
continue;
/* first char matches. See if we can get 16 bytes at this
offset, and compare. */
if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
UChar buff16[16];
ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
return True;
}
}
curr_off += nGot;
}
return False;
}
/* Heuristic kludge: return True if this looks like an installed
standard library; hence we shouldn't consider automagically running
dsymutil on it. */
static Bool is_systemish_library_name ( const HChar* name )
{
vg_assert(name);
if (0 == VG_(strncasecmp)(name, "/usr/", 5)
|| 0 == VG_(strncasecmp)(name, "/bin/", 5)
|| 0 == VG_(strncasecmp)(name, "/sbin/", 6)
|| 0 == VG_(strncasecmp)(name, "/opt/", 5)
|| 0 == VG_(strncasecmp)(name, "/sw/", 4)
|| 0 == VG_(strncasecmp)(name, "/System/", 8)
|| 0 == VG_(strncasecmp)(name, "/Library/", 9)
|| 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
return True;
} else {
return False;
}
}
Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
{
DiSlice msli = DiSlice_INVALID; // the main image
DiSlice dsli = DiSlice_INVALID; // the debuginfo image
DiCursor sym_cur = DiCursor_INVALID;
DiCursor dysym_cur = DiCursor_INVALID;
HChar* dsymfilename = NULL;
Bool have_uuid = False;
UChar uuid[16];
Word i;
const DebugInfoMapping* rx_map = NULL;
const DebugInfoMapping* rw_map = NULL;
/* mmap the object file to look for di->soname and di->text_bias
and uuid and nlist */
/* This should be ensured by our caller (that we're in the accept
state). */
vg_assert(di->fsm.have_rx_map);
vg_assert(di->fsm.have_rw_map);
for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
if (map->rx && !rx_map)
rx_map = map;
if (map->rw && !rw_map)
rw_map = map;
if (rx_map && rw_map)
break;
}
vg_assert(rx_map);
vg_assert(rw_map);
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg,
"%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
rx_map->avma, rw_map->avma );
VG_(memset)(&uuid, 0, sizeof(uuid));
msli = map_image_aboard( di, di->fsm.filename );
if (!ML_(sli_is_valid)(msli)) {
ML_(symerr)(di, False, "Connect to main image failed.");
goto fail;
}
vg_assert(msli.img != NULL && msli.szB > 0);
/* Poke around in the Mach-O header, to find some important
stuff. */
// Find LC_SYMTAB and LC_DYSYMTAB, if present.
// Read di->soname from LC_ID_DYLIB if present,
// or from LC_ID_DYLINKER if present,
// or use "NONE".
// Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
// Get uuid for later dsym search
di->text_bias = 0;
{
DiCursor cmd_cur = ML_(cur_from_sli)(msli);
struct MACH_HEADER mh;
ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
/* Now cur_cmd points just after the Mach header, right at the
start of the load commands, which is where we need it to start
the following loop. */
Int c;
for (c = 0; c < mh.ncmds; c++) {
struct load_command cmd;
ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
if (cmd.cmd == LC_SYMTAB) {
sym_cur = cmd_cur;
}
else if (cmd.cmd == LC_DYSYMTAB) {
dysym_cur = cmd_cur;
}
else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
// GrP fixme bundle?
struct dylib_command dcmd;
ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
DiCursor dylibname_cur
= ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
HChar* dylibname
= ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
HChar* soname = VG_(strrchr)(dylibname, '/');
if (!soname) soname = dylibname;
else soname++;
di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
soname);
ML_(dinfo_free)(dylibname);
}
else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) {
struct dylinker_command dcmd;
ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
DiCursor dylinkername_cur
= ML_(cur_plus)(cmd_cur, dcmd.name.offset);
HChar* dylinkername
= ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
HChar* soname = VG_(strrchr)(dylinkername, '/');
if (!soname) soname = dylinkername;
else soname++;
di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
soname);
ML_(dinfo_free)(dylinkername);
}
// A comment from Julian about why varinfo[35] fail:
//
// My impression is, from comparing the output of otool -l for these
// executables with the logic in ML_(read_macho_debug_info),
// specifically the part that begins "else if (cmd->cmd ==
// LC_SEGMENT_CMD) {", that it's a complete hack which just happens
// to work ok for text symbols. In particular, it appears to assume
// that in a "struct load_command" of type LC_SEGMENT_CMD, the first
// "struct SEGMENT_COMMAND" inside it is going to contain the info we
// need. However, otool -l shows, and also the Apple docs state,
// that a struct load_command may contain an arbitrary number of
// struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
// snarf the first. But I'm not sure about this.
//
// The "Try for __DATA" block below simply adds acquisition of data
// svma/bias values using the same assumption. It also needs
// (probably) to deal with bss sections, but I don't understand how
// this all ties together really, so it requires further study.
//
// If you can get your head around the relationship between MachO
// segments, sections and load commands, this might be relatively
// easy to fix properly.
//
// Basically we need to come up with plausible numbers for di->
// {text,data,bss}_{avma,svma}, from which the _bias numbers are
// then trivially derived. Then I think the debuginfo reader should
// work pretty well.
else if (cmd.cmd == LC_SEGMENT_CMD) {
struct SEGMENT_COMMAND seg;
ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
/* Try for __TEXT */
if (!di->text_present
&& 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
/* DDD: is the next line a kludge? -- JRS */
&& seg.fileoff == 0 && seg.filesize != 0) {
di->text_present = True;
di->text_svma = (Addr)seg.vmaddr;
di->text_avma = rx_map->avma;
di->text_size = seg.vmsize;
di->text_bias = di->text_avma - di->text_svma;
/* Make the _debug_ values be the same as the
svma/bias for the primary object, since there is
no secondary (debuginfo) object, but nevertheless
downstream biasing of Dwarf3 relies on the
_debug_ values. */
di->text_debug_svma = di->text_svma;
di->text_debug_bias = di->text_bias;
}
/* Try for __DATA */
if (!di->data_present
&& 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
/* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
di->data_present = True;
di->data_svma = (Addr)seg.vmaddr;
di->data_avma = rw_map->avma;
di->data_size = seg.vmsize;
di->data_bias = di->data_avma - di->data_svma;
di->data_debug_svma = di->data_svma;
di->data_debug_bias = di->data_bias;
}
}
else if (cmd.cmd == LC_UUID) {
ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
have_uuid = True;
}
// Move the cursor along
cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
}
}
if (!di->soname) {
di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
}
if (di->trace_symtab) {
VG_(printf)("\n");
VG_(printf)("SONAME = %s\n", di->soname);
VG_(printf)("\n");
}
/* Now we have the base object to hand. Read symbols from it. */
// We already asserted that ..
vg_assert(msli.img != NULL && msli.szB > 0);
if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
struct symtab_command symcmd;
struct dysymtab_command dysymcmd;
ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd));
ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
/* Read nlist symbol table */
DiCursor syms = DiCursor_INVALID;
DiCursor strs = DiCursor_INVALID;
XArray* /* DiSym */ candSyms = NULL;
Word nCandSyms;
if (msli.szB < symcmd.stroff + symcmd.strsize
|| msli.szB < symcmd.symoff + symcmd.nsyms
* sizeof(struct NLIST)) {
ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
goto fail;
}
if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
|| dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
goto fail;
}
syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg,
" reading syms from primary file (%d %d)\n",
dysymcmd.nextdefsym, dysymcmd.nlocalsym );
/* Read candidate symbols into 'candSyms', so we can truncate
overlapping ends and generally tidy up, before presenting
them to ML_(addSym). */
candSyms = VG_(newXA)(
ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
ML_(dinfo_free), sizeof(DiSym)
);
// extern symbols
read_symtab(candSyms,
di,
ML_(cur_plus)(syms,
dysymcmd.iextdefsym * sizeof(struct NLIST)),
dysymcmd.nextdefsym, strs, symcmd.strsize);
// static and private_extern symbols
read_symtab(candSyms,
di,
ML_(cur_plus)(syms,
dysymcmd.ilocalsym * sizeof(struct NLIST)),
dysymcmd.nlocalsym, strs, symcmd.strsize);
/* tidy up the cand syms -- trim overlapping ends. May resize
candSyms. */
tidy_up_cand_syms( candSyms, di->trace_symtab );
/* and finally present them to ML_(addSym) */
nCandSyms = VG_(sizeXA)( candSyms );
for (i = 0; i < nCandSyms; i++) {
DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
vg_assert(cand->pri_name != NULL);
vg_assert(cand->sec_names == NULL);
if (di->trace_symtab)
VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
cand->avmas.main, cand->avmas.main + cand->size - 1,
cand->pri_name );
ML_(addSym)( di, cand );
}
VG_(deleteXA)( candSyms );
}
/* If there's no UUID in the primary, don't even bother to try and
read any DWARF, since we won't be able to verify it matches.
Our policy is not to load debug info unless we can verify that
it matches the primary. Just declare success at this point.
And don't complain to the user, since that would cause us to
complain on objects compiled without -g. (Some versions of
XCode are observed to omit a UUID entry for object linked(?)
without -g. Others don't appear to omit it.) */
if (!have_uuid)
goto success;
/* mmap the dSYM file to look for DWARF debug info. If successful,
use the .macho_img and .macho_img_szB in dsli. */
dsymfilename = find_separate_debug_file( di->fsm.filename );
/* Try to load it. */
if (dsymfilename) {
Bool valid;
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
dsli = map_image_aboard( di, dsymfilename );
if (!ML_(sli_is_valid)(dsli)) {
ML_(symerr)(di, False, "Connect to debuginfo image failed "
"(first attempt).");
goto fail;
}
/* check it has the right uuid. */
vg_assert(have_uuid);
valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
if (valid)
goto read_the_dwarf;
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg, " dSYM does not have "
"correct UUID (out of date?)\n");
}
/* There was no dsym file, or it doesn't match. We'll have to try
regenerating it, unless --dsymutil=no, in which case just complain
instead. */
/* If this looks like a lib that we shouldn't run dsymutil on, just
give up. (possible reasons: is system lib, or in /usr etc, or
the dsym dir would not be writable by the user, or we're running
as root) */
vg_assert(di->fsm.filename);
if (is_systemish_library_name(di->fsm.filename))
goto success;
if (!VG_(clo_dsymutil)) {
if (VG_(clo_verbosity) == 1) {
VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
}
if (VG_(clo_verbosity) > 0)
VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
"--dsymutil=yes\n",
VG_(clo_verbosity) > 1 ? " " : "",
dsymfilename ? "has wrong UUID" : "is missing");
goto success;
}
/* Run dsymutil */
{ Int r;
const HChar* dsymutil = "/usr/bin/dsymutil ";
HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
VG_(strlen)(dsymutil)
+ VG_(strlen)(di->fsm.filename)
+ 32 /* misc */ );
VG_(strcpy)(cmd, dsymutil);
if (0) VG_(strcat)(cmd, "--verbose ");
VG_(strcat)(cmd, "\"");
VG_(strcat)(cmd, di->fsm.filename);
VG_(strcat)(cmd, "\"");
VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
r = VG_(system)( cmd );
if (r)
VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
ML_(dinfo_free)(cmd);
dsymfilename = find_separate_debug_file(di->fsm.filename);
}
/* Try again to load it. */
if (dsymfilename) {
Bool valid;
if (VG_(clo_verbosity) > 1)
VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
dsli = map_image_aboard( di, dsymfilename );
if (!ML_(sli_is_valid)(dsli)) {
ML_(symerr)(di, False, "Connect to debuginfo image failed "
"(second attempt).");
goto fail;
}
/* check it has the right uuid. */
vg_assert(have_uuid);
vg_assert(have_uuid);
valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
if (!valid) {
if (VG_(clo_verbosity) > 0) {
VG_(message)(Vg_DebugMsg,
"WARNING: did not find expected UUID %02X%02X%02X%02X"
"-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
" in dSYM dir\n",
(UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
(UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
(UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
(UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
(UInt)uuid[14], (UInt)uuid[15] );
VG_(message)(Vg_DebugMsg,
"WARNING: for %s\n", di->fsm.filename);
}
unmap_image( &dsli );
/* unmap_image zeroes out dsli, so it's safe for "fail:" to
re-try unmap_image. */
goto fail;
}
}
/* Right. Finally we have our best try at the dwarf image, so go
on to reading stuff out of it. */
read_the_dwarf:
if (ML_(sli_is_valid)(dsli) && dsli.szB > 0) {
// "_mscn" is "mach-o section"
DiSlice debug_info_mscn
= getsectdata(dsli, "__DWARF", "__debug_info", NULL);
DiSlice debug_abbv_mscn
= getsectdata(dsli, "__DWARF", "__debug_abbrev", NULL);
DiSlice debug_line_mscn
= getsectdata(dsli, "__DWARF", "__debug_line", NULL);
DiSlice debug_str_mscn
= getsectdata(dsli, "__DWARF", "__debug_str", NULL);
DiSlice debug_ranges_mscn
= getsectdata(dsli, "__DWARF", "__debug_ranges", NULL);
DiSlice debug_loc_mscn
= getsectdata(dsli, "__DWARF", "__debug_loc", NULL);
/* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
segment "__TEXT" appears in both the main and dsym files, but
only the main one gives the right results. Since it's in the
__TEXT segment, we calculate the __eh_frame avma using its
svma and the text bias, and that sounds reasonable. */
Addr eh_frame_svma = 0;
DiSlice eh_frame_mscn
= getsectdata(msli, "__TEXT", "__eh_frame", &eh_frame_svma);
if (ML_(sli_is_valid)(eh_frame_mscn)) {
vg_assert(di->text_bias == di->text_debug_bias);
ML_(read_callframe_info_dwarf3)(di, eh_frame_mscn,
eh_frame_svma + di->text_bias,
True/*is_ehframe*/);
}
if (ML_(sli_is_valid)(debug_info_mscn)) {
if (VG_(clo_verbosity) > 1) {
if (0)
VG_(message)(Vg_DebugMsg,
"Reading dwarf3 for %s (%#lx) from %s"
" (%lld %lld %lld %lld %lld %lld)\n",
di->fsm.filename, di->text_avma, dsymfilename,
debug_info_mscn.szB, debug_abbv_mscn.szB,
debug_line_mscn.szB, debug_str_mscn.szB,
debug_ranges_mscn.szB, debug_loc_mscn.szB
);
VG_(message)(Vg_DebugMsg,
" reading dwarf3 from dsyms file\n");
}
/* The old reader: line numbers and unwind info only */
ML_(read_debuginfo_dwarf3) ( di,
debug_info_mscn,
DiSlice_INVALID, /* .debug_types */
debug_abbv_mscn,
debug_line_mscn,
debug_str_mscn,
DiSlice_INVALID /* ALT .debug_str */ );
/* The new reader: read the DIEs in .debug_info to acquire
information on variable types and locations or inline info.
But only if the tool asks for it, or the user requests it on
the command line. */
if (VG_(clo_read_var_info) /* the user or tool asked for it */
|| VG_(clo_read_inline_info)) {
ML_(new_dwarf3_reader)(
di, debug_info_mscn,
DiSlice_INVALID, /* .debug_types */
debug_abbv_mscn,
debug_line_mscn,
debug_str_mscn,
debug_ranges_mscn,
debug_loc_mscn,
DiSlice_INVALID, /* ALT .debug_info */
DiSlice_INVALID, /* ALT .debug_abbv */
DiSlice_INVALID, /* ALT .debug_line */
DiSlice_INVALID /* ALT .debug_str */
);
}
}
}
if (dsymfilename) ML_(dinfo_free)(dsymfilename);
success:
unmap_image(&msli);
unmap_image(&dsli);
return True;
/* NOTREACHED */
fail:
ML_(symerr)(di, True, "Error reading Mach-O object.");
unmap_image(&msli);
unmap_image(&dsli);
return False;
}
#endif // defined(VGO_darwin)
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/