blob: b42506ccee5141a2b46781f9ba62c07cb3442ab0 [file] [log] [blame]
// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
// google_breakpad::Mach_O::Reader. See macho_reader.h for details.
#include "common/mac/macho_reader.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits>
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
#if !defined(CPU_TYPE_ARM)
#define CPU_TYPE_ARM 12
#endif
// Fallback for headers that do not define CPU_TYPE_ARM_64. The value
// 16777228 is 0x0100000c, i.e. the CPU_TYPE_ARM value (12) with bit 24 set.
#if !defined(CPU_TYPE_ARM_64)
#define CPU_TYPE_ARM_64 16777228
#endif
namespace google_breakpad {
namespace mach_o {
// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
// arguments, so you can't place expressions that do necessary work in
// the argument of an assert. Nor can you assign the result of the
// expression to a variable and assert that the variable's value is
// true: you'll get unused variable warnings when NDEBUG is #defined.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
// the result is true if NDEBUG is not #defined.
//
// In the NDEBUG case the result is explicitly discarded by casting it to
// void, so that a statement such as ASSERT_ALWAYS_EVAL(a == b); does not
// itself provoke an "expression result unused" warning. In both
// configurations the macro therefore expands to an expression of type void.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) (static_cast<void>(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
// Report on stderr that the file's magic number identifies it as neither a
// fat binary nor a Mach-O object file.
void FatReader::Reporter::BadHeader() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file is neither a fat binary file nor a Mach-O object file\n",
          file);
}
// Report on stderr that the file ended before all the data its headers
// describe could be read.
void FatReader::Reporter::TooShort() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short for the data it claims to contain\n",
          file);
}
// Report on stderr that an object file entry refers to bytes lying beyond
// the end of the file.
void FatReader::Reporter::MisplacedObjectFile() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short for the object files it claims to contain\n",
          file);
}
// Parse the SIZE bytes at BUFFER as either a fat binary or a single Mach-O
// object file, and fill object_files_ with one entry per object file found.
// A plain Mach-O file is recorded as a single entry covering the whole
// buffer. On success, return true. On failure, report the problem through
// reporter_ (TooShort, MisplacedObjectFile or BadHeader) and return false;
// the contents of object_files_ are then unspecified.
bool FatReader::Read(const uint8_t* buffer, size_t size) {
  buffer_.start = buffer;
  buffer_.end = buffer + size;
  ByteCursor cursor(&buffer_);
  // Fat binaries always use big-endian, so read the magic number in
  // that endianness. To recognize Mach-O magic numbers, which can use
  // either endianness, check for both the proper and reversed forms
  // of the magic numbers.
  cursor.set_big_endian(true);
  if (cursor >> magic_) {
    if (magic_ == FAT_MAGIC) {
      // How many object files does this fat binary contain?
      uint32_t object_files_count;
      if (!(cursor >> object_files_count)) {  // nfat_arch
        reporter_->TooShort();
        return false;
      }
      // The count comes straight from the file and cannot be trusted: a
      // handful of bytes could otherwise make us allocate billions of
      // entries. Never allocate more entries than the remaining bytes could
      // possibly encode. If the file claims more than that, the loop below
      // runs out of data and reports TooShort before it would touch an
      // entry that was not allocated.
      struct fat_arch objfile;
      const size_t entry_size = sizeof(objfile.cputype) +
                                sizeof(objfile.cpusubtype) +
                                sizeof(objfile.offset) +
                                sizeof(objfile.size) +
                                sizeof(objfile.align);
      const size_t max_entries = cursor.Available() / entry_size;
      object_files_.resize(object_files_count < max_entries
                               ? object_files_count
                               : max_entries);
      // Read the list of object files.
      for (size_t i = 0; i < object_files_count; i++) {
        // Read this object file entry, byte-swapping as appropriate.
        cursor >> objfile.cputype
               >> objfile.cpusubtype
               >> objfile.offset
               >> objfile.size
               >> objfile.align;
        // Check the read before looking at objfile: after a failed read its
        // fields do not hold data from the file. The index test is a
        // backstop that keeps the store below in bounds no matter what.
        if (!cursor || i >= object_files_.size()) {
          reporter_->TooShort();
          return false;
        }
        SuperFatArch super_fat_arch(objfile);
        object_files_[i] = super_fat_arch;
        // Does the file actually have the bytes this entry refers to?
        // (Written as a subtraction so the test itself cannot overflow.)
        size_t fat_size = buffer_.Size();
        if (objfile.offset > fat_size ||
            objfile.size > fat_size - objfile.offset) {
          reporter_->MisplacedObjectFile();
          return false;
        }
      }
      return true;
    } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
               magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
      // If this is a little-endian Mach-O file, fix the cursor's endianness.
      if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
        cursor.set_big_endian(false);
      // Record the entire file as a single entry in the object file list.
      object_files_.resize(1);
      // Get the cpu type and subtype from the Mach-O header.
      if (!(cursor >> object_files_[0].cputype
                   >> object_files_[0].cpusubtype)) {
        reporter_->TooShort();
        return false;
      }
      object_files_[0].offset = 0;
      object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
      // This alignment is correct for 32 and 64-bit x86 and ppc.
      // See get_align in the lipo source for other architectures:
      // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
      object_files_[0].align = 12;  // 2^12 == 4096
      return true;
    }
  }
  // Either the magic number could not be read at all, or it is not one we
  // recognize.
  reporter_->BadHeader();
  return false;
}
// Report on stderr that the file does not start with a usable Mach-O header.
void Reader::Reporter::BadHeader() {
  const char* const file = filename_.c_str();
  fprintf(stderr, "%s: file is not a Mach-O object file\n", file);
}
// Report on stderr that the file's CPU_TYPE / CPU_SUBTYPE differ from the
// EXPECTED_CPU_TYPE / EXPECTED_CPU_SUBTYPE the caller asked for.
void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
                                       cpu_subtype_t cpu_subtype,
                                       cpu_type_t expected_cpu_type,
                                       cpu_subtype_t expected_cpu_subtype) {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: CPU type %d, subtype %d does not match expected type %d,"
          " subtype %d\n",
          file, cpu_type, cpu_subtype,
          expected_cpu_type, expected_cpu_subtype);
}
// Report on stderr that the file ends before the Mach-O header does.
void Reader::Reporter::HeaderTruncated() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file does not contain a complete Mach-O header\n",
          file);
}
// Report on stderr that the file is shorter than the load command region
// whose size the Mach-O header gives.
void Reader::Reporter::LoadCommandRegionTruncated() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file too short to hold load command region given in"
          " Mach-O header\n",
          file);
}
// Report on stderr that load command number I, out of the CLAIMED commands
// the header promises, runs past the end of the load command region. TYPE
// is the command's type; it is only mentioned in the message when non-zero.
void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
                                           LoadCommandType type) {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: file's header claims there are %zu load commands,"
          " but load command #%zu",
          file, claimed, i);
  if (type != 0) {
    fprintf(stderr, ", of type %d,", type);
  }
  fprintf(stderr, " extends beyond the end of the load command region\n");
}
// Report on stderr that load command number I, of type TYPE, needs more
// bytes than the size recorded in its own header allows.
void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: the contents of load command #%zu, of type %d, extend beyond"
          " the size given in the load command's header\n",
          file, i, type);
}
// Report on stderr that the load command for the segment called NAME has no
// room for all the section headers it says it contains.
void Reader::Reporter::SectionsMissing(const string& name) {
  const char* const file = filename_.c_str();
  const char* const segment = name.c_str();
  fprintf(stderr,
          "%s: the load command for segment '%s' is too short to hold"
          " the section headers it claims to have\n",
          file, segment);
}
// Report on stderr that the segment called NAME places its contents past
// the end of the file.
void Reader::Reporter::MisplacedSegmentData(const string& name) {
  const char* const file = filename_.c_str();
  const char* const segment = name.c_str();
  fprintf(stderr,
          "%s: the segment '%s' claims its contents lie beyond the end"
          " of the file\n",
          file, segment);
}
// Report on stderr that the section called SECTION, belonging to the
// segment called SEGMENT, places its contents outside that segment's
// contents.
void Reader::Reporter::MisplacedSectionData(const string& section,
                                            const string& segment) {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: the section '%s' in segment '%s' claims its contents lie"
          " outside the segment's contents\n",
          file, section.c_str(), segment.c_str());
}
// Report on stderr that the LC_SYMTAB load command places the symbol table
// past the end of the file.
void Reader::Reporter::MisplacedSymbolTable() {
  const char* const file = filename_.c_str();
  fprintf(stderr,
          "%s: the LC_SYMTAB load command claims that the symbol table's"
          " contents are located beyond the end of the file\n",
          file);
}
// Report on stderr that CPU_TYPE is not one of the CPU types this reader
// knows how to handle.
void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
  const char* const file = filename_.c_str();
  fprintf(stderr, "%s: CPU type %d is not supported\n", file, cpu_type);
}
// Parse the Mach-O header in the SIZE bytes at BUFFER and remember where the
// load command region lies. Unless EXPECTED_CPU_TYPE is CPU_TYPE_ANY, the
// file's magic number, CPU type and CPU subtype must all agree with
// EXPECTED_CPU_TYPE and EXPECTED_CPU_SUBTYPE. Return true on success; on
// failure, report the problem through reporter_ and return false.
bool Reader::Read(const uint8_t* buffer,
                  size_t size,
                  cpu_type_t expected_cpu_type,
                  cpu_subtype_t expected_cpu_subtype) {
  // This reader must not already have been given a buffer.
  assert(!buffer_.start);
  buffer_.start = buffer;
  buffer_.end = buffer + size;

  // Read the magic number as big-endian; its form then tells us the file's
  // real byte order.
  ByteCursor cursor(&buffer_, true);
  uint32_t magic;
  if (!(cursor >> magic)) {
    reporter_->HeaderTruncated();
    return false;
  }

  const bool any_cpu_accepted = (expected_cpu_type == CPU_TYPE_ANY);
  if (!any_cpu_accepted) {
    // Each supported CPU type implies one particular magic number (word
    // size plus byte order); make sure the file carries that one.
    uint32_t wanted_magic;
    switch (expected_cpu_type) {
      case CPU_TYPE_POWERPC:
        wanted_magic = MH_MAGIC;
        break;
      case CPU_TYPE_POWERPC64:
        wanted_magic = MH_MAGIC_64;
        break;
      case CPU_TYPE_ARM:
      case CPU_TYPE_I386:
        wanted_magic = MH_CIGAM;
        break;
      case CPU_TYPE_ARM_64:
      case CPU_TYPE_X86_64:
        wanted_magic = MH_CIGAM_64;
        break;
      default:
        reporter_->UnsupportedCPUType(expected_cpu_type);
        return false;
    }
    if (magic != wanted_magic) {
      reporter_->BadHeader();
      return false;
    }
  }

  // Since the byte cursor is in big-endian mode, a reversed magic number
  // always indicates a little-endian file, regardless of our own endianness.
  if (magic == MH_MAGIC || magic == MH_MAGIC_64) {
    big_endian_ = true;
  } else if (magic == MH_CIGAM || magic == MH_CIGAM_64) {
    big_endian_ = false;
  } else {
    reporter_->BadHeader();
    return false;
  }
  bits_64_ = (magic == MH_MAGIC_64 || magic == MH_CIGAM_64);
  cursor.set_big_endian(big_endian_);

  // Read the rest of the header. The 64-bit header has one extra trailing
  // word, which is read and discarded.
  uint32_t commands_size;
  uint32_t reserved_word;
  cursor >> cpu_type_;
  cursor >> cpu_subtype_;
  cursor >> file_type_;
  cursor >> load_command_count_;
  cursor >> commands_size;
  cursor >> flags_;
  if (bits_64_) {
    cursor >> reserved_word;
  }
  if (!cursor) {
    reporter_->HeaderTruncated();
    return false;
  }

  const bool cpu_matches = (expected_cpu_type == cpu_type_ &&
                            expected_cpu_subtype == cpu_subtype_);
  if (!any_cpu_accepted && !cpu_matches) {
    reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
                               expected_cpu_type, expected_cpu_subtype);
    return false;
  }

  // The load commands follow the header directly; record where they start
  // and, commands_size bytes later, where they end.
  cursor.PointTo(&load_commands_.start, commands_size);
  cursor.PointTo(&load_commands_.end, 0);
  if (!cursor) {
    reporter_->LoadCommandRegionTruncated();
    return false;
  }
  return true;
}
// Walk the load commands recorded by Read, in file order, passing each one
// to the matching member function of HANDLER: SegmentCommand for LC_SEGMENT
// and LC_SEGMENT_64, SymtabCommand for LC_SYMTAB, and UnknownCommand for
// everything else. Stop and return false as soon as a handler function
// returns false, or as soon as a malformed command is found (in which case
// the problem is first reported through reporter_). Return true if every
// command was visited.
bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
  // Every load command starts with two 32-bit fields, its type and its
  // total size; the size includes those two fields.
  const size_t kLoadCommandHeaderSize = 2 * sizeof(uint32_t);
  ByteCursor list_cursor(&load_commands_, big_endian_);
  for (size_t index = 0; index < load_command_count_; ++index) {
    // command refers to this load command alone, so that cursor will
    // refuse to read past the load command's end. But since we haven't
    // read the size yet, let command initially refer to the entire
    // remainder of the load command series.
    ByteBuffer command(list_cursor.here(), list_cursor.Available());
    ByteCursor cursor(&command, big_endian_);
    // Read the command type and size --- fields common to all commands.
    uint32_t type, size;
    if (!(cursor >> type)) {
      reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
      return false;
    }
    if (!(cursor >> size) || size > command.Size()) {
      reporter_->LoadCommandsOverrun(load_command_count_, index, type);
      return false;
    }
    // A size smaller than the two fields just read cannot be right. If it
    // were accepted, command.end would be placed before the cursor's
    // current position, and a size of zero would leave list_cursor on this
    // same command for every remaining iteration.
    if (size < kLoadCommandHeaderSize) {
      reporter_->LoadCommandTooShort(index, type);
      return false;
    }
    // Now that we've read the length, restrict command's range to this
    // load command only.
    command.end = command.start + size;
    switch (type) {
      case LC_SEGMENT:
      case LC_SEGMENT_64: {
        Segment segment;
        segment.bits_64 = (type == LC_SEGMENT_64);
        // Addresses, sizes and file offsets are 8 bytes wide in a 64-bit
        // segment command and 4 bytes wide in a 32-bit one.
        size_t word_size = segment.bits_64 ? 8 : 4;
        cursor.CString(&segment.name, 16);
        cursor
            .Read(word_size, false, &segment.vmaddr)
            .Read(word_size, false, &segment.vmsize)
            .Read(word_size, false, &segment.fileoff)
            .Read(word_size, false, &segment.filesize);
        cursor >> segment.maxprot
               >> segment.initprot
               >> segment.nsects
               >> segment.flags;
        if (!cursor) {
          reporter_->LoadCommandTooShort(index, type);
          return false;
        }
        // Written as a subtraction so the test itself cannot overflow.
        if (segment.fileoff > buffer_.Size() ||
            segment.filesize > buffer_.Size() - segment.fileoff) {
          reporter_->MisplacedSegmentData(segment.name);
          return false;
        }
        // Mach-O files in .dSYM bundles have the contents of the loaded
        // segments removed, and their file offsets and file sizes zeroed
        // out. To help us handle this special case properly, give such
        // segments' contents NULL starting and ending pointers.
        if (segment.fileoff == 0 && segment.filesize == 0) {
          segment.contents.start = segment.contents.end = NULL;
        } else {
          segment.contents.start = buffer_.start + segment.fileoff;
          segment.contents.end = segment.contents.start + segment.filesize;
        }
        // The section list occupies the remainder of this load command's
        // space.
        segment.section_list.start = cursor.here();
        segment.section_list.end = command.end;
        if (!handler->SegmentCommand(segment))
          return false;
        break;
      }
      case LC_SYMTAB: {
        uint32_t symoff, nsyms, stroff, strsize;
        cursor >> symoff >> nsyms >> stroff >> strsize;
        if (!cursor) {
          reporter_->LoadCommandTooShort(index, type);
          return false;
        }
        // How big are the entries in the symbol table?
        // sizeof(struct nlist_64) : sizeof(struct nlist),
        // but be paranoid about alignment vs. target architecture.
        const uint64_t symbol_size = bits_64_ ? 16 : 12;
        // How big is the entire symbol array? Compute this in 64 bits:
        // nsyms comes from the file, and where size_t is only 32 bits wide
        // the product could wrap around and slip past the check below.
        const uint64_t symbols_size =
            static_cast<uint64_t>(nsyms) * symbol_size;
        if (symoff > buffer_.Size() ||
            symbols_size > buffer_.Size() - symoff ||
            stroff > buffer_.Size() ||
            strsize > buffer_.Size() - stroff) {
          reporter_->MisplacedSymbolTable();
          return false;
        }
        // symbols_size is now known to be no larger than the buffer, so it
        // fits in a size_t.
        ByteBuffer entries(buffer_.start + symoff,
                           static_cast<size_t>(symbols_size));
        ByteBuffer names(buffer_.start + stroff, strsize);
        if (!handler->SymtabCommand(entries, names))
          return false;
        break;
      }
      default: {
        if (!handler->UnknownCommand(type, command))
          return false;
        break;
      }
    }
    // Step to the next load command, wherever the handler left off.
    list_cursor.set_here(command.end);
  }
  return true;
}
// A LoadCommandHandler that searches the load commands for the segment with
// a particular name, and stops the traversal once it has been seen.
class Reader::SegmentFinder : public LoadCommandHandler {
 public:
  // Build a finder for the segment called NAME. If that segment turns up,
  // a copy of it is stored in *SEGMENT. NAME is held by reference, not
  // copied.
  SegmentFinder(const string& name, Segment* segment)
      : name_(name), segment_(segment), found_(false) { }

  // Whether the sought segment was seen during the traversal.
  bool found() const { return found_; }

  // Return true to keep walking as long as SEGMENT is not the one we want.
  // On a match, save SEGMENT and return false to end the traversal.
  bool SegmentCommand(const Segment& segment) {
    if (segment.name != name_)
      return true;
    *segment_ = segment;
    found_ = true;
    return false;
  }

 private:
  // The name of the segment being sought.
  const string& name_;
  // Destination for the segment, once found. (WEAK)
  Segment* segment_;
  // Set once the segment has been found.
  bool found_;
};
// Look through the load commands for the segment called NAME. If there is
// one, copy its description into *SEGMENT and return true; otherwise return
// false.
bool Reader::FindSegment(const string& name, Segment* segment) const {
  SegmentFinder finder(name, segment);
  // The walk's own return value says nothing useful here: the finder ends
  // the walk by returning false precisely when it succeeds. Ask the finder
  // instead.
  static_cast<void>(WalkLoadCommands(&finder));
  const bool was_found = finder.found();
  return was_found;
}
// Decode each of the SEGMENT.nsects section headers in SEGMENT's section
// list and hand the resulting Section to HANDLER->HandleSection. Return
// false as soon as HandleSection returns false, or when a section header is
// truncated or places its contents outside the segment's (both reported
// through reporter_). Return true once every section has been handled.
bool Reader::WalkSegmentSections(const Segment& segment,
                                 SectionHandler* handler) const {
  // Addresses and sizes are 8 bytes wide in 64-bit sections, 4 otherwise.
  const size_t address_bytes = segment.bits_64 ? 8 : 4;
  ByteCursor cursor(&segment.section_list, big_endian_);
  for (size_t index = 0; index < segment.nsects; ++index) {
    Section section;
    section.bits_64 = segment.bits_64;
    uint64_t size, offset;
    uint32_t ignored;  // Receives header fields we have no use for.

    cursor.CString(&section.section_name, 16);
    cursor.CString(&section.segment_name, 16);
    cursor.Read(address_bytes, false, &section.address);
    cursor.Read(address_bytes, false, &size);
    // The file offset is always a 32-bit field; reading it into a 64-bit
    // variable clears the high bits of |offset|.
    cursor.Read(sizeof(uint32_t), false, &offset);
    cursor >> section.align;
    cursor >> ignored >> ignored;
    cursor >> section.flags;
    cursor >> ignored >> ignored;
    if (section.bits_64) {
      // 64-bit section headers carry one more trailing word.
      cursor >> ignored;
    }
    if (!cursor) {
      reporter_->SectionsMissing(segment.name);
      return false;
    }

    // Section headers only have room for a 32-bit file offset, even in
    // 64-bit files, whereas segment commands hold full 64-bit offsets. For
    // a segment reaching past what 32 bits can address, work out where the
    // section must be from the segment's file offset and the distance
    // between the section's and the segment's load addresses (segments load
    // contiguously). Use that value only if its low 32 bits agree with the
    // offset actually stored, i.e. if the stored offset looks truncated.
    const bool segment_exceeds_32_bits =
        segment.bits_64 &&
        segment.fileoff + segment.filesize >
            std::numeric_limits<uint32_t>::max();
    if (segment_exceeds_32_bits) {
      const uint64_t recomputed_offset =
          segment.fileoff + section.address - segment.vmaddr;
      if (static_cast<uint32_t>(recomputed_offset) == offset) {
        offset = recomputed_offset;
      }
    }

    const uint32_t section_type = section.flags & SECTION_TYPE;
    // Zero-fill sections have a size, but no contents.
    const bool zero_fill = (section_type == S_ZEROFILL ||
                            section_type == S_THREAD_LOCAL_ZEROFILL ||
                            section_type == S_GB_ZEROFILL);
    // Mach-O files in .dSYM bundles have their loaded segments' contents
    // stripped and the segments' file offsets and sizes zeroed, yet the
    // sections inside still report non-zero sizes. That is not worth a
    // MisplacedSectionData report, since the caller may only want the
    // section's load address.
    const bool segment_has_no_contents = (segment.contents.start == NULL &&
                                          segment.contents.end == NULL);
    if (zero_fill || segment_has_no_contents) {
      // Nothing to point at; use NULL limits, for safety.
      section.contents.start = section.contents.end = NULL;
    } else {
      // The section's bytes must lie entirely within the segment's.
      const size_t segment_begin =
          static_cast<size_t>(segment.contents.start - buffer_.start);
      const size_t segment_end =
          static_cast<size_t>(segment.contents.end - buffer_.start);
      const bool within_segment =
          offset >= segment_begin &&
          offset <= segment_end &&
          size <= static_cast<size_t>(segment.contents.end - buffer_.start -
                                      offset);
      if (!within_segment) {
        reporter_->MisplacedSectionData(section.section_name,
                                        section.segment_name);
        return false;
      }
      section.contents.start = buffer_.start + offset;
      section.contents.end = section.contents.start + size;
    }

    if (!handler->HandleSection(section))
      return false;
  }
  return true;
}
// A SectionHandler that builds a SectionMap for the sections within a
// given segment.
class Reader::SectionMapper: public SectionHandler {
 public:
  // Create a SectionHandler that populates MAP with an entry for
  // each section it is given. The constructor is explicit so that a bare
  // SectionMap* is never silently converted into a SectionMapper.
  explicit SectionMapper(SectionMap* map) : map_(map) { }

  // Record SECTION in the map under its section name, replacing any entry
  // already stored under that name. Always return true, so that the
  // traversal continues.
  bool HandleSection(const Section& section) {
    (*map_)[section.section_name] = section;
    return true;
  }

 private:
  // The map under construction. (WEAK)
  SectionMap* map_;
};
bool Reader::MapSegmentSections(const Segment& segment,
SectionMap* section_map) const {
section_map->clear();
SectionMapper mapper(section_map);
return WalkSegmentSections(segment, &mapper);
}
} // namespace mach_o
} // namespace google_breakpad