| // Copyright (c) 2011 Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
| |
| // dump_symbols.cc: implement google_breakpad::WriteSymbolFile: |
| // Find all the debugging info in a file and dump it as a Breakpad symbol file. |
| |
| #include "common/linux/dump_symbols.h" |
| |
| #include <assert.h> |
| #include <elf.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <link.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| #include <sys/stat.h> |
| #include <unistd.h> |
| |
| #include <iostream> |
| #include <set> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "common/dwarf/bytereader-inl.h" |
| #include "common/dwarf/dwarf2diehandler.h" |
| #include "common/dwarf_cfi_to_module.h" |
| #include "common/dwarf_cu_to_module.h" |
| #include "common/dwarf_line_to_module.h" |
| #include "common/dwarf_range_list_handler.h" |
| #include "common/linux/crc32.h" |
| #include "common/linux/eintr_wrapper.h" |
| #include "common/linux/elfutils.h" |
| #include "common/linux/elfutils-inl.h" |
| #include "common/linux/elf_symbols_to_module.h" |
| #include "common/linux/file_id.h" |
| #include "common/memory_allocator.h" |
| #include "common/module.h" |
| #include "common/path_helper.h" |
| #include "common/scoped_ptr.h" |
| #ifndef NO_STABS_SUPPORT |
| #include "common/stabs_reader.h" |
| #include "common/stabs_to_module.h" |
| #endif |
| #include "common/using_std_string.h" |
| |
| // This namespace contains helper functions. |
| namespace { |
| |
| using google_breakpad::DumpOptions; |
| using google_breakpad::DwarfCFIToModule; |
| using google_breakpad::DwarfCUToModule; |
| using google_breakpad::DwarfLineToModule; |
| using google_breakpad::DwarfRangeListHandler; |
| using google_breakpad::ElfClass; |
| using google_breakpad::ElfClass32; |
| using google_breakpad::ElfClass64; |
| using google_breakpad::FileID; |
| using google_breakpad::FindElfSectionByName; |
| using google_breakpad::GetOffset; |
| using google_breakpad::IsValidElf; |
| using google_breakpad::kDefaultBuildIdSize; |
| using google_breakpad::Module; |
| using google_breakpad::PageAllocator; |
| #ifndef NO_STABS_SUPPORT |
| using google_breakpad::StabsToModule; |
| #endif |
| using google_breakpad::scoped_ptr; |
| using google_breakpad::wasteful_vector; |
| |
| // Define AARCH64 ELF architecture if host machine does not include this define. |
| #ifndef EM_AARCH64 |
| #define EM_AARCH64 183 |
| #endif |
| |
//
// FDWrapper
//
// Owns a file descriptor and closes it on destruction, unless ownership
// has been given up with release(). The value -1 means "nothing owned".
//
class FDWrapper {
 public:
  // Takes ownership of |fd|.
  explicit FDWrapper(int fd) : fd_(fd) {}

  // Closes the owned descriptor, if there is one.
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Returns the owned descriptor; ownership stays with the wrapper.
  int get() const {
    return fd_;
  }

  // Gives up ownership: returns the descriptor and leaves the wrapper
  // empty, so the destructor will not close it.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // A copy would make two wrappers close the same descriptor, so copying
  // is declared private and intentionally left undefined.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
| |
//
// MmapWrapper
//
// Remembers one mapped region handed to it with set() and unmaps it on
// destruction, unless release() was called first.
//
class MmapWrapper {
 public:
  // Starts out empty; every member gets a defined value so the object is
  // safe to inspect or destroy before set() is ever called.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  // Unmaps the recorded region, if any.
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Records the region [mapped_address, mapped_address + mapped_size) as
  // the one to unmap. A previously recorded region is simply forgotten,
  // not unmapped.
  void set(void* mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Forgets the recorded region without unmapping it. Must only be called
  // after set().
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // A copy would unmap the same region twice, so copying is declared
  // private and intentionally left undefined.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;
  void* base_;
  size_t size_;
};
| |
// Return the preferred loading address of the binary described by the
// NHEADER program headers at PROGRAM_HEADERS: the p_vaddr of the first
// PT_LOAD entry, or 0 when there is no PT_LOAD entry at all.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment. (ELF requires
  // the segments to be sorted by load address.) For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  int index = 0;
  while (index < nheader) {
    const Phdr* segment = program_headers + index;
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
    ++index;
  }
  return 0;
}
| |
| // Find the set of address ranges for all PT_LOAD segments. |
| template <typename ElfClass> |
| vector<Module::Range> GetPtLoadSegmentRanges( |
| const typename ElfClass::Phdr* program_headers, |
| int nheader) { |
| typedef typename ElfClass::Phdr Phdr; |
| vector<Module::Range> ranges; |
| |
| for (int i = 0; i < nheader; ++i) { |
| const Phdr& header = program_headers[i]; |
| if (header.p_type == PT_LOAD) { |
| ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz)); |
| } |
| } |
| return ranges; |
| } |
| |
| #ifndef NO_STABS_SUPPORT |
| template<typename ElfClass> |
| bool LoadStabs(const typename ElfClass::Ehdr* elf_header, |
| const typename ElfClass::Shdr* stab_section, |
| const typename ElfClass::Shdr* stabstr_section, |
| const bool big_endian, |
| Module* module) { |
| // A callback object to handle data from the STABS reader. |
| StabsToModule handler(module); |
| // Find the addresses of the STABS data, and create a STABS reader object. |
| // On Linux, STABS entries always have 32-bit values, regardless of the |
| // address size of the architecture whose code they're describing, and |
| // the strings are always "unitized". |
| const uint8_t* stabs = |
| GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset); |
| const uint8_t* stabstr = |
| GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset); |
| google_breakpad::StabsReader reader(stabs, stab_section->sh_size, |
| stabstr, stabstr_section->sh_size, |
| big_endian, 4, true, &handler); |
| // Read the STABS data, and do post-processing. |
| if (!reader.Process()) |
| return false; |
| handler.Finalize(); |
| return true; |
| } |
| #endif // NO_STABS_SUPPORT |
| |
| // A range handler that accepts rangelist data parsed by |
| // dwarf2reader::RangeListReader and populates a range vector (typically |
| // owned by a function) with the results. |
| class DumperRangesHandler : public DwarfCUToModule::RangesHandler { |
| public: |
| DumperRangesHandler(dwarf2reader::ByteReader* reader) : |
| reader_(reader) { } |
| |
| bool ReadRanges( |
| enum dwarf2reader::DwarfForm form, uint64_t data, |
| dwarf2reader::RangeListReader::CURangesInfo* cu_info, |
| vector<Module::Range>* ranges) { |
| DwarfRangeListHandler handler(ranges); |
| dwarf2reader::RangeListReader range_list_reader(reader_, cu_info, |
| &handler); |
| return range_list_reader.ReadRanges(form, data); |
| } |
| |
| private: |
| dwarf2reader::ByteReader* reader_; |
| }; |
| |
| // A line-to-module loader that accepts line number info parsed by |
| // dwarf2reader::LineInfo and populates a Module and a line vector |
| // with the results. |
| class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler { |
| public: |
| // Create a line-to-module converter using BYTE_READER. |
| explicit DumperLineToModule(dwarf2reader::ByteReader* byte_reader) |
| : byte_reader_(byte_reader) { } |
| void StartCompilationUnit(const string& compilation_dir) { |
| compilation_dir_ = compilation_dir; |
| } |
| void ReadProgram(const uint8_t* program, uint64_t length, |
| const uint8_t* string_section, |
| uint64_t string_section_length, |
| const uint8_t* line_string_section, |
| uint64_t line_string_section_length, |
| Module* module, std::vector<Module::Line>* lines) { |
| DwarfLineToModule handler(module, compilation_dir_, lines); |
| dwarf2reader::LineInfo parser(program, length, byte_reader_, |
| string_section, string_section_length, |
| line_string_section, |
| line_string_section_length, |
| &handler); |
| parser.Start(); |
| } |
| private: |
| string compilation_dir_; |
| dwarf2reader::ByteReader* byte_reader_; |
| }; |
| |
| template<typename ElfClass> |
| bool LoadDwarf(const string& dwarf_filename, |
| const typename ElfClass::Ehdr* elf_header, |
| const bool big_endian, |
| bool handle_inter_cu_refs, |
| Module* module) { |
| typedef typename ElfClass::Shdr Shdr; |
| |
| const dwarf2reader::Endianness endianness = big_endian ? |
| dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; |
| dwarf2reader::ByteReader byte_reader(endianness); |
| |
| // Construct a context for this file. |
| DwarfCUToModule::FileContext file_context(dwarf_filename, |
| module, |
| handle_inter_cu_refs); |
| |
| // Build a map of the ELF file's sections. |
| const Shdr* sections = |
| GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
| int num_sections = elf_header->e_shnum; |
| const Shdr* section_names = sections + elf_header->e_shstrndx; |
| for (int i = 0; i < num_sections; i++) { |
| const Shdr* section = §ions[i]; |
| string name = GetOffset<ElfClass, char>(elf_header, |
| section_names->sh_offset) + |
| section->sh_name; |
| const uint8_t* contents = GetOffset<ElfClass, uint8_t>(elf_header, |
| section->sh_offset); |
| file_context.AddSectionToSectionMap(name, contents, section->sh_size); |
| } |
| |
| // .debug_ranges and .debug_rnglists reader |
| DumperRangesHandler ranges_handler(&byte_reader); |
| |
| // Parse all the compilation units in the .debug_info section. |
| DumperLineToModule line_to_module(&byte_reader); |
| dwarf2reader::SectionMap::const_iterator debug_info_entry = |
| file_context.section_map().find(".debug_info"); |
| assert(debug_info_entry != file_context.section_map().end()); |
| const std::pair<const uint8_t*, uint64_t>& debug_info_section = |
| debug_info_entry->second; |
| // This should never have been called if the file doesn't have a |
| // .debug_info section. |
| assert(debug_info_section.first); |
| uint64_t debug_info_length = debug_info_section.second; |
| for (uint64_t offset = 0; offset < debug_info_length;) { |
| // Make a handler for the root DIE that populates MODULE with the |
| // data that was found. |
| DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset); |
| DwarfCUToModule root_handler(&file_context, &line_to_module, |
| &ranges_handler, &reporter); |
| // Make a Dwarf2Handler that drives the DIEHandler. |
| dwarf2reader::DIEDispatcher die_dispatcher(&root_handler); |
| // Make a DWARF parser for the compilation unit at OFFSET. |
| dwarf2reader::CompilationUnit reader(dwarf_filename, |
| file_context.section_map(), |
| offset, |
| &byte_reader, |
| &die_dispatcher); |
| // Process the entire compilation unit; get the offset of the next. |
| offset += reader.Start(); |
| } |
| return true; |
| } |
| |
| // Fill REGISTER_NAMES with the register names appropriate to the |
| // machine architecture given in HEADER, indexed by the register |
| // numbers used in DWARF call frame information. Return true on |
| // success, or false if HEADER's machine architecture is not |
| // supported. |
| template<typename ElfClass> |
| bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, |
| std::vector<string>* register_names) { |
| switch (elf_header->e_machine) { |
| case EM_386: |
| *register_names = DwarfCFIToModule::RegisterNames::I386(); |
| return true; |
| case EM_ARM: |
| *register_names = DwarfCFIToModule::RegisterNames::ARM(); |
| return true; |
| case EM_AARCH64: |
| *register_names = DwarfCFIToModule::RegisterNames::ARM64(); |
| return true; |
| case EM_MIPS: |
| *register_names = DwarfCFIToModule::RegisterNames::MIPS(); |
| return true; |
| case EM_X86_64: |
| *register_names = DwarfCFIToModule::RegisterNames::X86_64(); |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| template<typename ElfClass> |
| bool LoadDwarfCFI(const string& dwarf_filename, |
| const typename ElfClass::Ehdr* elf_header, |
| const char* section_name, |
| const typename ElfClass::Shdr* section, |
| const bool eh_frame, |
| const typename ElfClass::Shdr* got_section, |
| const typename ElfClass::Shdr* text_section, |
| const bool big_endian, |
| Module* module) { |
| // Find the appropriate set of register names for this file's |
| // architecture. |
| std::vector<string> register_names; |
| if (!DwarfCFIRegisterNames<ElfClass>(elf_header, ®ister_names)) { |
| fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" |
| " cannot convert DWARF call frame information\n", |
| dwarf_filename.c_str(), elf_header->e_machine); |
| return false; |
| } |
| |
| const dwarf2reader::Endianness endianness = big_endian ? |
| dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; |
| |
| // Find the call frame information and its size. |
| const uint8_t* cfi = |
| GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset); |
| size_t cfi_size = section->sh_size; |
| |
| // Plug together the parser, handler, and their entourages. |
| DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name); |
| DwarfCFIToModule handler(module, register_names, &module_reporter); |
| dwarf2reader::ByteReader byte_reader(endianness); |
| |
| byte_reader.SetAddressSize(ElfClass::kAddrSize); |
| |
| // Provide the base addresses for .eh_frame encoded pointers, if |
| // possible. |
| byte_reader.SetCFIDataBase(section->sh_addr, cfi); |
| if (got_section) |
| byte_reader.SetDataBase(got_section->sh_addr); |
| if (text_section) |
| byte_reader.SetTextBase(text_section->sh_addr); |
| |
| dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename, |
| section_name); |
| dwarf2reader::CallFrameInfo parser(cfi, cfi_size, |
| &byte_reader, &handler, &dwarf_reporter, |
| eh_frame); |
| parser.Start(); |
| return true; |
| } |
| |
| bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, |
| void** elf_header) { |
| int obj_fd = open(obj_file.c_str(), O_RDONLY); |
| if (obj_fd < 0) { |
| fprintf(stderr, "Failed to open ELF file '%s': %s\n", |
| obj_file.c_str(), strerror(errno)); |
| return false; |
| } |
| FDWrapper obj_fd_wrapper(obj_fd); |
| struct stat st; |
| if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { |
| fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", |
| obj_file.c_str(), strerror(errno)); |
| return false; |
| } |
| void* obj_base = mmap(NULL, st.st_size, |
| PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0); |
| if (obj_base == MAP_FAILED) { |
| fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", |
| obj_file.c_str(), strerror(errno)); |
| return false; |
| } |
| map_wrapper->set(obj_base, st.st_size); |
| *elf_header = obj_base; |
| if (!IsValidElf(*elf_header)) { |
| fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); |
| return false; |
| } |
| return true; |
| } |
| |
// Report the byte order recorded in ELF_HEADER's e_ident[EI_DATA]. Sets
// *BIG_ENDIAN and returns true for ELFDATA2LSB (false) and ELFDATA2MSB
// (true). For any other value, prints a message to stderr, leaves
// *BIG_ENDIAN alone and returns false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int data_encoding = elf_header->e_ident[EI_DATA];
  switch (data_encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", data_encoding);
  return false;
}
| |
// Resolve |right_path| with realpath() and report whether the result is
// the same string as |left_abspath|, which the caller supplies already
// resolved. Returns false if |right_path| cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved_right[PATH_MAX];
  const char* const resolved = realpath(right_path.c_str(), resolved_right);
  return resolved != NULL && strcmp(left_abspath, resolved_right) == 0;
}
| |
| // Read the .gnu_debuglink and get the debug file name. If anything goes |
| // wrong, return an empty string. |
| string ReadDebugLink(const uint8_t* debuglink, |
| const size_t debuglink_size, |
| const bool big_endian, |
| const string& obj_file, |
| const std::vector<string>& debug_dirs) { |
| // Include '\0' + CRC32 (4 bytes). |
| size_t debuglink_len = strlen(reinterpret_cast<const char*>(debuglink)) + 5; |
| debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round up to 4 bytes. |
| |
| // Sanity check. |
| if (debuglink_len != debuglink_size) { |
| fprintf(stderr, "Mismatched .gnu_debuglink string / section size: " |
| "%zx %zx\n", debuglink_len, debuglink_size); |
| return string(); |
| } |
| |
| char obj_file_abspath[PATH_MAX]; |
| if (!realpath(obj_file.c_str(), obj_file_abspath)) { |
| fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str()); |
| return string(); |
| } |
| |
| std::vector<string> searched_paths; |
| string debuglink_path; |
| std::vector<string>::const_iterator it; |
| for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) { |
| const string& debug_dir = *it; |
| debuglink_path = debug_dir + "/" + |
| reinterpret_cast<const char*>(debuglink); |
| |
| // There is the annoying case of /path/to/foo.so having foo.so as the |
| // debug link file name. Thus this may end up opening /path/to/foo.so again, |
| // and there is a small chance of the two files having the same CRC. |
| if (IsSameFile(obj_file_abspath, debuglink_path)) |
| continue; |
| |
| searched_paths.push_back(debug_dir); |
| int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY); |
| if (debuglink_fd < 0) |
| continue; |
| |
| FDWrapper debuglink_fd_wrapper(debuglink_fd); |
| |
| // The CRC is the last 4 bytes in |debuglink|. |
| const dwarf2reader::Endianness endianness = big_endian ? |
| dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE; |
| dwarf2reader::ByteReader byte_reader(endianness); |
| uint32_t expected_crc = |
| byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]); |
| |
| uint32_t actual_crc = 0; |
| while (true) { |
| const size_t kReadSize = 4096; |
| char buf[kReadSize]; |
| ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize)); |
| if (bytes_read < 0) { |
| fprintf(stderr, "Error reading debug ELF file %s.\n", |
| debuglink_path.c_str()); |
| return string(); |
| } |
| if (bytes_read == 0) |
| break; |
| actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read); |
| } |
| if (actual_crc != expected_crc) { |
| fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n", |
| debuglink_path.c_str()); |
| continue; |
| } |
| |
| // Found debug file. |
| return debuglink_path; |
| } |
| |
| // Not found case. |
| fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n", |
| obj_file.c_str()); |
| for (it = searched_paths.begin(); it < searched_paths.end(); ++it) { |
| const string& debug_dir = *it; |
| fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink); |
| } |
| return string(); |
| } |
| |
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is stored by reference, not copied, so it must outlive this
  // object.
  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
      debug_dirs_(dbg_dirs),
      has_loading_addr_(false),
      loading_addr_(0) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. Loading the
  // same section a second time only produces a message on stderr.
  void LoadedSection(const string& section) {
    if (loaded_sections_.count(section) == 0) {
      loaded_sections_.insert(section);
    } else {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME; every later
  // call compares its ADDR against the recorded one, and on a mismatch
  // prints a message and trips an assertion.
  void set_loading_addr(Addr addr, const string& filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded; without this every call would look
      // like the first one and the comparison below could never run.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_;  // Directories in which to
                                           // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
| |
| template<typename ElfClass> |
| bool LoadSymbols(const string& obj_file, |
| const bool big_endian, |
| const typename ElfClass::Ehdr* elf_header, |
| const bool read_gnu_debug_link, |
| LoadSymbolsInfo<ElfClass>* info, |
| const DumpOptions& options, |
| Module* module) { |
| typedef typename ElfClass::Addr Addr; |
| typedef typename ElfClass::Phdr Phdr; |
| typedef typename ElfClass::Shdr Shdr; |
| |
| Addr loading_addr = GetLoadingAddress<ElfClass>( |
| GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), |
| elf_header->e_phnum); |
| module->SetLoadAddress(loading_addr); |
| info->set_loading_addr(loading_addr, obj_file); |
| |
| // Allow filtering of extraneous debug information in partitioned libraries. |
| // Such libraries contain debug information for all libraries extracted from |
| // the same combined library, implying extensive duplication. |
| vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>( |
| GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), |
| elf_header->e_phnum); |
| module->SetAddressRanges(address_ranges); |
| |
| const Shdr* sections = |
| GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); |
| const Shdr* section_names = sections + elf_header->e_shstrndx; |
| const char* names = |
| GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); |
| const char* names_end = names + section_names->sh_size; |
| bool found_debug_info_section = false; |
| bool found_usable_info = false; |
| |
| if (options.symbol_data != ONLY_CFI) { |
| #ifndef NO_STABS_SUPPORT |
| // Look for STABS debugging information, and load it if present. |
| const Shdr* stab_section = |
| FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| if (stab_section) { |
| const Shdr* stabstr_section = stab_section->sh_link + sections; |
| if (stabstr_section) { |
| found_debug_info_section = true; |
| found_usable_info = true; |
| info->LoadedSection(".stab"); |
| if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section, |
| big_endian, module)) { |
| fprintf(stderr, "%s: \".stab\" section found, but failed to load" |
| " STABS debugging information\n", obj_file.c_str()); |
| } |
| } |
| } |
| #endif // NO_STABS_SUPPORT |
| |
| // Look for DWARF debugging information, and load it if present. |
| const Shdr* dwarf_section = |
| FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| |
| // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains, |
| // but MIPS_DWARF for regular gnu toolchains, so both need to be checked |
| if (elf_header->e_machine == EM_MIPS && !dwarf_section) { |
| dwarf_section = |
| FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| } |
| |
| if (dwarf_section) { |
| found_debug_info_section = true; |
| found_usable_info = true; |
| info->LoadedSection(".debug_info"); |
| if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian, |
| options.handle_inter_cu_refs, module)) { |
| fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " |
| "DWARF debugging information\n", obj_file.c_str()); |
| } |
| } |
| |
| // See if there are export symbols available. |
| const Shdr* symtab_section = |
| FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| const Shdr* strtab_section = |
| FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| if (symtab_section && strtab_section) { |
| info->LoadedSection(".symtab"); |
| |
| const uint8_t* symtab = |
| GetOffset<ElfClass, uint8_t>(elf_header, |
| symtab_section->sh_offset); |
| const uint8_t* strtab = |
| GetOffset<ElfClass, uint8_t>(elf_header, |
| strtab_section->sh_offset); |
| bool result = |
| ELFSymbolsToModule(symtab, |
| symtab_section->sh_size, |
| strtab, |
| strtab_section->sh_size, |
| big_endian, |
| ElfClass::kAddrSize, |
| module); |
| found_usable_info = found_usable_info || result; |
| } else { |
| // Look in dynsym only if full symbol table was not available. |
| const Shdr* dynsym_section = |
| FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| const Shdr* dynstr_section = |
| FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| if (dynsym_section && dynstr_section) { |
| info->LoadedSection(".dynsym"); |
| |
| const uint8_t* dynsyms = |
| GetOffset<ElfClass, uint8_t>(elf_header, |
| dynsym_section->sh_offset); |
| const uint8_t* dynstrs = |
| GetOffset<ElfClass, uint8_t>(elf_header, |
| dynstr_section->sh_offset); |
| bool result = |
| ELFSymbolsToModule(dynsyms, |
| dynsym_section->sh_size, |
| dynstrs, |
| dynstr_section->sh_size, |
| big_endian, |
| ElfClass::kAddrSize, |
| module); |
| found_usable_info = found_usable_info || result; |
| } |
| } |
| } |
| |
| if (options.symbol_data != NO_CFI) { |
| // Dwarf Call Frame Information (CFI) is actually independent from |
| // the other DWARF debugging information, and can be used alone. |
| const Shdr* dwarf_cfi_section = |
| FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| |
| // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains, |
| // but MIPS_DWARF for regular gnu toolchains, so both need to be checked |
| if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) { |
| dwarf_cfi_section = |
| FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| } |
| |
| if (dwarf_cfi_section) { |
| // Ignore the return value of this function; even without call frame |
| // information, the other debugging information could be perfectly |
| // useful. |
| info->LoadedSection(".debug_frame"); |
| bool result = |
| LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", |
| dwarf_cfi_section, false, 0, 0, big_endian, |
| module); |
| found_usable_info = found_usable_info || result; |
| } |
| |
| // Linux C++ exception handling information can also provide |
| // unwinding data. |
| const Shdr* eh_frame_section = |
| FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| if (eh_frame_section) { |
| // Pointers in .eh_frame data may be relative to the base addresses of |
| // certain sections. Provide those sections if present. |
| const Shdr* got_section = |
| FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| const Shdr* text_section = |
| FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, |
| sections, names, names_end, |
| elf_header->e_shnum); |
| info->LoadedSection(".eh_frame"); |
| // As above, ignore the return value of this function. |
| bool result = |
| LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", |
| eh_frame_section, true, |
| got_section, text_section, big_endian, module); |
| found_usable_info = found_usable_info || result; |
| } |
| } |
| |
| if (!found_debug_info_section) { |
| fprintf(stderr, "%s: file contains no debugging information" |
| " (no \".stab\" or \".debug_info\" sections)\n", |
| obj_file.c_str()); |
| |
| // Failed, but maybe there's a .gnu_debuglink section? |
| if (read_gnu_debug_link) { |
| const Shdr* gnu_debuglink_section |
| = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS, |
| sections, names, |
| names_end, elf_header->e_shnum); |
| if (gnu_debuglink_section) { |
| if (!info->debug_dirs().empty()) { |
| const uint8_t* debuglink_contents = |
| GetOffset<ElfClass, uint8_t>(elf_header, |
| gnu_debuglink_section->sh_offset); |
| string debuglink_file = |
| ReadDebugLink(debuglink_contents, |
| gnu_debuglink_section->sh_size, |
| big_endian, |
| obj_file, |
| info->debug_dirs()); |
| info->set_debuglink_file(debuglink_file); |
| } else { |
| fprintf(stderr, ".gnu_debuglink section found in '%s', " |
| "but no debug path specified.\n", obj_file.c_str()); |
| } |
| } else { |
| fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n", |
| obj_file.c_str()); |
| } |
| } else { |
| // Return true if some usable information was found, since the caller |
| // doesn't want to use .gnu_debuglink. |
| return found_usable_info; |
| } |
| |
| // No debug info was found, let the user try again with .gnu_debuglink |
| // if present. |
| return false; |
| } |
| |
| return true; |
| } |
| |
// Map the e_machine value of ELF_HEADER to the architecture name used in
// breakpad symbol files. Returns NULL when the machine is not one of the
// entries in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct MachineName {
    int machine;
    const char* name;
  };
  static const MachineName kMachineNames[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };
  const size_t kMachineNameCount =
      sizeof(kMachineNames) / sizeof(kMachineNames[0]);

  const Half arch = elf_header->e_machine;
  for (size_t i = 0; i < kMachineNameCount; ++i) {
    if (arch == kMachineNames[i].machine)
      return kMachineNames[i].name;
  }
  return NULL;
}
| |
| template<typename ElfClass> |
| bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header, |
| const string& debuglink_file, |
| const string& obj_filename, |
| const char* obj_file_architecture, |
| const bool obj_file_is_big_endian) { |
| const char* debug_architecture = |
| ElfArchitecture<ElfClass>(debug_elf_header); |
| if (!debug_architecture) { |
| fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
| debuglink_file.c_str(), debug_elf_header->e_machine); |
| return false; |
| } |
| if (strcmp(obj_file_architecture, debug_architecture)) { |
| fprintf(stderr, "%s with ELF machine architecture %s does not match " |
| "%s with ELF architecture %s\n", |
| debuglink_file.c_str(), debug_architecture, |
| obj_filename.c_str(), obj_file_architecture); |
| return false; |
| } |
| bool debug_big_endian; |
| if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) |
| return false; |
| if (debug_big_endian != obj_file_is_big_endian) { |
| fprintf(stderr, "%s and %s does not match in endianness\n", |
| obj_filename.c_str(), debuglink_file.c_str()); |
| return false; |
| } |
| return true; |
| } |
| |
| template<typename ElfClass> |
| bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header, |
| const string& obj_filename, |
| const string& obj_os, |
| scoped_ptr<Module>& module) { |
| PageAllocator allocator; |
| wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize); |
| if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { |
| fprintf(stderr, "%s: unable to generate file identifier\n", |
| obj_filename.c_str()); |
| return false; |
| } |
| |
| const char* architecture = ElfArchitecture<ElfClass>(elf_header); |
| if (!architecture) { |
| fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", |
| obj_filename.c_str(), elf_header->e_machine); |
| return false; |
| } |
| |
| char name_buf[NAME_MAX] = {}; |
| std::string name = google_breakpad::ElfFileSoNameFromMappedFile( |
| elf_header, name_buf, sizeof(name_buf)) |
| ? name_buf |
| : google_breakpad::BaseName(obj_filename); |
| |
| // Add an extra "0" at the end. PDB files on Windows have an 'age' |
| // number appended to the end of the file identifier; this isn't |
| // really used or necessary on other platforms, but be consistent. |
| string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0"; |
| // This is just the raw Build ID in hex. |
| string code_id = FileID::ConvertIdentifierToString(identifier); |
| |
| module.reset(new Module(name, obj_os, architecture, id, code_id)); |
| |
| return true; |
| } |
| |
| template<typename ElfClass> |
| bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, |
| const string& obj_filename, |
| const string& obj_os, |
| const std::vector<string>& debug_dirs, |
| const DumpOptions& options, |
| Module** out_module) { |
| typedef typename ElfClass::Ehdr Ehdr; |
| |
| *out_module = NULL; |
| |
| scoped_ptr<Module> module; |
| if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os, |
| module)) { |
| return false; |
| } |
| |
| // Figure out what endianness this file is. |
| bool big_endian; |
| if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) |
| return false; |
| |
| LoadSymbolsInfo<ElfClass> info(debug_dirs); |
| if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, |
| !debug_dirs.empty(), &info, |
| options, module.get())) { |
| const string debuglink_file = info.debuglink_file(); |
| if (debuglink_file.empty()) |
| return false; |
| |
| // Load debuglink ELF file. |
| fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); |
| MmapWrapper debug_map_wrapper; |
| Ehdr* debug_elf_header = NULL; |
| if (!LoadELF(debuglink_file, &debug_map_wrapper, |
| reinterpret_cast<void**>(&debug_elf_header)) || |
| !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file, |
| obj_filename, |
| module->architecture().c_str(), |
| big_endian)) { |
| return false; |
| } |
| |
| if (!LoadSymbols<ElfClass>(debuglink_file, big_endian, |
| debug_elf_header, false, &info, |
| options, module.get())) { |
| return false; |
| } |
| } |
| |
| *out_module = module.release(); |
| return true; |
| } |
| |
| } // namespace |
| |
| namespace google_breakpad { |
| |
| // Not explicitly exported, but not static so it can be used in unit tests. |
| bool ReadSymbolDataInternal(const uint8_t* obj_file, |
| const string& obj_filename, |
| const string& obj_os, |
| const std::vector<string>& debug_dirs, |
| const DumpOptions& options, |
| Module** module) { |
| if (!IsValidElf(obj_file)) { |
| fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); |
| return false; |
| } |
| |
| int elfclass = ElfClass(obj_file); |
| if (elfclass == ELFCLASS32) { |
| return ReadSymbolDataElfClass<ElfClass32>( |
| reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os, |
| debug_dirs, options, module); |
| } |
| if (elfclass == ELFCLASS64) { |
| return ReadSymbolDataElfClass<ElfClass64>( |
| reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os, |
| debug_dirs, options, module); |
| } |
| |
| return false; |
| } |
| |
| bool WriteSymbolFile(const string& load_path, |
| const string& obj_file, |
| const string& obj_os, |
| const std::vector<string>& debug_dirs, |
| const DumpOptions& options, |
| std::ostream& sym_stream) { |
| Module* module; |
| if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options, |
| &module)) |
| return false; |
| |
| bool result = module->Write(sym_stream, options.symbol_data); |
| delete module; |
| return result; |
| } |
| |
| // Read the selected object file's debugging information, and write out the |
| // header only to |stream|. Return true on success; if an error occurs, report |
| // it and return false. |
| bool WriteSymbolFileHeader(const string& load_path, |
| const string& obj_file, |
| const string& obj_os, |
| std::ostream& sym_stream) { |
| MmapWrapper map_wrapper; |
| void* elf_header = NULL; |
| if (!LoadELF(load_path, &map_wrapper, &elf_header)) { |
| fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str()); |
| return false; |
| } |
| |
| if (!IsValidElf(elf_header)) { |
| fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); |
| return false; |
| } |
| |
| int elfclass = ElfClass(elf_header); |
| scoped_ptr<Module> module; |
| if (elfclass == ELFCLASS32) { |
| if (!InitModuleForElfClass<ElfClass32>( |
| reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os, |
| module)) { |
| fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); |
| return false; |
| } |
| } else if (elfclass == ELFCLASS64) { |
| if (!InitModuleForElfClass<ElfClass64>( |
| reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os, |
| module)) { |
| fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); |
| return false; |
| } |
| } else { |
| fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str()); |
| return false; |
| } |
| |
| return module->Write(sym_stream, ALL_SYMBOL_DATA); |
| } |
| |
| bool ReadSymbolData(const string& load_path, |
| const string& obj_file, |
| const string& obj_os, |
| const std::vector<string>& debug_dirs, |
| const DumpOptions& options, |
| Module** module) { |
| MmapWrapper map_wrapper; |
| void* elf_header = NULL; |
| if (!LoadELF(load_path, &map_wrapper, &elf_header)) |
| return false; |
| |
| return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), |
| obj_file, obj_os, debug_dirs, options, module); |
| } |
| |
| } // namespace google_breakpad |