// Copyright 2018 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // This library provides Symbolize() function that symbolizes program // counters to their corresponding symbol names on linux platforms. // This library has a minimal implementation of an ELF symbol table // reader (i.e. it doesn't depend on libelf, etc.). // // The algorithm used in Symbolize() is as follows. // // 1. Go through a list of maps in /proc/self/maps and find the map // containing the program counter. // // 2. Open the mapped file and find a regular symbol table inside. // Iterate over symbols in the symbol table and look for the symbol // containing the program counter. If such a symbol is found, // obtain the symbol name, and demangle the symbol if possible. // If the symbol isn't found in the regular symbol table (binary is // stripped), try the same thing with a dynamic symbol table. // // Note that Symbolize() is originally implemented to be used in // signal handlers, hence it doesn't use malloc() and other unsafe // operations. It should be both thread-safe and async-signal-safe. // // Implementation note: // // We don't use heaps but only use stacks. We want to reduce the // stack consumption so that the symbolizer can run on small stacks. 
// // Here are some numbers collected with GCC 4.1.0 on x86: // - sizeof(Elf32_Sym) = 16 // - sizeof(Elf32_Shdr) = 40 // - sizeof(Elf64_Sym) = 24 // - sizeof(Elf64_Shdr) = 64 // // This implementation is intended to be async-signal-safe but uses some // functions which are not guaranteed to be so, such as memchr() and // memmove(). We assume they are async-signal-safe. #include <dlfcn.h> #include <elf.h> #include <fcntl.h> #include <link.h> // For ElfW() macro. #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #include <algorithm> #include <array> #include <atomic> #include <cerrno> #include <cinttypes> #include <climits> #include <cstdint> #include <cstdio> #include <cstdlib> #include <cstring> #include "absl/base/casts.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/internal/low_level_alloc.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/spinlock.h" #include "absl/base/port.h" #include "absl/debugging/internal/demangle.h" #include "absl/debugging/internal/vdso_support.h" #include "absl/strings/string_view.h" #if defined(__FreeBSD__) && !defined(ElfW) #define ElfW … #endif namespace absl { ABSL_NAMESPACE_BEGIN // Value of argv[0]. Used by MaybeInitializeObjFile(). static char *argv0_value = …; void InitializeSymbolizer(const char *argv0) { … } namespace debugging_internal { namespace { // Re-runs fn until it doesn't cause EINTR. #define NO_INTR(fn) … // On Linux, ELF_ST_* are defined in <linux/elf.h>. To make this portable // we define our own ELF_ST_BIND and ELF_ST_TYPE if not available. #ifndef ELF_ST_BIND #define ELF_ST_BIND(info) … #endif #ifndef ELF_ST_TYPE #define ELF_ST_TYPE(info) … #endif // Some platforms use a special .opd section to store function pointers. const char kOpdSectionName[] = …; #if (defined(__powerpc__) && !(_CALL_ELF > 1)) || defined(__ia64) // Use opd section for function descriptors on these platforms, the function // address is the first word of the descriptor. 
enum { kPlatformUsesOPDSections = 1 }; #else // not PPC or IA64 enum { … }; #endif // This works for PowerPC & IA64 only. A function descriptor consists of two // pointers and the first one is the function's entry. const size_t kFunctionDescriptorSize = …; const int kMaxDecorators = …; // Seems like a reasonable upper limit. struct InstalledSymbolDecorator { … }; int g_num_decorators; InstalledSymbolDecorator g_decorators[kMaxDecorators]; struct FileMappingHint { … }; // Protects g_decorators. // We are using SpinLock and not a Mutex here, because we may be called // from inside Mutex::Lock itself, and it prohibits recursive calls. // This happens in e.g. base/stacktrace_syscall_unittest. // Moreover, we are using only TryLock(), if the decorator list // is being modified (is busy), we skip all decorators, and possibly // lose some info. Sorry, that's the best we could do. ABSL_CONST_INIT absl::base_internal::SpinLock g_decorators_mu( absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); const int kMaxFileMappingHints = …; int g_num_file_mapping_hints; FileMappingHint g_file_mapping_hints[kMaxFileMappingHints]; // Protects g_file_mapping_hints. ABSL_CONST_INIT absl::base_internal::SpinLock g_file_mapping_mu( absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); // Async-signal-safe function to zero a buffer. // memset() is not guaranteed to be async-signal-safe. static void SafeMemZero(void* p, size_t size) { … } struct ObjFile { … }; // Build 4-way associative cache for symbols. Within each cache line, symbols // are replaced in LRU order. 
enum { … }; struct SymbolCacheLine { … }; // --------------------------------------------------------------- // An async-signal-safe arena for LowLevelAlloc static std::atomic<base_internal::LowLevelAlloc::Arena *> g_sig_safe_arena; static base_internal::LowLevelAlloc::Arena *SigSafeArena() { … } static void InitSigSafeArena() { … } // --------------------------------------------------------------- // An AddrMap is a vector of ObjFile, using SigSafeArena() for allocation. class AddrMap { … }; void AddrMap::Clear() { … } ObjFile *AddrMap::Add() { … } class CachingFile { … }; // --------------------------------------------------------------- enum FindSymbolResult { … }; class Symbolizer { … }; static std::atomic<Symbolizer *> g_cached_symbolizer; } // namespace static size_t SymbolizerSize() { … } // Return (and set null) g_cached_symbolizer if it is not null. // Otherwise return a new symbolizer. static Symbolizer *AllocateSymbolizer() { … } // Set g_cached_symbolizer to s if it is null, otherwise // delete s. static void FreeSymbolizer(Symbolizer *s) { … } Symbolizer::Symbolizer() : … { … } Symbolizer::~Symbolizer() { … } // We don't use assert() since it's not guaranteed to be // async-signal-safe. Instead we define a minimal assertion // macro. So far, we don't need pretty printing for __FILE__, etc. #define SAFE_ASSERT(expr) … // Read up to "count" bytes from file descriptor "fd" into the buffer // starting at "buf" while handling short reads and EINTR. On // success, return the number of bytes read. Otherwise, return -1. static ssize_t ReadPersistent(int fd, void *buf, size_t count) { … } // Read up to "count" bytes from "offset" into the buffer starting at "buf", // while handling short reads and EINTR. On success, return the number of bytes // read. Otherwise, return -1. 
ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) { … } // Try reading exactly "count" bytes from "offset" bytes into the buffer // starting at "buf" while handling short reads and EINTR. On success, return // true. Otherwise, return false. bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) { … } // Returns elf_header.e_type if the given file is an ELF binary. static int FileGetElfType(CachingFile *file) { … } // Read the section headers in the given ELF binary, and if a section // of the specified type is found, set the output to this section header // and return true. Otherwise, return false. // To keep stack consumption low, we would like this function to not get // inlined. static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { … } // There is no particular reason to limit section name to 63 characters, // but there has (as yet) been no need for anything longer either. const int kMaxSectionNameLen = …; // Small cache to use for miscellaneous file reads. const int kSmallFileCacheSize = …; bool ForEachSection(int fd, const std::function<bool(absl::string_view name, const ElfW(Shdr) &)> &callback) { … } // name_len should include terminating '\0'. bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, ElfW(Shdr) * out) { … } // Compare symbols at the same address. // Return true if we should pick symbol1. static bool ShouldPickFirstSymbol(const ElfW(Sym) & symbol1, const ElfW(Sym) & symbol2) { … } // Return true if an address is inside a section. static bool InSection(const void *address, ptrdiff_t relocation, const ElfW(Shdr) * section) { … } static const char *ComputeOffset(const char *base, ptrdiff_t offset) { … } // Read a symbol table and look for the symbol containing the // pc. 
Iterate over symbols in a symbol table and look for the symbol // containing "pc". If the symbol is found, and its name fits in // out_size, the name is written into out and SYMBOL_FOUND is returned. // If the name does not fit, a truncated name is written into out, // and SYMBOL_TRUNCATED is returned. Out is NUL-terminated. // If the symbol is not found, SYMBOL_NOT_FOUND is returned. // To keep stack consumption low, we would like this function to not get // inlined. static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( const void *const pc, CachingFile *file, char *out, size_t out_size, ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { … } // Get the symbol name of "pc" from the object file "obj". Process // both regular and dynamic symbol tables if necessary. // See FindSymbol() comment for description of return value. FindSymbolResult Symbolizer::GetSymbolFromObjectFile( const ObjFile &obj, const void *const pc, const ptrdiff_t relocation, char *out, size_t out_size, char *tmp_buf, size_t tmp_buf_size) { … } namespace { // Thin wrapper around a file descriptor so that the file descriptor // gets closed for sure. class FileDescriptor { … }; // Helper class for reading lines from file. // // Note: we don't use ProcMapsIterator since the object is big (it has // a 5k array member) and uses async-unsafe functions such as sscanf() // and snprintf(). class LineReader { … }; } // namespace // Place the hex number read from "start" into "*value". The pointer to // the first non-hex character or "end" is returned. static const char *GetHex(const char *start, const char *end, uint64_t *const value) { … } static const char *GetHex(const char *start, const char *end, const void **const addr) { … } // Normally we are only interested in "r?x" maps. // On the PowerPC, function pointers point to descriptors in the .opd // section. 
The descriptors themselves are not executable code, so // we need to relax the check below to "r??". static bool ShouldUseMapping(const char *const flags) { … } // Read /proc/self/maps and run "callback" for each mmapped file found. If // "callback" returns false, stop scanning and return true. Else continue // scanning /proc/self/maps. Return true if no parse error is found. static ABSL_ATTRIBUTE_NOINLINE bool ReadAddrMap( bool (*callback)(const char *filename, const void *const start_addr, const void *const end_addr, uint64_t offset, void *arg), void *arg, void *tmp_buf, size_t tmp_buf_size) { … } // Find the objfile mapped in address region containing [addr, addr + len). ObjFile *Symbolizer::FindObjFile(const void *const addr, size_t len) { … } void Symbolizer::ClearAddrMap() { … } // Callback for ReadAddrMap to register objfiles in an in-memory table. bool Symbolizer::RegisterObjFile(const char *filename, const void *const start_addr, const void *const end_addr, uint64_t offset, void *arg) { … } // This function wraps the Demangle function to provide an interface // where the input symbol is demangled in-place. // To keep stack consumption low, we would like this function to not // get inlined. static ABSL_ATTRIBUTE_NOINLINE void DemangleInplace(char *out, size_t out_size, char *tmp_buf, size_t tmp_buf_size) { … } SymbolCacheLine *Symbolizer::GetCacheLine(const void *const pc) { … } void Symbolizer::AgeSymbols(SymbolCacheLine *line) { … } const char *Symbolizer::FindSymbolInCache(const void *const pc) { … } const char *Symbolizer::InsertSymbolInCache(const void *const pc, const char *name) { … } static void MaybeOpenFdFromSelfExe(ObjFile *obj) { … } static bool MaybeInitializeObjFile(ObjFile *obj) { … } // The implementation of our symbolization routine. If it // successfully finds the symbol containing "pc" and obtains the // symbol name, returns pointer to that symbol. Otherwise, returns nullptr. 
// If any symbol decorators have been installed via InstallSymbolDecorator(), // they are called here as well. // To keep stack consumption low, we would like this function to not // get inlined. const char *Symbolizer::GetUncachedSymbol(const void *pc) { … } const char *Symbolizer::GetSymbol(const void *pc) { … } bool RemoveAllSymbolDecorators(void) { … } bool RemoveSymbolDecorator(int ticket) { … } int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) { … } bool RegisterFileMappingHint(const void *start, const void *end, uint64_t offset, const char *filename) { … } bool GetFileMappingHint(const void **start, const void **end, uint64_t *offset, const char **filename) { … } } // namespace debugging_internal bool Symbolize(const void *pc, char *out, int out_size) { … } ABSL_NAMESPACE_END } // namespace absl extern "C" bool AbslInternalGetFileMappingHint(const void **start, const void **end, uint64_t *offset, const char **filename) { … }