#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs dwarfdump on passed-in .so."""
import argparse
import bisect
import dataclasses
import logging
import os
import re
import subprocess
import typing
import path_util
# Extra command-line flags appended to every dwarfdump invocation in this file.
# NOTE(review): presumably these restrict the dump to top-level debug-info
# entries (compile units) -- confirm against the dwarfdump documentation.
_DWARF_DUMP_FLAGS = ['--debug-info', '--recurse-depth=0']

# Matches a line that starts with whitespace followed by 'DW_'. When the line
# is a DW_AT_name attribute with a quoted value, that value is captured in the
# second group; otherwise the second group is None.
# Matching and group examples:
#   '0x00001234: DW_TAG_compile_unit' -> None
#   ' DW_AT_low_pc (0x123)' -> ('DW_', None)
#   ' DW_AT_name ("foo")' -> ('DW_', 'foo')
_RE_DW_AT_NAME = re.compile(r'\s+(DW_)(?:AT_name\s+\("(.*?)"\))?')
class _DwoNameLookup:
  """Helper to look up name (source file) from .dwo files.

  dwarfdump of an ELF file normally specifies source files in DW_AT_name
  fields. However, debug fission can move debug info from ELF files to .dwo
  files. In this case, dwarfdump would omit DW_AT_name of affected symbols, and
  use DW_AT_GNU_dwo_name to specify the path (relative to output dir) of the
  matching .dwo files, whose dwarfdump would then specify the matching source
  file in DW_AT_name.

  This class performs cached lookup from .dwo to name (source file).
  """

  def __init__(self, any_path):
    """Detects the output directory from |any_path| and locates dwarfdump.

    Args:
      any_path: A path assumed to lie within the build output directory.
    """
    finder = path_util.OutputDirectoryFinder(
        any_path_within_output_directory=any_path)
    self._output_path = finder.Detect()  # May be None.
    self._dwarf_dump_path = path_util.GetDwarfdumpPath()
    # Maps .dwo path -> looked-up name (or the .dwo path itself on failure).
    self._cache = {}

  def _ReadName(self, dwo_path):
    """Runs dwarfdump on .dwo to extract name.

    Args:
      dwo_path: Path of the .dwo file, assumed relative to the output path.

    Returns:
      The first quoted DW_AT_name value found after the DW_TAG_compile_unit
      line, or |dwo_path| if the output directory is unknown or no name was
      found.
    """
    if self._output_path is None:
      return dwo_path
    # Assumption: |dwo_path| is relative to output path.
    real_dwo_path = os.path.join(self._output_path, dwo_path)
    cmd = [self._dwarf_dump_path, real_dwo_path] + _DWARF_DUMP_FLAGS
    name = None
    found_compile_unit = False
    # The context manager closes the stdout pipe and waits for the child on
    # exit, so the killed process is reaped instead of lingering as a zombie.
    with subprocess.Popen(cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.DEVNULL,
                          encoding='utf-8') as proc:
      try:
        # Scan output line by line, exit and terminate as soon as possible.
        for line in proc.stdout:
          if not found_compile_unit:
            # Scan for DW_TAG_compile_unit.
            found_compile_unit = 'DW_TAG_compile_unit' in line
            continue
          # Scan for DW_AT_name.
          m = _RE_DW_AT_NAME.match(line)
          if not m:  # Not even matching prefix ' DW_'.
            break
          name = m.group(2)
          if name is not None:  # Extracted name.
            break
          # Else matches ' DW_': Continue scanning.
      finally:
        # Remaining output is not needed; stop the child right away so the
        # implicit wait on context exit returns promptly.
        proc.kill()
    return dwo_path if name is None else name

  def Lookup(self, dwo_path):
    """Looks up name in .dwo, with caching.

    Args:
      dwo_path: Path of the .dwo file, as passed to _ReadName().

    Returns:
      The cached or freshly read name for |dwo_path|.
    """
    if dwo_path in self._cache:
      return self._cache[dwo_path]
    name = self._ReadName(dwo_path)
    self._cache[dwo_path] = name
    return name

  def LogStats(self):
    """Logs how many cached lookups resolved to something other than the key."""
    if self._cache:
      # A lookup that failed stores the .dwo path itself as the value.
      num_success = sum(1 for k, v in self._cache.items() if k != v)
      logging.info('Successful .dwo lookups: %d / %d', num_success,
                   len(self._cache))
@dataclasses.dataclass(order=True)
class _AddressRange:
  """Mutable address range, ordered by (start, stop) via order=True.

  _SourceMapper treats a range as half-open: start <= address < stop.
  """
  # First address covered by the range.
  start: int
  # Address just past the end of the range (exclusive in lookups).
  stop: int
class _SourceMapper:
  """Maps addresses to source paths using (_AddressRange, path) tuples."""

  def __init__(self, range_info_list):
    """Stores the range list and precomputes the largest stop address.

    Args:
      range_info_list: List of (_AddressRange, source path) tuples. Lookups
        bisect this list, so it is expected to be sorted.
    """
    self._range_info_list = range_info_list
    self._largest_address = 0
    if self._range_info_list:
      # The last entry has the largest start, but not necessarily the largest
      # stop when ranges nest or overlap, so take the true maximum. The bisect
      # key in FindSourceForTextAddress() relies on this being >= every stop.
      self._largest_address = max(info[0].stop
                                  for info in self._range_info_list)

  def FindSourceForTextAddress(self, address):
    """Returns source file path matching passed-in symbol address.

    Only symbols in the .text section of the elf file are supported.

    Args:
      address: Address to look up.

    Returns:
      The source path of the last range whose start is <= |address|, provided
      that range contains |address|; otherwise ''.
    """
    # Bisect against stop = self._largest_address + 1 to avoid bisecting against
    # the "source path" tuple component.
    bisect_index = bisect.bisect_right(
        self._range_info_list,
        (_AddressRange(address, self._largest_address + 1), '')) - 1
    if bisect_index >= 0:
      info = self._range_info_list[bisect_index]
      if info[0].start <= address < info[0].stop:
        return info[1]
    return ''

  def NumberOfPaths(self):
    """Returns the number of distinct source paths across all ranges."""
    return len({info[1] for info in self._range_info_list})

  @property
  def num_ranges(self):
    """Number of (range, source path) entries held."""
    return len(self._range_info_list)
def CreateAddressSourceMapper(elf_path):
  """Runs dwarfdump on |elf_path| and wraps the parsed ranges for lookup.

  Args:
    elf_path: Path of the ELF file handed to dwarfdump.

  Returns:
    A _SourceMapper for querying source path given address.
  """
  range_info_list = _Parse(elf_path)
  return _SourceMapper(range_info_list)
def CreateAddressSourceMapperForTest(lines, dwo_name_lookup=None):
  """Builds a _SourceMapper from already-captured dwarfdump output lines.

  Args:
    lines: Iterable of dwarfdump output lines.
    dwo_name_lookup: Optional object with Lookup() and LogStats() used to
      resolve .dwo paths to names.

  Returns:
    A _SourceMapper over the parsed ranges.
  """
  range_info_list = _ParseDumpOutput(lines, dwo_name_lookup)
  return _SourceMapper(range_info_list)
def ParseDumpOutputForTest(lines, dwo_name_lookup=None):
  """Exposes _ParseDumpOutput() for tests; arguments are passed through.

  Returns:
    The sorted list of (_AddressRange, source path) tuples.
  """
  return _ParseDumpOutput(lines, dwo_name_lookup=dwo_name_lookup)
def _Parse(elf_path):
  """Runs dwarfdump on |elf_path| and parses its stdout.

  Args:
    elf_path: Path of the ELF file to dump.

  Returns:
    The sorted list of (_AddressRange, source path) tuples.
  """
  dwarfdump_path = path_util.GetDwarfdumpPath()
  cmd = [dwarfdump_path, elf_path, *_DWARF_DUMP_FLAGS]
  logging.debug('Running: %s', ' '.join(cmd))
  # stderr is discarded; only stdout is parsed.
  output = subprocess.check_output(cmd,
                                   stderr=subprocess.DEVNULL,
                                   encoding='utf-8')
  dump_lines = output.splitlines()
  return _ParseDumpOutput(dump_lines, _DwoNameLookup(elf_path))
def _ParseDumpOutput(lines, dwo_name_lookup=None):
  """Parses passed-in dwarfdump stdout.

  Args:
    lines: Iterable of dwarfdump output lines.
    dwo_name_lookup: Optional object with Lookup() and LogStats(). When given,
      .dwo paths are resolved through it; otherwise the .dwo path itself is
      recorded as the source path.

  Returns:
    Sorted list of (_AddressRange, source path) tuples.
  """
  entries = []
  remaining = iter(lines)
  current = next(remaining, None)
  while current is not None:
    if 'DW_TAG_compile_unit' in current:
      # _ParseCompileUnit() hands back the line that ended the unit (possibly
      # the next compile unit header), so it is examined on the next pass.
      current, unit_ranges, source_path, dwo_path = _ParseCompileUnit(
          remaining)
      if unit_ranges and (source_path or dwo_path):
        for unit_range in unit_ranges:
          if dwo_path:
            if dwo_name_lookup:
              source_path = dwo_name_lookup.Lookup(dwo_path)
            else:
              source_path = dwo_path
          entries.append((unit_range, source_path))
    else:
      current = next(remaining, None)
  if dwo_name_lookup:
    dwo_name_lookup.LogStats()
  entries.sort()
  return entries
def _ParseCompileUnit(line_it):
  """Parses the attributes of one DW_TAG_compile_unit block.

  Consumes lines from |line_it| until the next line containing DW_TAG, or
  until the iterator is exhausted.

  Example:
    0x000026: DW_TAG_compile_unit
                DW_AT_low_pc (0x02f)
                DW_AT_high_pc (0x03f)
                DW_AT_name ("foo.cc")
                DW_AT_GNU_dwo_name ("foo.dwo")

  Returns:
    Tuple (line, ranges, source_path, dwo_path). |line| is the DW_TAG line that
    ended the block, or None at end of input. |ranges| is a list of
    _AddressRange. |source_path| and |dwo_path| are None when absent.
  """
  unit_name = None
  unit_dwo = None
  pc_range = _AddressRange(0, 0)
  unit_ranges = []
  terminator = None
  for candidate in line_it:
    marker = candidate.find('DW_')
    if marker < 0:
      continue
    attr = candidate[marker:]
    if attr.startswith('DW_TAG'):
      terminator = candidate
      break
    if attr.startswith('DW_AT_low_pc'):
      pc_range.start = int(_ExtractDwValue(candidate), 16)
      # Until DW_AT_high_pc is seen, treat the unit as covering one address.
      if pc_range.stop == 0:
        pc_range.stop = pc_range.start + 1
    elif attr.startswith('DW_AT_high_pc'):
      pc_range.stop = int(_ExtractDwValue(candidate), 16)
    elif attr.startswith('DW_AT_name'):
      unit_name = _ExtractDwValue(candidate)
    elif attr.startswith('DW_AT_GNU_dwo_name'):
      unit_dwo = _ExtractDwValue(candidate)
    elif attr.startswith('DW_AT_ranges'):
      unit_ranges = _ParseRanges(line_it)

  if unit_ranges:
    # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
    # DW_AT_low_pc is base offset. Base offset is currently unsupported.
    assert pc_range.start == 0
  elif pc_range.start > 0:
    unit_ranges.append(pc_range)
  return (terminator, unit_ranges, unit_name, unit_dwo)
def _ParseRanges(line_it):
  """Parses the range list following DW_AT_ranges in dwarfdump stdout.

  Reads lines from |line_it| until one has more closing than opening brackets
  (the line that closes the DW_AT_ranges value) or the input ends.

  Example:
    [0x1, 0x2)
    [0x5, 0x10))

  Returns:
    List of _AddressRange, one per non-empty range.
  """
  parsed = []
  for entry in line_it:
    opened = sum(entry.count(ch) for ch in '([')
    closed = sum(entry.count(ch) for ch in ')]')
    parts = entry.strip('([]) \t').split(',')
    if len(parts) == 2:
      low, high = (int(part, 16) for part in parts)
      # Dwarf spec does not assign special meaning to empty ranges.
      if low != high:
        parsed.append(_AddressRange(low, high))
    if closed > opened:
      break
  return parsed
def _ExtractDwValue(line):
  """Extract DW_AT_ value from dwarfdump stdout.

  The value is the text between the first '(' and the last ')' on the line, so
  values that themselves contain parentheses (e.g. a path such as
  "dir (x86)/foo.cc") are returned whole. Surrounding double quotes are
  removed.

  Examples:
    DW_AT_name ("foo.cc")   -> 'foo.cc'
    DW_AT_decl_line (177)   -> '177'
    DW_AT_low_pc (0x2)      -> '0x2'

  Args:
    line: One line of dwarfdump output.

  Returns:
    The extracted value as a string, or None if the line has no '(' or no ')'
    after it.
  """
  lparen_index = line.find('(')
  if lparen_index < 0:
    return None
  rparen_index = line.rfind(')')
  if rparen_index < lparen_index:
    return None
  value = line[lparen_index + 1:rparen_index]
  # Strip quotes only when both are present and distinct characters.
  if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
    value = value[1:-1]
  return value
def main():
  """Command-line entry point.

  Requires exactly one of --dwarf-dump-output (a file holding saved dwarfdump
  output) or --elf-file (an ELF file to run dwarfdump on), then logs how many
  source paths and ranges were found.
  """
  parser = argparse.ArgumentParser()
  group = parser.add_mutually_exclusive_group(required=True)
  group.add_argument('--dwarf-dump-output', type=os.path.realpath)
  group.add_argument('--elf-file', type=os.path.realpath)
  args = parser.parse_args()

  logging.basicConfig(level=logging.DEBUG,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  if args.dwarf_dump_output:
    dwo_name_lookup = _DwoNameLookup(args.dwarf_dump_output)
    # Decode as UTF-8 to match how dwarfdump's stdout is decoded in _Parse(),
    # instead of depending on the locale's default encoding.
    with open(args.dwarf_dump_output, 'r', encoding='utf-8') as f:
      source_mapper = CreateAddressSourceMapperForTest(f.read().splitlines(),
                                                       dwo_name_lookup)
  else:
    assert args.elf_file
    source_mapper = CreateAddressSourceMapper(args.elf_file)
  logging.warning('Found %d source paths across %d ranges',
                  source_mapper.NumberOfPaths(), source_mapper.num_ranges)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
  main()