chromium/tools/android/memdump/memsymbols.py

#!/usr/bin/env python
#
# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import base64
import os
import sys
import re

from optparse import OptionParser

"""Extracts the list of resident symbols of a library loaded in a process.

This script combines the extended output of memdump for a given process
(obtained through memdump -x PID) and the symbol table of a .so loaded in that
process (obtained through nm -C lib-with-symbols.so), keeping only those
symbols that, at the time of the snapshot, were resident in memory (that is,
the symbols whose start address belongs to a mapped page of the .so which was
resident at the time of the snapshot).
The aim is to perform a "code coverage"-like profiling of a binary, intersecting
run-time information (list of resident pages) and debug symbols.
"""

# Size in bytes of one memory page. Used to convert mapping offsets and symbol
# addresses into page indexes. Hard-coded to 4 KiB.
_PAGE_SIZE = 4096


def _TestBit(word, bit):
  """Tells whether bit number |bit| of |word| is set.

  Args:
    word: An integer whose bits are inspected.
    bit: The bit position to test, 0 being the least significant bit. Only
        positions 0..7 are accepted (enforced by an assertion).

  Returns:
    True if the bit is 1, False otherwise.
  """
  assert 0 <= bit < 8
  mask = 1 << bit
  return (word & mask) != 0


def _HexAddr(addr):
  """Formats |addr| as lowercase hex digits, zero-padded to at least 8 digits.

  %-formatting is used rather than slicing the output of hex(): under Python 2
  hex() of a long appends an 'L' suffix (e.g. '0xffffffffL'), which would leak
  into the printed address.

  Args:
    addr: A non-negative integer (address, offset or length).

  Returns:
    The hex representation without the '0x' prefix, e.g. '00001000'. Values
    needing more than 8 digits are returned in full, not truncated.
  """
  return '%08x' % addr


def _GetResidentPagesSet(memdump_contents, lib_name, verbose):
  """Parses the memdump output and extracts the resident page set for lib_name.

  Every mapping line is expected to have the form
    START-END PROT OFFSET <other fields> "PATH" [BASE64_BITMAP]
  where START, END and OFFSET are lowercase hex numbers. Lines starting with
  '[ PID' are silently skipped; any other line that does not match is reported
  on stderr and skipped.

  The decoded bitmap is read one bit per page: page i of the mapping is bit
  (i % 8), counting from the least significant bit, of byte (i // 8).

  Args:
    memdump_contents: Array of strings (lines) of a memdump output.
    lib_name: A string containing the name of the library.so to be matched.
    verbose: Print a verbose header for each mapping matched.

  Returns:
    A set of resident pages (the key is the page index) for all the
    mappings matching .*lib_name. Page indexes are relative to the start of
    the mapped file (mapping offset / page size + page number in the mapping).
  """
  resident_pages = set()
  MAP_RX = re.compile(
      r'^([0-9a-f]+)-([0-9a-f]+) ([\w-]+) ([0-9a-f]+) .* "(.*)" \[(.*)\]$')
  for line in memdump_contents:
    line = line.rstrip('\r\n')
    if line.startswith('[ PID'):
      continue

    r = MAP_RX.match(line)
    if not r:
      # The name of the file being parsed is not known in this scope, so the
      # message only identifies the offending line.
      sys.stderr.write('Skipping %s from memdump output\n' % line)
      continue

    map_start = int(r.group(1), 16)
    map_end = int(r.group(2), 16)
    prot = r.group(3)
    offset = int(r.group(4), 16)
    assert offset % _PAGE_SIZE == 0
    lib = r.group(5)
    enc_bitmap = r.group(6)

    if not lib.endswith(lib_name):
      continue

    # bytearray yields integers when indexed on both Python 2 and Python 3,
    # whereas indexing the raw b64decode() result gives str vs. int.
    bitmap = bytearray(base64.b64decode(enc_bitmap))
    # map_end is treated as inclusive, hence the + 1. Floor division keeps the
    # page count an integer on Python 3 as well.
    map_pages_count = (map_end - map_start + 1) // _PAGE_SIZE
    bitmap_pages_count = len(bitmap) * 8

    if verbose:
      print('Found %s: mapped %d pages in mode %s @ offset %s.' %
            (lib, map_pages_count, prot, _HexAddr(offset)))
      print(' Map range in the process VA: [%s - %s]. Len: %s' %
            (_HexAddr(map_start), _HexAddr(map_end),
             _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Corresponding addresses in the binary: [%s - %s]. Len: %s' %
            (_HexAddr(offset), _HexAddr(offset + map_end - map_start),
             _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Bitmap: %d pages' % bitmap_pages_count)
      print('')

    assert bitmap_pages_count >= map_pages_count
    # Index (within the mapped file) of the first page of this mapping.
    first_page = offset // _PAGE_SIZE
    for i in range(map_pages_count):
      bitmap_idx, bitmap_off = divmod(i, 8)
      # The bounds check duplicates the assertion above on purpose: asserts
      # are stripped when running with -O.
      if (bitmap_idx < len(bitmap) and
          _TestBit(bitmap[bitmap_idx], bitmap_off)):
        resident_pages.add(first_page + i)
  return resident_pages


def main(argv):
  """Prints the nm symbols whose start address lies in a resident page.

  Expects three positional arguments after the options: the memdump output
  file ('-' reads it from stdin), the nm output file and the library name.
  With -r/--reverse the selection is inverted and the non-resident symbols
  are printed instead. nm lines that start with a space (undefined symbols,
  no address) are skipped; lines that do not start with a hex address are
  reported on stderr and skipped.

  Args:
    argv: The full command line, program name included (e.g. sys.argv).

  Returns:
    0 on success, 1 if the number of positional arguments is wrong.
  """
  NM_RX = re.compile(r'^([0-9a-f]+)\s+.*$')

  parser = OptionParser()
  parser.add_option("-r", "--reverse",
                    action="store_true", dest="reverse", default=False,
                    help="Print out non present symbols")
  parser.add_option("-v", "--verbose",
                    action="store_true", dest="verbose", default=False,
                    help="Print out verbose debug information.")

  # Parse the arguments that were actually passed in rather than implicitly
  # falling back to sys.argv.
  (options, args) = parser.parse_args(argv[1:])

  if len(args) != 3:
    print('Usage: %s [-v] memdump.file nm.file library.so' % (os.path.basename(
        argv[0])))
    return 1

  memdump_file, nm_file, lib_name = args

  if memdump_file == '-':
    memdump_contents = sys.stdin.readlines()
  else:
    with open(memdump_file, 'r') as memdump_fh:
      memdump_contents = memdump_fh.readlines()
  resident_pages = _GetResidentPagesSet(memdump_contents,
                                        lib_name,
                                        options.verbose)

  # Process the nm symbol table, filtering out the resident symbols.
  with open(nm_file, 'r') as nm_fh:
    for line in nm_fh:
      line = line.rstrip('\r\n')
      # Skip undefined symbols (lines with no address).
      if line.startswith(' '):
        continue

      r = NM_RX.match(line)
      if not r:
        sys.stderr.write('Skipping %s from %s\n' % (line, nm_file))
        continue

      sym_addr = int(r.group(1), 16)
      # Floor division: the page index must be an integer to be found in the
      # set of resident page indexes (true division would yield a float on
      # Python 3 and only page-aligned symbols would ever match).
      sym_page = sym_addr // _PAGE_SIZE
      if (sym_page in resident_pages) != options.reverse:
        print(line)
  return 0

# Script entry point: run main() with the process command line and use its
# return value as the exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))