// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This tool scans a PDB file and prints out information about 'interesting'
// global variables. This includes duplicates and large globals. This is often
// helpful in understanding code bloat or finding inefficient globals.
//
// Duplicate global variables often happen when constructs like this are placed
// in a header file:
//
// const double sqrt_two = sqrt(2.0);
//
// Many (although usually not all) of the translation units that include this
// header file will get a copy of sqrt_two, possibly including an initializer.
// Because 'const' implies 'static' there are no warnings or errors from the
// linker. This duplication can happen with float/double, structs and classes,
// and arrays - any non-integral type.
//
// With C++ 17 these problems can often be fixed by adding an inline keyword:
//
// const inline double sqrt_two = sqrt(2.0);
//
// constexpr would be even better in order to ensure that initializations are
// not being done at runtime.
//
// Note that the linker will coalesce identical constant variables in some
// cases, leaving multiple symbol entries pointing at a single global. This is
// the global-variable version of code folding (/OPT:ICF). If the argument
// --show_folded_constants is passed then these will be displayed. Otherwise
// they will be silently suppressed as not being interesting because they aren't
// actually wasting space.
//
// Global variables are not necessarily a problem but it is useful to understand
// them, and monitoring their changes can be instructive.
#include <atlbase.h>
#include <dia2.h>
#include <stdio.h>
#include <wrl/client.h>
#include <algorithm>
#include <string>
#include <vector>
// Three-way comparison of two wide strings. The result follows the
// strcmp/wcscmp convention: negative when |lhs| orders before |rhs|, zero when
// the two compare equal, and positive otherwise.
int StringCompare(const std::wstring& lhs, const std::wstring& rhs) {
  const wchar_t* const left = lhs.c_str();
  const wchar_t* const right = rhs.c_str();
  return wcscmp(left, right);
}
// Records the data gathered about a single symbol so that the symbols can be
// sorted and analyzed later.
struct SymbolData {
  // |name| may be null, in which case the stored name is empty. The caller
  // passes a BSTR here and a null BSTR is a valid representation of an empty
  // string; constructing a std::wstring directly from a null pointer would be
  // undefined behavior.
  SymbolData(ULONGLONG size, DWORD section, DWORD offset, const wchar_t* name)
      : size(size),
        section(section),
        offset(offset),
        name(name ? name : L"") {}

  // Length of the symbol, in the units reported by the symbol's type.
  ULONGLONG size;
  // Section part of the symbol's address.
  DWORD section;
  // Offset part of the symbol's address.
  DWORD offset;
  // Symbol name.
  std::wstring name;
};
// Ordering predicate used when sorting symbol data by name, which places
// potential duplicates next to each other. Symbols whose names match are
// ordered by size. The size tiebreaker matters because DIA2 omits namespaces
// from the names it returns, so symbols can share a name while having
// different sizes - those are false positives rather than real duplicates.
bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) {
  const int name_order = StringCompare(lhs.name, rhs.name);
  if (name_order != 0)
    return name_order < 0;
  // Identical names - fall back to the size tiebreaker.
  return lhs.size < rhs.size;
}
// Ordering predicate used when sorting symbols by size in order to find the
// largest global variables. Symbols of equal size are ordered by name so that
// the resulting order is consistent.
bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) {
  if (lhs.size != rhs.size)
    return lhs.size < rhs.size;
  // Equal sizes - the name decides.
  return StringCompare(lhs.name, rhs.name) < 0;
}
// Stores data about one repeated global variable, for later sorting.
struct RepeatData {
  RepeatData(ULONGLONG repeat_count,
             int folding_count,
             ULONGLONG bytes_wasted,
             const std::wstring& name)
      : repeat_count(repeat_count),
        bytes_wasted(bytes_wasted),
        folding_count(folding_count),
        name(name) {}

  // Orders entries by the number of bytes wasted. Declared const so that it
  // can be applied to const objects, as comparison operators used with the
  // standard algorithms are expected to allow.
  bool operator<(const RepeatData& rhs) const {
    return bytes_wasted < rhs.bytes_wasted;
  }

  // Number of repeats recorded for the variable.
  ULONGLONG repeat_count;
  // Number of bytes attributed to the repeats.
  ULONGLONG bytes_wasted;
  // Number of repeats recorded as folded.
  int folding_count;
  // Name of the repeated variable.
  std::wstring name;
};
// Enumerates the data symbols that are children of |global| and prints two
// tab-separated reports to stdout, preceded by a single header line:
//  1. Global variables whose name and size occur more than once, sorted so
//     that the entries wasting the most bytes come first.
//  2. The largest individual global variables, biggest first.
// |filename| is only echoed in the last column of every row.
// If |show_folded_constants| is false then duplicates that share the address
// offset of the first instance are not counted as wasted space.
// Returns false if the symbols could not be enumerated, true otherwise.
bool DumpInterestingGlobals(IDiaSymbol* global,
                            const wchar_t* filename,
                            bool show_folded_constants) {
  wprintf(L"#Dups\t#Folded\tDupSize\t Size\tSection\tSymbol name\tPDB name\n");

  // How many bytes must be wasted on repeats before being listed.
  const ULONGLONG kWastageThreshold = 100;
  // How big must an individual symbol be before being listed.
  const ULONGLONG kBigSizeThreshold = 500;

  std::vector<SymbolData> symbols;
  std::vector<RepeatData> repeats;

  Microsoft::WRL::ComPtr<IDiaEnumSymbols> enum_symbols;
  HRESULT result =
      global->findChildren(SymTagData, NULL, nsNone, &enum_symbols);
  // Also reject a null enumerator so that it is never dereferenced below.
  if (FAILED(result) || !enum_symbols.Get()) {
    wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n");
    return false;
  }

  Microsoft::WRL::ComPtr<IDiaSymbol> symbol;
  for (ULONG celt = 0;
       SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1);) {
    DWORD location_type = 0;
    // If we can't get the location type then we assume the variable is not of
    // interest.
    if (FAILED(symbol->get_locationType(&location_type)))
      continue;
    // Ignore location types that don't actually correspond to statics and
    // globals.
    if (location_type != LocIsStatic)
      continue;

    // If we call get_length on symbol it works for functions but not for
    // data. For some reason for data we have to call get_type() to get
    // another IDiaSymbol object which we can query for length.
    Microsoft::WRL::ComPtr<IDiaSymbol> type_symbol;
    const HRESULT type_result = symbol->get_type(&type_symbol);
    if (FAILED(type_result)) {
      wprintf(L"Get_type failed.\n");
      continue;
    }
    // A non-failing result other than S_OK (e.g. S_FALSE) means that no type
    // is available, in which case there is no object to query for the length.
    if (type_result != S_OK || !type_symbol.Get())
      continue;

    // Errors in the remainder of this loop can be ignored silently.
    ULONGLONG size = 0;
    type_symbol->get_length(&size);

    // Use -1 and -2 as canary values to indicate various failures.
    DWORD section = static_cast<DWORD>(-1);
    if (symbol->get_addressSection(&section) != S_OK)
      section = static_cast<DWORD>(-2);
    DWORD offset = 0;
    symbol->get_addressOffset(&offset);

    CComBSTR name;
    if (symbol->get_name(&name) == S_OK) {
      symbols.push_back(SymbolData(size, section, offset, name));
    }
  }

  // Sort the symbols by name/size so that we can print a report about duplicate
  // variables.
  std::sort(symbols.begin(), symbols.end(), NameCompare);
  for (auto p = symbols.begin(); p != symbols.end(); /**/) {
    auto pScan = p;
    // Scan the data looking for symbols that have the same name
    // and size. Along the way count how many of them (including the first
    // one) share the first symbol's offset. An offset of zero is not counted.
    // NOTE(review): only offsets are compared here, not sections - presumably
    // same-named, same-sized symbols with equal offsets live in the same
    // section; confirm.
    int folding_count = 0;
    while (pScan != symbols.end() && p->size == pScan->size &&
           StringCompare(p->name, pScan->name) == 0) {
      if (pScan->offset == p->offset && p->offset != 0)
        ++folding_count;
      ++pScan;
    }

    // Calculate how many times the symbol name/size appears in this PDB.
    size_t repeat_count = pScan - p;
    if (repeat_count > 1) {
      // Change the count from how many instances of this variable there are to
      // how many *excess* instances there are.
      --repeat_count;
      // The first instance is only included in folding_count when its offset
      // is non-zero, so only discount it in that case. Decrementing
      // unconditionally would produce -1, which then wraps around in the
      // unsigned subtraction below and inflates the excess count.
      if (folding_count > 0)
        --folding_count;
      const size_t folded = static_cast<size_t>(folding_count);
      const size_t excess_count =
          show_folded_constants ? repeat_count : repeat_count - folded;
      const ULONGLONG bytes_wasted = excess_count * p->size;
      if (bytes_wasted > kWastageThreshold) {
        repeats.push_back(
            RepeatData(repeat_count, folding_count, bytes_wasted, p->name));
      }
    }
    p = pScan;
  }

  // Print a summary of duplicated variables, sorted to put the worst offenders
  // first.
  std::sort(repeats.begin(), repeats.end());
  std::reverse(repeats.begin(), repeats.end());
  for (const auto& repeat : repeats) {
    // The empty fields contain a zero so that Excel/sheets will more easily
    // create the pivot tables that I want.
    wprintf(L"%llu\t%d\t%llu\t%6u\t%u\t%s\t%s\n", repeat.repeat_count,
            repeat.folding_count, repeat.bytes_wasted, 0, 0,
            repeat.name.c_str(), filename);
  }
  wprintf(L"\n");

  // Print a summary of the largest global variables
  std::sort(symbols.begin(), symbols.end(), SizeCompare);
  std::reverse(symbols.begin(), symbols.end());
  for (const auto& s : symbols) {
    // The symbols are sorted from largest to smallest so nothing after the
    // first small one is of interest.
    if (s.size < kBigSizeThreshold)
      break;
    // The empty fields contain a zero so that the columns line up which can
    // be important when pasting the data into a spreadsheet. There is one
    // zero for each of the #Dups, #Folded and DupSize columns so that the
    // size and section land under their own headings.
    wprintf(L"%u\t%u\t%u\t%6llu\t%u\t%s\t%s\n", 0, 0, 0, s.size, s.section,
            s.name.c_str(), filename);
  }
  return true;
}
bool Initialize(const wchar_t* filename,
Microsoft::WRL::ComPtr<IDiaDataSource>& source,
Microsoft::WRL::ComPtr<IDiaSession>& session,
Microsoft::WRL::ComPtr<IDiaSymbol>& global) {
// Initialize DIA2
HRESULT hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER,
__uuidof(IDiaDataSource), (void**)&source);
if (FAILED(hr)) {
wprintf(L"Failed to initialized DIA2 - %08X.\n", hr);
return false;
}
// Open the PDB
hr = source->loadDataFromPdb(filename);
if (FAILED(hr)) {
wprintf(L"LoadDataFromPdb failed - %08X.\n", hr);
return false;
}
hr = source->openSession(&session);
if (FAILED(hr)) {
wprintf(L"OpenSession failed - %08X.\n", hr);
return false;
}
// Retrieve a reference to the global scope
hr = session->get_globalScope(&global);
if (hr != S_OK) {
wprintf(L"Get_globalScope failed - %08X.\n", hr);
return false;
}
return true;
}
// Program entry point. Expects the path of a PDB file on the command line,
// optionally accompanied by --show_folded_constants, and prints the reports
// produced by DumpInterestingGlobals() for that PDB.
// Returns 0 on success and -1 on a usage error or any failure.
int wmain(int argc, wchar_t* argv[]) {
  bool show_folded_constants = false;
  const wchar_t* filename = nullptr;
  // Start at 1: argv[0] is the program name, not an argument. Treating it as
  // a filename would make the usage message below unreachable.
  for (int i = 1; i < argc; ++i) {
    if (wcscmp(argv[i], L"--show_folded_constants") == 0)
      show_folded_constants = true;
    else
      filename = argv[i];
  }

  if (!filename) {
    wprintf(L"Usage: ShowGlobals file.pdb [--show_folded_constants]");
    return -1;
  }

  HRESULT hr = CoInitialize(NULL);
  if (FAILED(hr)) {
    wprintf(L"CoInitialize failed - %08X.", hr);
    // Report the failure through the exit code ('false' would be 0).
    return -1;
  }

  bool succeeded = false;
  // Extra scope so that we can call CoUninitialize after we destroy our local
  // variables.
  {
    Microsoft::WRL::ComPtr<IDiaDataSource> source;
    Microsoft::WRL::ComPtr<IDiaSession> session;
    Microsoft::WRL::ComPtr<IDiaSymbol> global;
    // No early return here, so that the CoUninitialize call below always
    // balances the successful CoInitialize call above.
    succeeded =
        Initialize(filename, source, session, global) &&
        DumpInterestingGlobals(global.Get(), filename, show_folded_constants);
  }
  CoUninitialize();

  return succeeded ? 0 : -1;
}