// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/356368033): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "chrome/browser/safe_browsing/incident_reporting/module_integrity_verifier_win.h"
#include <stddef.h>
#include <algorithm>
#include <string>
#include <vector>
#include "base/files/file_path.h"
#include "base/files/memory_mapped_file.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/stack_allocated.h"
#include "base/metrics/histogram_functions.h"
#include "base/scoped_native_library.h"
#include "base/strings/utf_string_conversions.h"
#include "base/win/pe_image.h"
#include "components/safe_browsing/core/common/proto/csd.pb.h"
namespace safe_browsing {
namespace {
// The maximum amount of bytes that can be reported as modified by VerifyModule.
const int kMaxModuleModificationBytes = 256;
struct Export {
Export(void* addr, const std::string& name);
~Export();
bool operator<(const Export& other) const {
return addr < other.addr;
}
raw_ptr<void> addr;
std::string name;
};
Export::Export(void* addr, const std::string& name) : addr(addr), name(name) {
}
Export::~Export() {
}
struct ModuleVerificationState {
STACK_ALLOCATED();
public:
explicit ModuleVerificationState(HMODULE hModule);
ModuleVerificationState(const ModuleVerificationState&) = delete;
ModuleVerificationState& operator=(const ModuleVerificationState&) = delete;
~ModuleVerificationState();
base::win::PEImageAsData disk_peimage;
// The module's preferred base address minus the base address it actually
// loaded at.
intptr_t image_base_delta;
// The location of the disk_peimage module's code section minus that of the
// mem_peimage module's code section.
intptr_t code_section_delta;
// Set true if the relocation table contains a reloc of type that we don't
// currently handle.
bool unknown_reloc_type;
// The start of the code section of the in-memory binary.
uint8_t* mem_code_addr;
// The start of the code section of the on-disk binary.
uint8_t* disk_code_addr;
// The size of the binary's code section.
uint32_t code_size;
// The exports of the DLL, sorted by address in ascending order.
std::vector<Export> exports;
// The location in the in-memory binary of the latest reloc encountered by
// |EnumRelocsCallback|.
uint8_t* last_mem_reloc_position;
// The location in the on-disk binary of the latest reloc encountered by
// |EnumRelocsCallback|.
uint8_t* last_disk_reloc_position;
// The number of bytes with a different value on disk and in memory, as
// computed by |VerifyModule|.
int bytes_different;
// The module state protobuf object that |VerifyModule| will populate.
ClientIncidentReport_EnvironmentData_Process_ModuleState* module_state;
};
ModuleVerificationState::ModuleVerificationState(HMODULE hModule)
: disk_peimage(hModule),
image_base_delta(0),
code_section_delta(0),
unknown_reloc_type(false),
mem_code_addr(nullptr),
disk_code_addr(nullptr),
code_size(0),
last_mem_reloc_position(nullptr),
last_disk_reloc_position(nullptr),
bytes_different(0),
module_state(nullptr) {
}
ModuleVerificationState::~ModuleVerificationState() {
}
// Find which export a modification at address |mem_address| is in. Looks for
// the largest export address still smaller than |mem_address|. |start| and
// |end| must come from a sorted collection.
std::vector<Export>::const_iterator FindModifiedExport(
uint8_t* mem_address,
std::vector<Export>::const_iterator start,
std::vector<Export>::const_iterator end) {
// We get the largest export address still smaller than |addr|. It is
// possible that |addr| belongs to some nonexported function located
// between this export and the following one.
Export addr(reinterpret_cast<void*>(mem_address), std::string());
return std::upper_bound(start, end, addr);
}
// Checks each byte in a subsection of the module's code section against the
// corresponding byte on disk, returning the number of bytes differing between
// the two. |state.exports| must be sorted.
int ExamineByteRangeDiff(uint8_t* disk_start,
uint8_t* mem_start,
ptrdiff_t range_size,
ModuleVerificationState* state) {
int bytes_different = 0;
std::vector<Export>::const_iterator export_it = state->exports.begin();
for (uint8_t* end = mem_start + range_size; mem_start < end;
++mem_start, ++disk_start) {
if (*disk_start == *mem_start)
continue;
auto* modification = state->module_state->add_modification();
// Store the address at which the modification starts on disk, relative to
// the beginning of the image.
modification->set_file_offset(
disk_start - reinterpret_cast<uint8_t*>(state->disk_peimage.module()));
// Find the export containing this modification.
std::vector<Export>::const_iterator modified_export_it =
FindModifiedExport(mem_start, export_it, state->exports.end());
// No later byte can belong to an earlier export.
export_it = modified_export_it;
if (modified_export_it != state->exports.begin())
modification->set_export_name((modified_export_it - 1)->name);
const uint8_t* range_start = mem_start;
while (mem_start < end && *disk_start != *mem_start) {
++disk_start;
++mem_start;
}
int bytes_in_modification = mem_start - range_start;
bytes_different += bytes_in_modification;
modification->set_byte_count(bytes_in_modification);
modification->set_modified_bytes(
range_start,
std::min(bytes_in_modification, kMaxModuleModificationBytes));
}
return bytes_different;
}
bool AddrIsInCodeSection(void* address,
uint8_t* code_addr,
uint32_t code_size) {
return (code_addr <= address && address < code_addr + code_size);
}
bool EnumRelocsCallback(const base::win::PEImage& mem_peimage,
WORD type,
void* address,
void* cookie) {
ModuleVerificationState* state =
reinterpret_cast<ModuleVerificationState*>(cookie);
// If not in the code section return true to continue to the next reloc.
if (!AddrIsInCodeSection(address, state->mem_code_addr, state->code_size))
return true;
switch (type) {
case IMAGE_REL_BASED_ABSOLUTE: // 0
break;
case IMAGE_REL_BASED_HIGHLOW: // 3
{
// The range to inspect is from the last reloc to the current one at
// |ptr|
uint8_t* ptr = reinterpret_cast<uint8_t*>(address);
// If the last relocation was not before this one in the binary,
// there's an issue in the reloc section. We can't really recover from
// that so flag state as such so the error can be logged.
if (ptr < state->last_mem_reloc_position)
return false;
// Check which bytes of the relocation are not accounted for by the
// rebase. If the beginning of the relocation is modified by something
// other than the rebase, extend the verification range to include those
// bytes since they are considered part of a modification.
uint32_t relocated = *reinterpret_cast<uint32_t*>(ptr);
uint32_t original = relocated + state->image_base_delta;
uint8_t* original_reloc_bytes = reinterpret_cast<uint8_t*>(&original);
uint8_t* reloc_disk_position = ptr + state->code_section_delta;
size_t unaccounted_reloc_bytes = 0;
while (unaccounted_reloc_bytes < sizeof(uint32_t) &&
original_reloc_bytes[unaccounted_reloc_bytes] !=
reloc_disk_position[unaccounted_reloc_bytes]) {
++unaccounted_reloc_bytes;
}
// If the entire reloc was modified, return true to let the next
// EnumReloc track it as part of a larger modification.
if (unaccounted_reloc_bytes == sizeof(uint32_t))
return true;
ptrdiff_t range_size = ptr +
unaccounted_reloc_bytes -
state->last_mem_reloc_position;
state->bytes_different += ExamineByteRangeDiff(
state->last_disk_reloc_position,
state->last_mem_reloc_position,
range_size,
state);
// Starting after the verified range, check if the relocation ends with
// modified bytes. If it does, include them in the following range to be
// verified as they're considered modified. Otherwise, the following
// range will start right after the current reloc.
size_t unmodified_reloc_byte_count = unaccounted_reloc_bytes;
while (unmodified_reloc_byte_count < sizeof(uint32_t) &&
original_reloc_bytes[unmodified_reloc_byte_count] ==
reloc_disk_position[unmodified_reloc_byte_count]) {
++unmodified_reloc_byte_count;
}
state->last_disk_reloc_position +=
range_size + unmodified_reloc_byte_count;
state->last_mem_reloc_position +=
range_size + unmodified_reloc_byte_count;
}
break;
case IMAGE_REL_BASED_DIR64: // 10
break;
default:
// TODO(robertshield): Find a reliable description of the behaviour of the
// remaining types of relocation and handle them.
state->unknown_reloc_type = true;
break;
}
return true;
}
bool EnumExportsCallback(const base::win::PEImage& mem_peimage,
DWORD ordinal,
DWORD hint,
LPCSTR name,
PVOID function_addr,
LPCSTR forward,
PVOID cookie) {
std::vector<Export>* exports = reinterpret_cast<std::vector<Export>*>(cookie);
if (name)
exports->push_back(Export(function_addr, std::string(name)));
return true;
}
} // namespace
bool GetCodeAddrsAndSize(const base::win::PEImage& mem_peimage,
const base::win::PEImageAsData& disk_peimage,
uint8_t** mem_code_addr,
uint8_t** disk_code_addr,
uint32_t* code_size) {
DWORD base_of_code = mem_peimage.GetNTHeaders()->OptionalHeader.BaseOfCode;
// Get the address and size of the code section in the loaded module image.
PIMAGE_SECTION_HEADER mem_code_header =
mem_peimage.GetImageSectionFromAddr(mem_peimage.RVAToAddr(base_of_code));
if (mem_code_header == NULL)
return false;
*mem_code_addr = reinterpret_cast<uint8_t*>(
mem_peimage.RVAToAddr(mem_code_header->VirtualAddress));
// If the section is padded with zeros when mapped then |VirtualSize| can be
// larger. Alternatively, |SizeOfRawData| can be rounded up to align
// according to OptionalHeader.FileAlignment.
*code_size = std::min(mem_code_header->Misc.VirtualSize,
mem_code_header->SizeOfRawData);
// Get the address of the code section in the module mapped as data from disk.
DWORD disk_code_offset = 0;
if (!mem_peimage.ImageAddrToOnDiskOffset(
reinterpret_cast<void*>(*mem_code_addr), &disk_code_offset))
return false;
*disk_code_addr =
reinterpret_cast<uint8_t*>(disk_peimage.module()) + disk_code_offset;
return true;
}
bool VerifyModule(
const wchar_t* module_name,
ClientIncidentReport_EnvironmentData_Process_ModuleState* module_state,
int* num_bytes_different) {
using ModuleState = ClientIncidentReport_EnvironmentData_Process_ModuleState;
*num_bytes_different = 0;
module_state->set_name(base::WideToUTF8(module_name));
module_state->set_modified_state(ModuleState::MODULE_STATE_UNKNOWN);
// Get module handle, load a copy from disk as data and create PEImages.
HMODULE module_handle = NULL;
if (!GetModuleHandleEx(0, module_name, &module_handle))
return false;
base::ScopedNativeLibrary native_library(module_handle);
WCHAR module_path[MAX_PATH] = {};
DWORD length =
GetModuleFileName(module_handle, module_path, std::size(module_path));
if (!length || length == std::size(module_path))
return false;
base::MemoryMappedFile mapped_module;
if (!mapped_module.Initialize(base::FilePath(module_path)))
return false;
ModuleVerificationState state(
reinterpret_cast<HMODULE>(const_cast<uint8_t*>(mapped_module.data())));
base::win::PEImage mem_peimage(module_handle);
if (!mem_peimage.VerifyMagic() || !state.disk_peimage.VerifyMagic())
return false;
// Get the list of exports and sort them by address for efficient lookups.
mem_peimage.EnumExports(EnumExportsCallback, &state.exports);
std::sort(state.exports.begin(), state.exports.end());
// Get the addresses of the code sections then calculate |code_section_delta|
// and |image_base_delta|.
if (!GetCodeAddrsAndSize(mem_peimage,
state.disk_peimage,
&state.mem_code_addr,
&state.disk_code_addr,
&state.code_size))
return false;
state.module_state = module_state;
state.last_mem_reloc_position = state.mem_code_addr;
state.last_disk_reloc_position = state.disk_code_addr;
state.code_section_delta = state.disk_code_addr - state.mem_code_addr;
uint8_t* preferred_image_base = reinterpret_cast<uint8_t*>(
state.disk_peimage.GetNTHeaders()->OptionalHeader.ImageBase);
state.image_base_delta =
preferred_image_base - reinterpret_cast<uint8_t*>(mem_peimage.module());
state.last_mem_reloc_position = state.mem_code_addr;
state.last_disk_reloc_position = state.disk_code_addr;
// Enumerate relocations and verify the bytes between them.
bool scan_complete = mem_peimage.EnumRelocs(EnumRelocsCallback, &state);
if (scan_complete) {
size_t range_size =
state.code_size - (state.last_mem_reloc_position - state.mem_code_addr);
// Inspect the last chunk spanning from the furthest relocation to the end
// of the code section.
state.bytes_different += ExamineByteRangeDiff(
state.last_disk_reloc_position,
state.last_mem_reloc_position,
range_size,
&state);
}
*num_bytes_different = state.bytes_different;
// Report STATE_MODIFIED if any difference was found, regardless of whether or
// not the entire module was scanned. Report STATE_UNMODIFIED only if the
// entire module was scanned and understood.
if (state.bytes_different)
module_state->set_modified_state(ModuleState::MODULE_STATE_MODIFIED);
else if (!state.unknown_reloc_type && scan_complete)
module_state->set_modified_state(ModuleState::MODULE_STATE_UNMODIFIED);
return scan_complete;
}
} // namespace safe_browsing