#!/usr/bin/env python
# Merge or print the coverage data collected by asan's coverage.
# Input files are sequences of 4-byte integers.
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
import array
import bisect
import glob
import os.path
import struct
import subprocess
import sys
prog_name = ""
def Usage():
sys.stderr.write(
"Usage: \n" + " " + prog_name + " merge FILE [FILE...] > OUTPUT\n"
" " + prog_name + " print FILE [FILE...]\n"
" " + prog_name + " unpack FILE [FILE...]\n"
" " + prog_name + " rawunpack FILE [FILE ...]\n"
" " + prog_name + " missing BINARY < LIST_OF_PCS\n"
"\n"
)
exit(1)
def CheckBits(bits):
if bits != 32 and bits != 64:
raise Exception("Wrong bitness: %d" % bits)
def TypeCodeForBits(bits):
CheckBits(bits)
return "L" if bits == 64 else "I"
def TypeCodeForStruct(bits):
CheckBits(bits)
return "Q" if bits == 64 else "I"
kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF
def MagicForBits(bits):
CheckBits(bits)
if sys.byteorder == "little":
return [
kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf,
kMagicFirstHalf,
]
else:
return [
kMagicFirstHalf,
kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf,
]
def ReadMagicAndReturnBitness(f, path):
magic_bytes = f.read(8)
magic_words = struct.unpack("II", magic_bytes)
bits = 0
idx = 1 if sys.byteorder == "little" else 0
if magic_words[idx] == kMagicFirstHalf:
if magic_words[1 - idx] == kMagic64SecondHalf:
bits = 64
elif magic_words[1 - idx] == kMagic32SecondHalf:
bits = 32
if bits == 0:
raise Exception("Bad magic word in %s" % path)
return bits
def ReadOneFile(path):
with open(path, mode="rb") as f:
f.seek(0, 2)
size = f.tell()
f.seek(0, 0)
if size < 8:
raise Exception("File %s is short (< 8 bytes)" % path)
bits = ReadMagicAndReturnBitness(f, path)
size -= 8
w = size * 8 // bits
s = struct.unpack_from(TypeCodeForStruct(bits) * (w), f.read(size))
sys.stderr.write("%s: read %d %d-bit PCs from %s\n" % (prog_name, w, bits, path))
return s
def Merge(files):
s = set()
for f in files:
s = s.union(set(ReadOneFile(f)))
sys.stderr.write(
"%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(s))
)
return sorted(s)
def PrintFiles(files):
if len(files) > 1:
s = Merge(files)
else: # If there is just on file, print the PCs in order.
s = ReadOneFile(files[0])
sys.stderr.write("%s: 1 file merged; %d PCs total\n" % (prog_name, len(s)))
for i in s:
print("0x%x" % i)
def MergeAndPrint(files):
if sys.stdout.isatty():
Usage()
s = Merge(files)
bits = 32
if max(s) > 0xFFFFFFFF:
bits = 64
stdout_buf = getattr(sys.stdout, "buffer", sys.stdout)
array.array("I", MagicForBits(bits)).tofile(stdout_buf)
a = struct.pack(TypeCodeForStruct(bits) * len(s), *s)
stdout_buf.write(a)
def UnpackOneFile(path):
with open(path, mode="rb") as f:
sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
while True:
header = f.read(12)
if not header:
return
if len(header) < 12:
break
pid, module_length, blob_size = struct.unpack("iII", header)
module = f.read(module_length).decode("utf-8")
blob = f.read(blob_size)
assert len(module) == module_length
assert len(blob) == blob_size
extracted_file = "%s.%d.sancov" % (module, pid)
sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
# The packed file may contain multiple blobs for the same pid/module
# pair. Append to the end of the file instead of overwriting.
with open(extracted_file, "ab") as f2:
f2.write(blob)
# fail
raise Exception("Error reading file %s" % path)
def Unpack(files):
for f in files:
UnpackOneFile(f)
def UnpackOneRawFile(path, map_path):
mem_map = []
with open(map_path, mode="rt") as f_map:
sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
bits = int(f_map.readline())
if bits != 32 and bits != 64:
raise Exception("Wrong bits size in the map")
for line in f_map:
parts = line.rstrip().split()
mem_map.append(
(
int(parts[0], 16),
int(parts[1], 16),
int(parts[2], 16),
" ".join(parts[3:]),
)
)
mem_map.sort(key=lambda m: m[0])
mem_map_keys = [m[0] for m in mem_map]
with open(path, mode="rb") as f:
sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
f.seek(0, 2)
size = f.tell()
f.seek(0, 0)
pcs = struct.unpack_from(
TypeCodeForStruct(bits) * (size * 8 // bits), f.read(size)
)
mem_map_pcs = [[] for i in range(0, len(mem_map))]
for pc in pcs:
if pc == 0:
continue
map_idx = bisect.bisect(mem_map_keys, pc) - 1
(start, end, base, module_path) = mem_map[map_idx]
assert pc >= start
if pc >= end:
sys.stderr.write(
"warning: %s: pc %x outside of any known mapping\n"
% (prog_name, pc)
)
continue
mem_map_pcs[map_idx].append(pc - base)
for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs):
if len(pc_list) == 0:
continue
assert path.endswith(".sancov.raw")
dst_path = module_path + "." + os.path.basename(path)[:-4]
sys.stderr.write(
"%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path)
)
sorted_pc_list = sorted(pc_list)
pc_buffer = struct.pack(
TypeCodeForStruct(bits) * len(pc_list), *sorted_pc_list
)
with open(dst_path, "ab+") as f2:
array.array("I", MagicForBits(bits)).tofile(f2)
f2.seek(0, 2)
f2.write(pc_buffer)
def RawUnpack(files):
for f in files:
if not f.endswith(".sancov.raw"):
raise Exception("Unexpected raw file name %s" % f)
f_map = f[:-3] + "map"
UnpackOneRawFile(f, f_map)
def GetInstrumentedPCs(binary):
# This looks scary, but all it does is extract all offsets where we call:
# - __sanitizer_cov() or __sanitizer_cov_with_check(),
# - with call or callq,
# - directly or via PLT.
cmd = (
r"objdump --no-show-raw-insn -d %s | "
r"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | "
r"grep -o '^\s\+[0-9a-f]\+'" % binary
)
lines = subprocess.check_output(cmd, stdin=subprocess.PIPE, shell=True).splitlines()
# The PCs we get from objdump are off by 4 bytes, as they point to the
# beginning of the callq instruction. Empirically this is true on x86 and
# x86_64.
return set(int(line.strip(), 16) + 4 for line in lines)
def PrintMissing(binary):
if not os.path.isfile(binary):
raise Exception("File not found: %s" % binary)
instrumented = GetInstrumentedPCs(binary)
sys.stderr.write(
"%s: found %d instrumented PCs in %s\n" % (prog_name, len(instrumented), binary)
)
covered = set(int(line, 16) for line in sys.stdin)
sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
missing = instrumented - covered
sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
if len(missing) > len(instrumented) - len(covered):
sys.stderr.write(
"%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
)
for pc in sorted(missing):
print("0x%x" % pc)
if __name__ == "__main__":
prog_name = sys.argv[0]
if len(sys.argv) <= 2:
Usage()
if sys.argv[1] == "missing":
if len(sys.argv) != 3:
Usage()
PrintMissing(sys.argv[2])
exit(0)
file_list = []
for f in sys.argv[2:]:
file_list += glob.glob(f)
if not file_list:
Usage()
if sys.argv[1] == "print":
PrintFiles(file_list)
elif sys.argv[1] == "merge":
MergeAndPrint(file_list)
elif sys.argv[1] == "unpack":
Unpack(file_list)
elif sys.argv[1] == "rawunpack":
RawUnpack(file_list)
else:
Usage()