# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This library contains 64 bit ELF headers access and modification methods.
This library implements classes representing various ELF structures.
For more detailed description of ELF headers and fields refer to the ELF
standard: http://www.skyfree.org/linux/references/ELF_Format.pdf and to the
64 bit update: https://uclibc.org/docs/elf-64-gen.pdf.
This library was created because the script required precise manipulation of
file offsets, e.g. moving the headers segment to the end of the file.
No package capable of this kind of manipulation was found, so the creation of
this library was deemed necessary.
The point of entry is ElfHeader class that provides methods for accessing
additional parts of the ELF file.
"""
import enum
import logging
class ElfEntry(object):
  """Base class for ELF headers.

  Provides methods for populating the fields listed in _fields from raw bytes
  or from keyword arguments, and for serializing them back to bytes.
  """

  def __init__(self, byte_order, fields=None):
    """ElfEntry constructor.

    Args:
      byte_order: str. Either 'little' for little endian or 'big' for big
        endian.
      fields: List[Tuple[str, int]]. An ordered list of pairs of
        (attribute name, size in bytes). This list will be used for parsing
        data and automatically setting up those fields.
    """
    self._fields = [] if fields is None else fields
    self.byte_order = byte_order

  def ParseBytes(self, data, offset):
    """Parses Entry fields from data starting at offset using _fields.

    The whole range is validated before any attribute is assigned, so a
    failed parse leaves the entry untouched.

    Args:
      data: bytes.
      offset: int. The start point of parsing.

    Raises:
      RuntimeError: if offset is negative or data does not contain enough
        bytes after offset to cover every field.
    """
    total_size = sum(field_size for _, field_size in self._fields)
    # Slicing never fails on its own: a short slice would be silently parsed
    # as a smaller integer and a negative offset would wrap around to the end
    # of data, so the range has to be checked explicitly.
    if offset < 0 or offset + total_size > len(data):
      raise RuntimeError(
          'Not enough data to parse %s: need %d bytes at offset %d, '
          'data size is %d' %
          (type(self).__name__, total_size, offset, len(data)))

    current_offset = offset
    for field_name, field_size in self._fields:
      field_end = current_offset + field_size
      value = int.from_bytes(data[current_offset:field_end],
                             byteorder=self.byte_order)
      setattr(self, field_name, value)
      current_offset = field_end

  def ApplyKwargs(self, **kwargs):
    """Set the fields from kwargs matching the _fields array entries.

    Fields that are missing from kwargs are reported through logging.error
    and left unchanged. Keys of kwargs that are not listed in _fields are
    ignored.
    """
    for field_name, _ in self._fields:
      if field_name not in kwargs:
        logging.error('field_name %s not found in kwargs', field_name)
        continue
      setattr(self, field_name, kwargs[field_name])

  def ToBytes(self):
    """Returns byte representation of ELF entry.

    The fields are serialized in _fields order, each one using its declared
    size and the entry's byte order.

    Returns:
      bytearray. The concatenated field values.
    """
    bytearr = bytearray()
    for field_name, field_size in self._fields:
      field_value = getattr(self, field_name)
      bytearr.extend(
          field_value.to_bytes(field_size, byteorder=self.byte_order))
    return bytearr

  @classmethod
  def Create(cls, byte_order, **kwargs):
    """Static wrapper around ApplyKwargs method.

    Args:
      byte_order: str. Either 'little' for little endian or 'big' for big
        endian.
      **kwargs: will be passed directly to the ApplyKwargs method.

    Returns:
      A new instance of cls with the fields set from kwargs.
    """
    obj = cls(byte_order)
    obj.ApplyKwargs(**kwargs)
    return obj

  @classmethod
  def FromBytes(cls, byte_order, data, offset):
    """Static wrapper around ParseBytes method.

    Args:
      byte_order: str. Either 'little' for little endian or 'big' for big
        endian.
      data: bytes.
      offset: int. The start point of parsing.

    Returns:
      A new instance of cls with the fields parsed from data.

    Raises:
      RuntimeError: if data is too short to hold the entry at offset.
    """
    obj = cls(byte_order)
    obj.ParseBytes(data, offset)
    return obj
class SectionHeader(ElfEntry):
  """A single 64 bit section header entry (SectionEntry in the ELF standard)."""

  class Type(enum.IntEnum):
    """Named SHT_* section type constants."""
    SHT_NULL = 0
    SHT_PROGBITS = 1
    SHT_SYMTAB = 2
    SHT_STRTAB = 3
    SHT_RELA = 4
    SHT_HASH = 5
    SHT_DYNAMIC = 6
    SHT_NOTE = 7
    SHT_NOBITS = 8
    SHT_REL = 9
    SHT_SHLIB = 10
    SHT_DYNSYM = 11

  def __init__(self, byte_order):
    """SectionHeader constructor.

    Args:
      byte_order: str. Either 'little' or 'big', forwarded to ElfEntry.
    """
    # (attribute name, size in bytes) pairs, in the order they are parsed and
    # serialized by the base class.
    layout = [
        ('sh_name', 4),
        ('sh_type', 4),
        ('sh_flags', 8),
        ('sh_addr', 8),
        ('sh_offset', 8),
        ('sh_size', 8),
        ('sh_link', 4),
        ('sh_info', 4),
        ('sh_addralign', 8),
        ('sh_entsize', 8),
    ]
    super().__init__(byte_order, layout)

    # Every field is declared explicitly so that static checkers know about
    # the attributes; real values arrive through ParseBytes or ApplyKwargs.
    self.sh_name = None
    self.sh_type = None
    self.sh_flags = None
    self.sh_addr = None
    self.sh_offset = None
    self.sh_size = None
    self.sh_link = None
    self.sh_info = None
    self.sh_addralign = None
    self.sh_entsize = None

    # Human readable section name. It can't be resolved here because that
    # requires a StringTableHeader instance, so it starts out empty and is
    # filled in later through SetStrName.
    self._str_name = ''

  def SetStrName(self, name):
    """Stores the resolved, human readable form of sh_name.

    Only the cached string is updated: sh_name itself is not touched, so the
    new name WILL NOT show up in the data after a PatchData call.

    Args:
      name: str. Name to set.
    """
    self._str_name = name

  def GetStrName(self):
    """Returns the section name previously stored by SetStrName."""
    return self._str_name
class StringTableHeader(SectionHeader):
  """Section header of a string table, able to look names up in it."""

  def GetName(self, data, string_index):
    """Reads the null terminated name stored at string_index in the table.

    Args:
      data: bytearray. The file's data.
      string_index: int. Offset from the beginning of the string table to the
        required name.

    Returns:
      str. The bytes between the name's start and the next zero byte, decoded
      as ASCII.

    Raises:
      RuntimeError: if no zero byte follows the start of the name.
    """
    name_start = self.sh_offset + string_index
    name_end = data.find(0, name_start)
    if name_end == -1:
      raise RuntimeError('Failed to find null terminator for StringTable entry')
    raw_name = data[name_start:name_end]
    return raw_name.decode('ascii')
class ProgramHeader(ElfEntry):
  """A single 64 bit program header entry (PhdrEntry in the ELF standard)."""

  class Type(enum.IntEnum):
    """Named PT_* segment type constants."""
    PT_NULL = 0
    PT_LOAD = 1
    PT_DYNAMIC = 2
    PT_INTERP = 3
    PT_NOTE = 4
    PT_SHLIB = 5
    PT_PHDR = 6

  class Flags(enum.IntFlag):
    """Named PF_* segment flag bits."""
    PF_X = 1
    PF_W = 2
    PF_R = 4

  def __init__(self, byte_order):
    """ProgramHeader constructor.

    Args:
      byte_order: str. Either 'little' or 'big', forwarded to ElfEntry.
    """
    # (attribute name, size in bytes) pairs, in the order they are parsed and
    # serialized by the base class.
    layout = [
        ('p_type', 4),
        ('p_flags', 4),
        ('p_offset', 8),
        ('p_vaddr', 8),
        ('p_paddr', 8),
        ('p_filesz', 8),
        ('p_memsz', 8),
        ('p_align', 8),
    ]
    super().__init__(byte_order, layout)

    # Declared explicitly to keep pylint from reporting attribute errors;
    # real values arrive through ParseBytes or ApplyKwargs.
    self.p_type = None
    self.p_flags = None
    self.p_offset = None
    self.p_vaddr = None
    self.p_paddr = None
    self.p_filesz = None
    self.p_memsz = None
    self.p_align = None

  def FilePositionEnd(self):
    """Returns the file offset one past the last byte of the segment."""
    segment_end = self.p_offset + self.p_filesz
    return segment_end
class ElfHeader(ElfEntry):
  """This class represents ELFHdr from the ELF standard.

  On its initialization it determines the bitness and endianness of the binary
  and parses the program header and section header tables.
  """

  class EiClass(enum.IntEnum):
    ELFCLASS32 = 1
    ELFCLASS64 = 2

  class EiData(enum.IntEnum):
    ELFDATALSB = 1
    ELFDATAMSB = 2

  class EType(enum.IntEnum):
    ET_NONE = 0
    ET_REL = 1
    ET_EXEC = 2
    ET_DYN = 3
    ET_CORE = 4

  # Positions of the ei_class and ei_data bytes from the start of the data.
  # They are needed before the regular field parsing can run, because the byte
  # order has to be known to parse anything else.
  _EI_CLASS_OFFSET = 4
  _EI_DATA_OFFSET = 5

  def _GetEiClass(self, data):
    """Returns the value of ei_class."""
    return data[self._EI_CLASS_OFFSET]

  def _GetEiData(self, data):
    """Returns the value of ei_data."""
    return data[self._EI_DATA_OFFSET]

  def _ValidateBitness(self, data):
    """Verifies that library supports file's bitness.

    Raises:
      RuntimeError: if ei_class is not ELFCLASS64.
    """
    if self._GetEiClass(data) != ElfHeader.EiClass.ELFCLASS64:
      raise RuntimeError('only 64 bit objects are supported')

  def _ReadByteOrder(self, data):
    """Reads and returns the file's byte order.

    Returns:
      str. 'little' for ELFDATALSB and 'big' for ELFDATAMSB.

    Raises:
      RuntimeError: if ei_data holds any other value.
    """
    ei_data = self._GetEiData(data)
    if ei_data == ElfHeader.EiData.ELFDATALSB:
      return 'little'
    if ei_data == ElfHeader.EiData.ELFDATAMSB:
      return 'big'
    raise RuntimeError('Failed to parse ei_data')

  def _ParseProgramHeaders(self, data):
    """Parses e_phnum program headers starting at e_phoff into self.phdrs."""
    current_offset = self.e_phoff
    for _ in range(self.e_phnum):
      self.phdrs.append(
          ProgramHeader.FromBytes(self.byte_order, data, current_offset))
      current_offset += self.e_phentsize

  def _ParseSectionHeaders(self, data):
    """Parses e_shnum section headers starting at e_shoff into self.shdrs.

    When e_shstrndx is not zero, the entry at that index is parsed again as a
    StringTableHeader and used to resolve the string name of every section.
    """
    current_offset = self.e_shoff
    for _ in range(self.e_shnum):
      shdr = SectionHeader.FromBytes(self.byte_order, data, current_offset)
      self.shdrs.append(shdr)
      current_offset += self.e_shentsize

    if self.e_shstrndx != 0:
      string_table_offset = self.e_shoff + self.e_shstrndx * self.e_shentsize
      string_table = StringTableHeader.FromBytes(self.byte_order, data,
                                                 string_table_offset)
      for shdr in self.shdrs:
        shdr.SetStrName(string_table.GetName(data, shdr.sh_name))

  def __init__(self, data):
    """ElfHeader constructor.

    Args:
      data: bytearray.

    Raises:
      RuntimeError: if the file is not a 64 bit object, if its byte order
        can't be determined or if it is not a shared library (ET_DYN).
    """
    # We have to set them here to avoid attribute-error from pylint.
    self.ei_magic = None
    self.ei_class = None
    self.ei_data = None
    self.ei_version = None
    self.ei_osabi = None
    self.ei_abiversion = None
    self.ei_pad = None
    self.e_type = None
    self.e_machine = None
    self.e_version = None
    self.e_entry = None
    self.e_phoff = None
    self.e_shoff = None
    self.e_flags = None
    self.e_ehsize = None
    self.e_phentsize = None
    self.e_phnum = None
    self.e_shentsize = None
    self.e_shnum = None
    self.e_shstrndx = None
    fields = [
        ('ei_magic', 4),
        ('ei_class', 1),
        ('ei_data', 1),
        ('ei_version', 1),
        ('ei_osabi', 1),
        ('ei_abiversion', 1),
        ('ei_pad', 7),
        ('e_type', 2),
        ('e_machine', 2),
        ('e_version', 4),
        ('e_entry', 8),
        ('e_phoff', 8),
        ('e_shoff', 8),
        ('e_flags', 4),
        ('e_ehsize', 2),
        ('e_phentsize', 2),
        ('e_phnum', 2),
        ('e_shentsize', 2),
        ('e_shnum', 2),
        ('e_shstrndx', 2),
    ]
    # Bitness and byte order are read straight from the raw bytes since the
    # base class can't parse any field before the byte order is known.
    self._ValidateBitness(data)
    byte_order = self._ReadByteOrder(data)
    super(ElfHeader, self).__init__(byte_order, fields)
    self.ParseBytes(data, 0)

    if self.e_type != ElfHeader.EType.ET_DYN:
      raise RuntimeError('Only shared libraries are supported')

    self.phdrs = []
    self._ParseProgramHeaders(data)

    self.shdrs = []
    self._ParseSectionHeaders(data)

  def GetProgramHeaders(self):
    """Returns the list of file's program headers."""
    return self.phdrs

  def GetProgramHeadersByType(self, phdr_type):
    """Returns a generator over the program headers of the given type."""
    return (phdr for phdr in self.phdrs if phdr.p_type == phdr_type)

  def GetSectionHeaders(self):
    """Returns the list of file's section headers."""
    return self.shdrs

  def AddProgramHeader(self, phdr):
    """Adds a new ProgramHeader entry correcting the e_phnum variable.

    This method will increase the size of LOAD segment containing the program
    headers without correcting the other offsets. It is up to the caller to
    deal with the results. One way to avoid any problems would be to move
    program headers to the end of the file.

    If no LOAD segment contains the program headers, the header is not added
    and the object is left unchanged.

    Args:
      phdr: ProgramHeader. Instance of ProgramHeader to add.

    Raises:
      RuntimeError: if no LOAD segment containing the program headers exists.
    """
    # The new header is appended before the search on purpose: this way a new
    # PT_LOAD header that itself covers the program header table takes part in
    # the lookup below.
    self.phdrs.append(phdr)
    phdrs_size = self.e_phnum * self.e_phentsize

    # We need to locate the LOAD segment containing program headers and
    # increase its size.
    load_found = False
    for load_phdr in self.GetProgramHeadersByType(ProgramHeader.Type.PT_LOAD):
      if load_phdr.p_offset > self.e_phoff:
        continue
      if load_phdr.FilePositionEnd() < self.e_phoff + phdrs_size:
        continue
      load_phdr.p_filesz += self.e_phentsize
      load_phdr.p_memsz += self.e_phentsize
      load_found = True
      break

    if not load_found:
      # Undo the append so that the list of headers stays in sync with
      # e_phnum if the caller handles the error and keeps using the object.
      self.phdrs.pop()
      raise RuntimeError('Failed to increase program headers LOAD segment')

    # If PHDR segment exists it needs to be corrected as well.
    for phdr_segment in self.GetProgramHeadersByType(
        ProgramHeader.Type.PT_PHDR):
      phdr_segment.p_filesz += self.e_phentsize
      phdr_segment.p_memsz += self.e_phentsize

    self.e_phnum += 1

  def _OrderProgramHeaders(self):
    """Orders program LOAD headers by p_vaddr to comply with standard.

    The headers are sorted in place: PT_INTERP first, then PT_PHDR, then
    PT_LOAD (each group ordered by p_vaddr), then every other header.
    """

    def HeaderToKey(phdr):
      # ELF standard required PT_INTERP and PT_PHDR to be strictly before
      # PT_LOAD.
      if phdr.p_type == ProgramHeader.Type.PT_INTERP:
        return (0, phdr.p_vaddr)
      if phdr.p_type == ProgramHeader.Type.PT_PHDR:
        return (1, phdr.p_vaddr)
      if phdr.p_type == ProgramHeader.Type.PT_LOAD:
        return (2, phdr.p_vaddr)
      # We want to preserve the order of non LOAD segments: they all share
      # one key and list.sort is stable.
      return (3, 0)

    self.phdrs.sort(key=HeaderToKey)

  def _PatchProgramHeaders(self, data):
    """Patches all program headers.

    The headers are reordered by _OrderProgramHeaders first and then written
    one after another starting at e_phoff, e_phentsize bytes apart.
    """
    current_offset = self.e_phoff
    self._OrderProgramHeaders()
    for phdr in self.GetProgramHeaders():
      phdr_bytes = phdr.ToBytes()
      data[current_offset:current_offset + len(phdr_bytes)] = phdr_bytes
      current_offset += self.e_phentsize

  def _PatchSectionHeaders(self, data):
    """Patches all section headers.

    The headers are written one after another starting at e_shoff,
    e_shentsize bytes apart.
    """
    current_offset = self.e_shoff
    for shdr in self.GetSectionHeaders():
      shdr_bytes = shdr.ToBytes()
      data[current_offset:current_offset + len(shdr_bytes)] = shdr_bytes
      current_offset += self.e_shentsize

  def PatchData(self, data):
    """Patches the given data array to reflect all changes made to the header.

    This method doesn't completely rewrite the data, instead it patches
    inplace. Not only the ElfHeader is patched but all of its ProgramHeader
    and SectionHeader entries as well.

    The important limitation is that this method doesn't take changes of sizes
    and offsets into account. As example, if new ProgramHeader is added, this
    method will override whatever data is located under its placement so the
    user has to move the headers to the end beforehand or the user mustn't
    change header's size.

    Args:
      data: bytearray. The data array to be patched.
    """
    elf_bytes = self.ToBytes()
    data[:len(elf_bytes)] = elf_bytes
    self._PatchProgramHeaders(data)
    self._PatchSectionHeaders(data)