# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Library functions for making a corpus from arbitrary bitcode."""
import pathlib
import os
import shutil
import json
from typing import List, Optional
# File extension that identifies bitcode files. It is stripped from the paths
# returned by load_bitcode_from_directory and re-appended by copy_bitcode.
BITCODE_EXTENSION = ".bc"
def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
    """Finds bitcode files to extract from a given directory.

    The directory is searched recursively for regular files whose name ends
    in BITCODE_EXTENSION.

    Args:
      bitcode_base_dir: The base directory where the bitcode to be copied
        is from.

    Returns an array of paths representing the relative path to the bitcode
    file from the base directory, with the bitcode extension removed. The
    array is sorted so that the result is deterministic across runs.
    """
    relative_paths = []
    for bitcode_path in pathlib.Path(bitcode_base_dir).glob(
        "**/*" + BITCODE_EXTENSION
    ):
        # The glob matches on the name only, so skip anything that is not a
        # regular file (e.g. a directory whose name ends in the extension):
        # such entries cannot be copied as bitcode later on.
        if not bitcode_path.is_file():
            continue
        path_without_extension = str(bitcode_path)[: -len(BITCODE_EXTENSION)]
        relative_paths.append(
            os.path.relpath(path_without_extension, start=bitcode_base_dir)
        )
    # Directory traversal order depends on the file system; sort for stability.
    return sorted(relative_paths)
def copy_bitcode(
    relative_paths: List[str], bitcode_base_dir: str, output_dir: str
) -> None:
    """Copies bitcode files from the base directory to the output directory.

    Args:
      relative_paths: An array of relative paths to bitcode files that are copied
        over to the output directory, preserving relative location. The paths
        are given without the bitcode extension; it is appended here.
      bitcode_base_dir: The base directory where the bitcode is located.
      output_dir: The output directory to place the bitcode in.
    """
    for relative_path in relative_paths:
        file_name = relative_path + BITCODE_EXTENSION
        base_path = os.path.join(bitcode_base_dir, file_name)
        destination_path = os.path.join(output_dir, file_name)
        destination_dir = os.path.dirname(destination_path)
        # The directory part is empty when the destination is a bare file name
        # (empty output_dir and a top-level module); os.makedirs("") would
        # raise, and there is nothing to create in that case anyway.
        if destination_dir:
            os.makedirs(destination_dir, exist_ok=True)
        shutil.copy(base_path, destination_path)
def write_corpus_manifest(
    relative_output_paths: List[str],
    output_dir: str,
    default_args: Optional[List[str]] = None,
) -> None:
    """Creates a corpus manifest describing the bitcode that has been found.

    The manifest is written as JSON to corpus_description.json inside the
    output directory. The output directory is created if it does not exist
    yet, so the manifest can be written even if nothing was placed there
    beforehand.

    Args:
      relative_output_paths: A list of paths to each bitcode file relative to the
        output directory. Entries that are None are left out of the manifest.
      output_dir: The output directory where the corpus is being created.
      default_args: An array of compiler flags that should be used to compile
        the bitcode when using further downstream tooling. Defaults to an
        empty list.
    """
    if default_args is None:
        default_args = []
    corpus_description = {
        "global_command_override": default_args,
        "has_thinlto": False,
        "modules": [path for path in relative_output_paths if path is not None],
    }
    # An empty output_dir means the current directory, which already exists;
    # os.makedirs("") would raise, so only create non-empty paths.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    manifest_path = os.path.join(output_dir, "corpus_description.json")
    with open(manifest_path, "w", encoding="utf-8") as description_file:
        json.dump(corpus_description, description_file, indent=2)