# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for to parse a compiler invocation. """
import re
import os
import collections
__all__ = ["split_command", "classify_source", "compiler_language"]
# Map of compiler options ignored during compilation database creation.
# The map is used by the `split_command` function, which both ignores and
# classifies parameters. Please note that these are not the only parameters
# which might be ignored.
#
# Keys are option names; each value is the number of following arguments to
# skip along with the option.
IGNORED_FLAGS = {}
# The compile-only flag is dropped because whoever creates the compilation
# database sets it explicitly.
IGNORED_FLAGS.update(dict.fromkeys(["-c"], 0))
# Preprocessor dependency-output options are dropped because they would
# produce duplicate entries in the output (entries differing only in these
# flags). Users reported longer execution times caused by such duplicates.
# The first group stands alone; the second group takes one argument.
IGNORED_FLAGS.update(dict.fromkeys(["-MD", "-MMD", "-MG", "-MP"], 0))
IGNORED_FLAGS.update(dict.fromkeys(["-MF", "-MT", "-MQ"], 1))
# Linker options are dropped because the compilation database holds
# compilation commands only, so the compiler would ignore them anyway.
# Removing them makes the output more readable.
# Again: stand-alone options first, then those taking one argument.
IGNORED_FLAGS.update(
    dict.fromkeys(["-static", "-shared", "-s", "-rdynamic"], 0)
)
IGNORED_FLAGS.update(
    dict.fromkeys(["-l", "-L", "-u", "-z", "-T", "-Xlinker"], 1)
)
# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset(
    re.compile(expression)
    for expression in (
        # plain `cc` / `c++`, optionally behind an intercept-/analyze- prefix
        r"^(intercept-|analyze-|)c(c|\+\+)$",
        # `gcc` / `g++` / `mcc` / `m++`, with optional dash-separated
        # prefixes and an optional version suffix
        r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$",
        # `clang` / `clang++`, with the same optional prefixes and suffix
        r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$",
        # `llvm-gcc` / `llvm-g++`
        r"^llvm-g(cc|\+\+)$",
    )
)
def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    :param command: the invocation as a list of strings; the first element
        is the executable, the rest are its arguments.
    :return: None when the executable is not a recognised C/C++ compiler,
        when an argument shows that no compilation pass is involved, or
        when no source file was found. Otherwise a named tuple with the
        following attributes:

        compiler:   string value of 'c' or 'c++'
        flags:      list of compile options
        files:      list of source files"""

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used so that
    # the arguments belonging to an option can be consumed together with it
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags, together with the arguments they take
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # a default is given so that a truncated command line
                # (option at the very end without its argument) does not
                # leak StopIteration to the caller
                next(args, None)
        # linker related options written together with their value
        elif re.match(r"^-(l|L|Wl,).+", arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the option is the last argument
            if value is not None:
                flags.append(value)
        # parameter which looks like a source file is taken...
        elif re.match(r"^[^-].+", arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    # build a real named tuple instance (not attributes set on the class)
    compilation = collections.namedtuple(
        "Compilation", ["compiler", "flags", "files"]
    )
    return compilation(compiler=language, flags=flags, files=files)
def classify_source(filename, c_compiler=True):
    """Map a file name to a language name based on its extension.

    :param filename: path or bare name of the file to classify.
    :param c_compiler: when false, the '.c' and '.i' extensions are reported
        as their C++ counterparts.
    :return: the language name, or None for an unknown extension."""

    extension = os.path.splitext(os.path.basename(filename))[1]

    # only these two extensions depend on the kind of compiler
    if extension == ".c":
        return "c" if c_compiler else "c++"
    if extension == ".i":
        return "c-cpp-output" if c_compiler else "c++-cpp-output"

    # the lookup is case sensitive: '.C' is C++ here
    fixed_languages = {
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }
    return fixed_languages.get(extension)
def compiler_language(command):
    """Tell whether the command is a compiler call, and for which language.

    :param command: the invocation as a list of strings; only the first
        element (the executable) is looked at.
    :return: 'c' or 'c++' when the executable name matches one of the
        known compiler patterns, None otherwise (also for an empty
        command)."""

    if not command:
        return None

    # only the file name of the executable is matched, not its directory
    program = os.path.basename(command[0])
    for pattern in COMPILER_PATTERNS:
        if pattern.match(program):
            break
    else:
        # none of the known compiler name patterns matched
        return None

    # a '++' at the end of the name, or right before a dash separated
    # suffix, marks a C++ compiler
    if re.match(r"^(.+)(\+\+)(-.+|)$", program):
        return "c++"
    return "c"