llvm/clang-tools-extra/modularize/PreprocessorTracker.cpp

//===--- PreprocessorTracker.cpp - Preprocessor tracking -*- C++ -*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------------===//
//
// The Basic Idea (Macro and Conditional Checking)
//
// Basically we install a PPCallbacks-derived object to track preprocessor
// activity, namely when a header file is entered/exited, when a macro
// is expanded, when "defined" is used, and when #if, #elif, #ifdef,
// and #ifndef are used.  We save the state of macro and "defined"
// expressions in a map, keyed on a name/file/line/column quadruple.
// The map entries store the different states (values) that a macro expansion,
// "defined" expression, or condition expression has in the course of
// processing for the one location in the one header containing it,
// plus a list of the nested include stacks for the states.  When a macro
// or "defined" expression evaluates to the same value, which is the
// desired case, only one state is stored.  Similarly, for conditional
// directives, we save the condition expression states in a separate map.
//
// This information is collected as modularize compiles all the headers
// given to it to process.  After all the compilations are performed,
// a check is performed for any entries in the maps that contain more
// than one different state, and for these an output message is generated.
//
// For example:
//
//   (...)/SubHeader.h:11:5:
//   #if SYMBOL == 1
//       ^
//   error: Macro instance 'SYMBOL' has different values in this header,
//          depending on how it was included.
//     'SYMBOL' expanded to: '1' with respect to these inclusion paths:
//       (...)/Header1.h
//         (...)/SubHeader.h
//   (...)/SubHeader.h:3:9:
//   #define SYMBOL 1
//             ^
//   Macro defined here.
//     'SYMBOL' expanded to: '2' with respect to these inclusion paths:
//       (...)/Header2.h
//           (...)/SubHeader.h
//   (...)/SubHeader.h:7:9:
//   #define SYMBOL 2
//             ^
//   Macro defined here.
//
// The Basic Idea ('Extern "C/C++" {}' Or 'namespace {}' With Nested
// '#include' Checking)
//
// To check for '#include' directives nested inside 'Extern "C/C++" {}'
// or 'namespace {}' blocks, we keep track of the '#include' directives
// while running the preprocessor, and later during a walk of the AST
// we call a function to check for any '#include' directives inside
// an 'Extern "C/C++" {}' or 'namespace {}' block, given its source
// range.
//
// Design and Implementation Details (Macro and Conditional Checking)
//
// A PreprocessorTrackerImpl class implements the PreprocessorTracker
// interface. It uses a PreprocessorCallbacks class derived from PPCallbacks
// to track preprocessor activity, namely entering/exiting a header, macro
// expansions, use of "defined" expressions, and #if, #elif, #ifdef, and
// #ifndef conditional directives. PreprocessorTrackerImpl stores a map
// of MacroExpansionTracker objects keyed on a name/file/line/column
// value represented by a light-weight PPItemKey value object. This
// is the key top-level data structure tracking the values of macro
// expansion instances.  Similarly, it stores a map of ConditionalTracker
// objects with the same kind of key, for tracking preprocessor conditional
// directives.
//
// The MacroExpansionTracker object represents one macro reference or use
// of a "defined" expression in a header file. It stores a handle to a
// string representing the unexpanded macro instance, a handle to a string
// representing the unpreprocessed source line containing the unexpanded
// macro instance, and a vector of one or more MacroExpansionInstance
// objects.
//
// The MacroExpansionInstance object represents one or more expansions
// of a macro reference, for the case where the macro expands to the same
// value. MacroExpansionInstance stores a handle to a string representing
// the expanded macro value, a PPItemKey representing the file/line/column
// where the macro was defined, a handle to a string representing the source
// line containing the macro definition, and a vector of InclusionPathHandle
// values that represents the hierarchies of include files for each case
// where the particular header containing the macro reference was referenced
// or included.

// In the normal case where a macro instance always expands to the same
// value, the MacroExpansionTracker object will only contain one
// MacroExpansionInstance representing all the macro expansion instances.
// If a case was encountered where a macro instance expands to a value
// that is different from that seen before, or the macro was defined in
// a different place, a new MacroExpansionInstance object representing
// that case will be added to the vector in MacroExpansionTracker. If a
// macro instance expands to a value already seen before, the
// InclusionPathHandle representing that case's include file hierarchy
// will be added to the existing MacroExpansionInstance object.

// For checking conditional directives, the ConditionalTracker class
// functions similarly to MacroExpansionTracker, but tracks an #if,
// #elif, #ifdef, or #ifndef directive in a header file.  It stores
// a vector of one or two ConditionalExpansionInstance objects,
// representing the cases where the conditional expression evaluates
// to true or false.  This latter object stores the evaluated value
// of the condition expression (a bool) and a vector of
// InclusionPathHandles.
//
// To reduce the instances of string and object copying, the
// PreprocessorTrackerImpl class uses a StringPool to save all stored
// strings, and defines a StringHandle type to abstract the references
// to the strings.
//
// PreprocessorTrackerImpl also maintains a list representing the unique
// headers, which is just a vector of StringHandle's for the header file
// paths. A HeaderHandle abstracts a reference to a header, and is simply
// the index of the stored header file path.
//
// A HeaderInclusionPath class abstracts a unique hierarchy of header file
// inclusions. It simply stores a vector of HeaderHandles ordered from the
// top-most header (the one from the header list passed to modularize) down
// to the header containing the macro reference. PreprocessorTrackerImpl
// stores a vector of these objects. An InclusionPathHandle typedef
// abstracts a reference to one of the HeaderInclusionPath objects, and is
// simply the index of the stored HeaderInclusionPath object. The
// MacroExpansionInstance object stores a vector of these handles so that
// the reporting function can display the include hierarchies for the macro
// expansion instances represented by that object, to help the user
// understand how the header was included. (A future enhancement might
// be to associate a line number for the #include directives, but I
// think not doing so is good enough for the present.)
//
// A key reason for using these opaque handles was to try to keep all the
// internal objects light-weight value objects, in order to reduce string
// and object copying overhead, and to abstract this implementation detail.
//
// The key data structures are built up while modularize runs the headers
// through the compilation. A PreprocessorTracker instance is created and
// passed down to the AST action and consumer objects in modularize. For
// each new compilation instance, the consumer calls the
// PreprocessorTracker's handleNewPreprocessorEntry function, which sets
// up a PreprocessorCallbacks object for the preprocessor. At the end of
// the compilation instance, the PreprocessorTracker's
// handleNewPreprocessorExit function handles cleaning up with respect
// to the preprocessing instance.
//
// The PreprocessorCallbacks object uses an overridden FileChanged callback
// to determine when a header is entered and exited (including exiting the
// header during #include directives). It calls PreprocessorTracker's
// handleHeaderEntry and handleHeaderExit functions upon entering and
// exiting a header. These functions manage a stack of header handles
// represented by a vector, pushing and popping header handles as headers
// are entered and exited. When a HeaderInclusionPath object is created,
// it simply copies this stack.
//
// The PreprocessorCallbacks object uses an overridden MacroExpands callback
// to track when a macro expansion is performed. It calls a couple of helper
// functions to get the unexpanded and expanded macro values as strings, but
// then calls PreprocessorTrackerImpl's addMacroExpansionInstance function to
// do the rest of the work. The getMacroExpandedString function uses the
// preprocessor's getSpelling to convert tokens to strings using the
// information passed to the MacroExpands callback, and simply concatenates
// them. It makes recursive calls to itself to handle nested macro
// definitions, and also handles function-style macros.
//
// PreprocessorTrackerImpl's addMacroExpansionInstance function looks for
// an existing MacroExpansionTracker entry in its map of MacroExpansionTracker
// objects. If none exists, it adds one with one MacroExpansionInstance and
// returns. If a MacroExpansionTracker object already exists, it looks for
// an existing MacroExpansionInstance object stored in the
// MacroExpansionTracker object, one that matches the macro expanded value
// and the macro definition location. If a matching MacroExpansionInstance
// object is found, it just adds the current HeaderInclusionPath object to
// it. If not found, it creates and stores a new MacroExpansionInstance
// object. The addMacroExpansionInstance function calls a couple of helper
// functions to get the pre-formatted location and source line strings for
// the macro reference and the macro definition stored as string handles.
// These helper functions use the current source manager from the
// preprocessor. This is done in advance at this point in time because the
// source manager doesn't exist at the time of the reporting.
//
// For conditional checking, the PreprocessorCallbacks class overrides the
// PPCallbacks handlers for #if, #elif, #ifdef, and #ifndef.  These handlers
// call the addConditionalExpansionInstance method of
// PreprocessorTrackerImpl.  The process is similar to that of macros, but
// with some different data and error messages.  A lookup is performed for
// the conditional, and if a ConditionalTracker object doesn't yet exist for
// the conditional, a new one is added, including adding a
// ConditionalExpansionInstance object to it to represent the condition
// expression state.  If a ConditionalTracker for the conditional does
// exist, a lookup is made for a ConditionalExpansionInstance object
// matching the condition expression state.  If one exists, a
// HeaderInclusionPath is added to it.  Otherwise a new
// ConditionalExpansionInstance entry is made.  If a ConditionalTracker
// has two ConditionalExpansionInstance objects, it means there was a
// conflict, meaning the conditional expression evaluated differently in
// one or more cases.
//
// After modularize has performed all the compilations, it enters a phase
// of error reporting. This new feature adds to this reporting phase calls
// to the PreprocessorTracker's reportInconsistentMacros and
// reportInconsistentConditionals functions. These functions walk the maps
// of MacroExpansionTracker's and ConditionalTracker's respectively. If
// any of these objects have more than one MacroExpansionInstance or
// ConditionalExpansionInstance objects, it formats and outputs an error
// message like the example shown previously, using the stored data.
//
// A potential issue is that there is some overlap between the #if/#elif
// conditional and macro reporting.  I could disable the #if and #elif,
// leaving just the #ifdef and #ifndef, since these don't overlap.  Or,
// to make clearer the separate reporting phases, I could add an output
// message marking the phases.
//
// Design and Implementation Details ('Extern "C/C++" {}' Or
// 'namespace {}' With Nested '#include' Checking)
//
// We override the InclusionDirective in PPCallbacks to record information
// about each '#include' directive encountered during preprocessing.
// We co-opt the PPItemKey class to store the information about each
// '#include' directive, including the source file name containing the
// directive, the name of the file being included, and the source line
// and column of the directive.  We store these objects in a vector,
// after first checking to see if an entry already exists.
//
// Later, while the AST is being walked for other checks, we provide
// visit handlers for 'extern "C/C++" {}' and 'namespace (name) {}'
// blocks, checking to see if any '#include' directives occurred
// within the blocks, reporting errors if any found.
//
// Future Directions
//
// We probably should add options to disable any of the checks, in case
// there is some problem with them, or the messages get too verbose.
//
// With the map of all the macro and conditional expansion instances,
// it might be possible to add to the existing modularize error messages
// (the second part referring to definitions being different), attempting
// to tie them to the last macro conflict encountered with respect to the
// order of the code encountered.
//
//===--------------------------------------------------------------------===//

#include "PreprocessorTracker.h"
#include "ModularizeUtilities.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/PPCallbacks.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"

namespace Modularize {

// Some handle types
StringHandle;

HeaderHandle;
const HeaderHandle HeaderHandleInvalid =;

InclusionPathHandle;
const InclusionPathHandle InclusionPathHandleInvalid =;

// Some utility functions.

// Get a "file:line:column" source location string.
static std::string getSourceLocationString(clang::Preprocessor &PP,
                                           clang::SourceLocation Loc) {}

// Get just the file name from a source location.
static std::string getSourceLocationFile(clang::Preprocessor &PP,
                                         clang::SourceLocation Loc) {}

// Get just the line and column from a source location.
// PP is the preprocessor the location belongs to and Loc is the location
// to query.  The function returns nothing directly; the results are
// delivered through the Line and Column reference parameters.
static void getSourceLocationLineAndColumn(clang::Preprocessor &PP,
                                           clang::SourceLocation Loc, int &Line,
                                           int &Column) {}

// Retrieve source snippet from file image.
static std::string getSourceString(clang::Preprocessor &PP,
                                   clang::SourceRange Range) {}

// Retrieve source line from file image given a location.
static std::string getSourceLine(clang::Preprocessor &PP,
                                 clang::SourceLocation Loc) {}

// Retrieve source line from file image given a file ID and line number.
static std::string getSourceLine(clang::Preprocessor &PP, clang::FileID FileID,
                                 int Line) {}

// Get the string for the unexpanded macro instance.
// Range is expected to end at the last token
// for the macro instance, which in the case of a function-style
// macro will be a ')', but for an object-style macro, it
// will be the macro name itself.
static std::string getMacroUnexpandedString(clang::SourceRange Range,
                                            clang::Preprocessor &PP,
                                            llvm::StringRef MacroName,
                                            const clang::MacroInfo *MI) {}

// Get the expansion for a macro instance, given the information
// provided by PPCallbacks.
// Per the design notes at the top of this file, the macro's tokens are
// converted to strings with the preprocessor's getSpelling and simply
// concatenated; the function calls itself recursively to handle nested
// macro definitions, and also handles function-style macros.
// FIXME: This doesn't support function-style macro instances
// passed as arguments to another function-style macro. However,
// since it still expands the inner arguments, it still
// allows modularize to effectively work with respect to macro
// consistency checking, although it displays the incorrect
// expansion in error messages.
static std::string getMacroExpandedString(clang::Preprocessor &PP,
                                          llvm::StringRef MacroName,
                                          const clang::MacroInfo *MI,
                                          const clang::MacroArgs *Args) {}

namespace {

// ConditionValueKind strings.
const char *
ConditionValueKindStrings[] =;

// Preprocessor item key.
//
// This class represents a location in a source file, for use
// as a key representing a unique name/file/line/column quadruplet,
// which in this case is used to identify a macro expansion instance,
// but could be used for other things as well.
// The file is a header file handle, the line is a line number,
// and the column is a column number.
class PPItemKey {};

// Header inclusion path.
class HeaderInclusionPath {};

// Macro expansion instance.
//
// This class represents an instance of a macro expansion with a
// unique value.  It also stores the unique header inclusion paths
// for use in telling the user the nested include path to the header.
class MacroExpansionInstance {};

// Macro expansion instance tracker.
//
// This class represents one macro expansion, keyed by a PPItemKey.
// It stores a string representing the macro reference in the source,
// and a list of MacroExpansionInstance objects representing
// the unique values the macro expands to in instances of the header.
class MacroExpansionTracker {};

// Conditional expansion instance.
//
// This class represents an instance of a condition expression result
// with a unique value.  It also stores the unique header inclusion paths
// for use in telling the user the nested include path to the header.
class ConditionalExpansionInstance {};

// Conditional directive instance tracker.
//
// This class represents one conditional directive, keyed by a PPItemKey.
// It stores a string representing the macro reference in the source,
// and a list of ConditionalExpansionInstance objects representing
// the unique value the condition expression expands to in instances of
// the header.
class ConditionalTracker {};

class PreprocessorTrackerImpl;

// Preprocessor callbacks for modularize.
//
// This class derives from the Clang PPCallbacks class to track preprocessor
// actions, such as changing files and handling preprocessor directives and
// macro expansions.  It has to figure out when a new header file is entered
// and left, as the provided handler is not particularly clear about it.
class PreprocessorCallbacks : public clang::PPCallbacks {};

// Preprocessor macro expansion item map types.
MacroExpansionMap;
MacroExpansionMapIter;

// Preprocessor conditional expansion item map types.
ConditionalExpansionMap;
ConditionalExpansionMapIter;

// Preprocessor tracker for modularize.
//
// This class stores information about all the headers processed in the
// course of running modularize.
class PreprocessorTrackerImpl : public PreprocessorTracker {};

} // namespace

// PreprocessorTracker functions.

// PreprocessorTracker destructor.
PreprocessorTracker::~PreprocessorTracker() {}

// Create instance of PreprocessorTracker.
PreprocessorTracker *PreprocessorTracker::create(
    llvm::SmallVector<std::string, 32> &Headers,
    bool DoBlockCheckHeaderListOnly) {}

// Preprocessor callbacks for modularize.

// Handle include directive.
// Per the design notes at the top of this file, this override exists to
// record information about each '#include' directive seen during
// preprocessing (stored as a PPItemKey), so that the later AST walk can
// report '#include' directives nested inside 'extern "C/C++" {}' or
// 'namespace {}' blocks.
void PreprocessorCallbacks::InclusionDirective(
    clang::SourceLocation HashLoc, const clang::Token &IncludeTok,
    llvm::StringRef FileName, bool IsAngled,
    clang::CharSourceRange FilenameRange, clang::OptionalFileEntryRef File,
    llvm::StringRef SearchPath, llvm::StringRef RelativePath,
    const clang::Module *SuggestedModule, bool ModuleImported,
    clang::SrcMgr::CharacteristicKind FileType) {}

// Handle file entry/exit.
// Per the design notes at the top of this file, this callback is used to
// determine when a header is entered and exited (including exiting the
// header during #include directives), and it calls the tracker's
// handleHeaderEntry and handleHeaderExit functions accordingly.
void PreprocessorCallbacks::FileChanged(
    clang::SourceLocation Loc, clang::PPCallbacks::FileChangeReason Reason,
    clang::SrcMgr::CharacteristicKind FileType, clang::FileID PrevFID) {}

// Handle macro expansion.
// Per the design notes at the top of this file, this callback obtains the
// unexpanded and expanded macro values as strings via helper functions and
// then hands them to PreprocessorTrackerImpl's addMacroExpansionInstance,
// which does the rest of the work.
void PreprocessorCallbacks::MacroExpands(const clang::Token &MacroNameTok,
                                         const clang::MacroDefinition &MD,
                                         clang::SourceRange Range,
                                         const clang::MacroArgs *Args) {}

// Handle use of a "defined" expression.
// NOTE(review): the design notes at the top of this file say "defined"
// expressions are tracked alongside macro expansions (a
// MacroExpansionTracker represents either one); presumably this records
// the state of the "defined" expression at Range -- confirm.
void PreprocessorCallbacks::Defined(const clang::Token &MacroNameTok,
                                    const clang::MacroDefinition &MD,
                                    clang::SourceRange Range) {}

// Handle an #if directive.
// Per the design notes at the top of this file, the conditional-directive
// handlers call PreprocessorTrackerImpl's addConditionalExpansionInstance
// so that the condition's evaluated state can be compared across
// inclusions of the same header.
void PreprocessorCallbacks::If(clang::SourceLocation Loc,
                               clang::SourceRange ConditionRange,
                               clang::PPCallbacks::ConditionValueKind ConditionResult) {}

// Handle an #elif directive.
// Per the design notes at the top of this file, the conditional-directive
// handlers call PreprocessorTrackerImpl's addConditionalExpansionInstance
// so that the condition's evaluated state can be compared across
// inclusions of the same header.
void PreprocessorCallbacks::Elif(clang::SourceLocation Loc,
                                 clang::SourceRange ConditionRange,
                                 clang::PPCallbacks::ConditionValueKind ConditionResult,
                                 clang::SourceLocation IfLoc) {}

// Handle an #ifdef directive.
// Per the design notes at the top of this file, the conditional-directive
// handlers call PreprocessorTrackerImpl's addConditionalExpansionInstance
// so that the directive's outcome can be compared across inclusions of
// the same header.
void PreprocessorCallbacks::Ifdef(clang::SourceLocation Loc,
                                  const clang::Token &MacroNameTok,
                                  const clang::MacroDefinition &MD) {}

// Handle an #ifndef directive.
// Per the design notes at the top of this file, the conditional-directive
// handlers call PreprocessorTrackerImpl's addConditionalExpansionInstance
// so that the directive's outcome can be compared across inclusions of
// the same header.
void PreprocessorCallbacks::Ifndef(clang::SourceLocation Loc,
                                   const clang::Token &MacroNameTok,
                                   const clang::MacroDefinition &MD) {}
} // end namespace Modularize