//===--- PreprocessorTracker.cpp - Preprocessor tracking -*- C++ -*------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===--------------------------------------------------------------------===// // // The Basic Idea (Macro and Conditional Checking) // // Basically we install a PPCallbacks-derived object to track preprocessor // activity, namely when a header file is entered/exited, when a macro // is expanded, when "defined" is used, and when #if, #elif, #ifdef, // and #ifndef are used. We save the state of macro and "defined" // expressions in a map, keyed on a name/file/line/column quadruple. // The map entries store the different states (values) that a macro expansion, // "defined" expression, or condition expression has in the course of // processing for the one location in the one header containing it, // plus a list of the nested include stacks for the states. When a macro // or "defined" expression evaluates to the same value, which is the // desired case, only one state is stored. Similarly, for conditional // directives, we save the condition expression states in a separate map. // // This information is collected as modularize compiles all the headers // given to it to process. After all the compilations are performed, // a check is performed for any entries in the maps that contain more // than one different state, and for these an output message is generated. // // For example: // // (...)/SubHeader.h:11:5: // #if SYMBOL == 1 // ^ // error: Macro instance 'SYMBOL' has different values in this header, // depending on how it was included. // 'SYMBOL' expanded to: '1' with respect to these inclusion paths: // (...)/Header1.h // (...)/SubHeader.h // (...)/SubHeader.h:3:9: // #define SYMBOL 1 // ^ // Macro defined here. 
// 'SYMBOL' expanded to: '2' with respect to these inclusion paths: // (...)/Header2.h // (...)/SubHeader.h // (...)/SubHeader.h:7:9: // #define SYMBOL 2 // ^ // Macro defined here. // // The Basic Idea ('Extern "C/C++" {}' Or 'namespace {}' With Nested // '#include' Checking) // // To check for '#include' directives nested inside 'Extern "C/C++" {}' // or 'namespace {}' blocks, we keep track of the '#include' directives // while running the preprocessor, and later during a walk of the AST // we call a function to check for any '#include' directives inside // an 'Extern "C/C++" {}' or 'namespace {}' block, given its source // range. // // Design and Implementation Details (Macro and Conditional Checking) // // A PreprocessorTrackerImpl class implements the PreprocessorTracker // interface. It uses a PreprocessorCallbacks class derived from PPCallbacks // to track preprocessor activity, namely entering/exiting a header, macro // expansions, use of "defined" expressions, and #if, #elif, #ifdef, and // #ifndef conditional directives. PreprocessorTrackerImpl stores a map // of MacroExpansionTracker objects keyed on a name/file/line/column // value represented by a light-weight PPItemKey value object. This // is the key top-level data structure tracking the values of macro // expansion instances. Similarly, it stores a map of ConditionalTracker // objects with the same kind of key, for tracking preprocessor conditional // directives. // // The MacroExpansionTracker object represents one macro reference or use // of a "defined" expression in a header file. It stores a handle to a // string representing the unexpanded macro instance, a handle to a string // representing the unpreprocessed source line containing the unexpanded // macro instance, and a vector of one or more MacroExpansionInstance // objects. // // The MacroExpansionInstance object represents one or more expansions // of a macro reference, for the case where the macro expands to the same // value. 
MacroExpansionInstance stores a handle to a string representing // the expanded macro value, a PPItemKey representing the file/line/column // where the macro was defined, a handle to a string representing the source // line containing the macro definition, and a vector of InclusionPathHandle // values that represents the hierarchies of include files for each case // where the particular header containing the macro reference was referenced // or included. // In the normal case where a macro instance always expands to the same // value, the MacroExpansionTracker object will only contain one // MacroExpansionInstance representing all the macro expansion instances. // If a case was encountered where a macro instance expands to a value // that is different from that seen before, or the macro was defined in // a different place, a new MacroExpansionInstance object representing // that case will be added to the vector in MacroExpansionTracker. If a // macro instance expands to a value already seen before, the // InclusionPathHandle representing that case's include file hierarchy // will be added to the existing MacroExpansionInstance object. // For checking conditional directives, the ConditionalTracker class // functions similarly to MacroExpansionTracker, but tracks an #if, // #elif, #ifdef, or #ifndef directive in a header file. It stores // a vector of one or two ConditionalExpansionInstance objects, // representing the cases where the conditional expression evaluates // to true or false. This latter object stores the evaluated value // of the condition expression (a bool) and a vector of // InclusionPathHandles. // // To reduce the instances of string and object copying, the // PreprocessorTrackerImpl class uses a StringPool to save all stored // strings, and defines a StringHandle type to abstract the references // to the strings. 
// // PreprocessorTrackerImpl also maintains a list representing the unique // headers, which is just a vector of StringHandle's for the header file // paths. A HeaderHandle abstracts a reference to a header, and is simply // the index of the stored header file path. // // A HeaderInclusionPath class abstracts a unique hierarchy of header file // inclusions. It simply stores a vector of HeaderHandles ordered from the // top-most header (the one from the header list passed to modularize) down // to the header containing the macro reference. PreprocessorTrackerImpl // stores a vector of these objects. An InclusionPathHandle typedef // abstracts a reference to one of the HeaderInclusionPath objects, and is // simply the index of the stored HeaderInclusionPath object. The // MacroExpansionInstance object stores a vector of these handles so that // the reporting function can display the include hierarchies for the macro // expansion instances represented by that object, to help the user // understand how the header was included. (A future enhancement might // be to associate a line number for the #include directives, but I // think not doing so is good enough for the present.) // // A key reason for using these opaque handles was to try to keep all the // internal objects light-weight value objects, in order to reduce string // and object copying overhead, and to abstract this implementation detail. // // The key data structures are built up while modularize runs the headers // through the compilation. A PreprocessorTracker instance is created and // passed down to the AST action and consumer objects in modularize. For // each new compilation instance, the consumer calls the // PreprocessorTracker's handleNewPreprocessorEntry function, which sets // up a PreprocessorCallbacks object for the preprocessor. 
At the end of // the compilation instance, the PreprocessorTracker's // handleNewPreprocessorExit function handles cleaning up with respect // to the preprocessing instance. // // The PreprocessorCallbacks object uses an overridden FileChanged callback // to determine when a header is entered and exited (including exiting the // header during #include directives). It calls PreprocessorTracker's // handleHeaderEntry and handleHeaderExit functions upon entering and // exiting a header. These functions manage a stack of header handles // represented by a vector, pushing and popping header handles as headers // are entered and exited. When a HeaderInclusionPath object is created, // it simply copies this stack. // // The PreprocessorCallbacks object uses an overridden MacroExpands callback // to track when a macro expansion is performed. It calls a couple of helper // functions to get the unexpanded and expanded macro values as strings, but // then calls PreprocessorTrackerImpl's addMacroExpansionInstance function to // do the rest of the work. The getMacroExpandedString function uses the // preprocessor's getSpelling to convert tokens to strings using the // information passed to the MacroExpands callback, and simply concatenates // them. It makes recursive calls to itself to handle nested macro // definitions, and also handles function-style macros. // // PreprocessorTrackerImpl's addMacroExpansionInstance function looks for // an existing MacroExpansionTracker entry in its map of MacroExpansionTracker // objects. If none exists, it adds one with one MacroExpansionInstance and // returns. If a MacroExpansionTracker object already exists, it looks for // an existing MacroExpansionInstance object stored in the // MacroExpansionTracker object, one that matches the macro expanded value // and the macro definition location. If a matching MacroExpansionInstance // object is found, it just adds the current HeaderInclusionPath object to // it. 
If not found, it creates and stores a new MacroExpansionInstance // object. The addMacroExpansionInstance function calls a couple of helper // functions to get the pre-formatted location and source line strings for // the macro reference and the macro definition stored as string handles. // These helper functions use the current source manager from the // preprocessor. This is done in advance at this point in time because the // source manager doesn't exist at the time of the reporting. // // For conditional check, the PreprocessorCallbacks class overrides the // PPCallbacks handlers for #if, #elif, #ifdef, and #ifndef. These handlers // call the addConditionalExpansionInstance method of // PreprocessorTrackerImpl. The process is similar to that of macros, but // with some different data and error messages. A lookup is performed for // the conditional, and if a ConditionalTracker object doesn't yet exist for // the conditional, a new one is added, including adding a // ConditionalExpansionInstance object to it to represent the condition // expression state. If a ConditionalTracker for the conditional does // exist, a lookup is made for a ConditionalExpansionInstance object // matching the condition expression state. If one exists, a // HeaderInclusionPath is added to it. Otherwise a new // ConditionalExpansionInstance entry is made. If a ConditionalTracker // has two ConditionalExpansionInstance objects, it means there was a // conflict, meaning the conditional expression evaluated differently in // one or more cases. // // After modularize has performed all the compilations, it enters a phase // of error reporting. This new feature adds to this reporting phase calls // to the PreprocessorTracker's reportInconsistentMacros and // reportInconsistentConditionals functions. These functions walk the maps // of MacroExpansionTracker's and ConditionalTracker's respectively. 
If // any of these objects have more than one MacroExpansionInstance or // ConditionalExpansionInstance objects, it formats and outputs an error // message like the example shown previously, using the stored data. // // A potential issue is that there is some overlap between the #if/#elif // conditional and macro reporting. I could disable the #if and #elif, // leaving just the #ifdef and #ifndef, since these don't overlap. Or, // to make clearer the separate reporting phases, I could add an output // message marking the phases. // // Design and Implementation Details ('Extern "C/C++" {}' Or // 'namespace {}' With Nested '#include' Checking) // // We override the InclusionDirective in PPCallbacks to record information // about each '#include' directive encountered during preprocessing. // We co-opt the PPItemKey class to store the information about each // '#include' directive, including the source file name containing the // directive, the name of the file being included, and the source line // and column of the directive. We store these objects in a vector, // after first checking to see if an entry already exists. // // Later, while the AST is being walked for other checks, we provide // visit handlers for 'extern "C/C++" {}' and 'namespace (name) {}' // blocks, checking to see if any '#include' directives occurred // within the blocks, reporting errors if any are found. // // Future Directions // // We probably should add options to disable any of the checks, in case // there is some problem with them, or the messages get too verbose. // // With the map of all the macro and conditional expansion instances, // it might be possible to add to the existing modularize error messages // (the second part referring to definitions being different), attempting // to tie them to the last macro conflict encountered with respect to the // order of the code encountered. 
// //===--------------------------------------------------------------------===// #include "PreprocessorTracker.h" #include "ModularizeUtilities.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/MacroArgs.h" #include "clang/Lex/PPCallbacks.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/raw_ostream.h" namespace Modularize { // Some handle types StringHandle; HeaderHandle; const HeaderHandle HeaderHandleInvalid = …; InclusionPathHandle; const InclusionPathHandle InclusionPathHandleInvalid = …; // Some utility functions. // Get a "file:line:column" source location string. static std::string getSourceLocationString(clang::Preprocessor &PP, clang::SourceLocation Loc) { … } // Get just the file name from a source location. static std::string getSourceLocationFile(clang::Preprocessor &PP, clang::SourceLocation Loc) { … } // Get just the line and column from a source location. static void getSourceLocationLineAndColumn(clang::Preprocessor &PP, clang::SourceLocation Loc, int &Line, int &Column) { … } // Retrieve source snippet from file image. static std::string getSourceString(clang::Preprocessor &PP, clang::SourceRange Range) { … } // Retrieve source line from file image given a location. static std::string getSourceLine(clang::Preprocessor &PP, clang::SourceLocation Loc) { … } // Retrieve source line from file image given a file ID and line number. static std::string getSourceLine(clang::Preprocessor &PP, clang::FileID FileID, int Line) { … } // Get the string for the Unexpanded macro instance. // The sourceRange is expected to end at the last token // for the macro instance, which in the case of a function-style // macro will be a ')', but for an object-style macro, it // will be the macro name itself. 
static std::string getMacroUnexpandedString(clang::SourceRange Range, clang::Preprocessor &PP, llvm::StringRef MacroName, const clang::MacroInfo *MI) { … } // Get the expansion for a macro instance, given the information // provided by PPCallbacks. // FIXME: This doesn't support function-style macro instances // passed as arguments to another function-style macro. However, // since it still expands the inner arguments, it still // allows modularize to effectively work with respect to macro // consistency checking, although it displays the incorrect // expansion in error messages. static std::string getMacroExpandedString(clang::Preprocessor &PP, llvm::StringRef MacroName, const clang::MacroInfo *MI, const clang::MacroArgs *Args) { … } namespace { // ConditionValueKind strings. const char * ConditionValueKindStrings[] = …; // Preprocessor item key. // // This class represents a location in a source file, for use // as a key representing a unique name/file/line/column quadruplet, // which in this case is used to identify a macro expansion instance, // but could be used for other things as well. // The file is a header file handle, the line is a line number, // and the column is a column number. class PPItemKey { … }; // Header inclusion path. class HeaderInclusionPath { … }; // Macro expansion instance. // // This class represents an instance of a macro expansion with a // unique value. It also stores the unique header inclusion paths // for use in telling the user the nested include path to the header. class MacroExpansionInstance { … }; // Macro expansion instance tracker. // // This class represents one macro expansion, keyed by a PPItemKey. // It stores a string representing the macro reference in the source, // and a list of MacroExpansionInstance objects representing // the unique values the macro expands to in instances of the header. class MacroExpansionTracker { … }; // Conditional expansion instance. 
// // This class represents an instance of a condition expression result // with a unique value. It also stores the unique header inclusion paths // for use in telling the user the nested include path to the header. class ConditionalExpansionInstance { … }; // Conditional directive instance tracker. // // This class represents one conditional directive, keyed by a PPItemKey. // It stores a string representing the macro reference in the source, // and a list of ConditionalExpansionInstance objects representing // the unique value the condition expression expands to in instances of // the header. class ConditionalTracker { … }; class PreprocessorTrackerImpl; // Preprocessor callbacks for modularize. // // This class derives from the Clang PPCallbacks class to track preprocessor // actions, such as changing files and handling preprocessor directives and // macro expansions. It has to figure out when a new header file is entered // and left, as the provided handler is not particularly clear about it. class PreprocessorCallbacks : public clang::PPCallbacks { … }; // Preprocessor macro expansion item map types. MacroExpansionMap; MacroExpansionMapIter; // Preprocessor conditional expansion item map types. ConditionalExpansionMap; ConditionalExpansionMapIter; // Preprocessor tracker for modularize. // // This class stores information about all the headers processed in the // course of running modularize. class PreprocessorTrackerImpl : public PreprocessorTracker { … }; } // namespace // PreprocessorTracker functions. // PreprocessorTracker destructor. PreprocessorTracker::~PreprocessorTracker() { … } // Create instance of PreprocessorTracker. PreprocessorTracker *PreprocessorTracker::create( llvm::SmallVector<std::string, 32> &Headers, bool DoBlockCheckHeaderListOnly) { … } // Preprocessor callbacks for modularize. // Handle include directive. 
void PreprocessorCallbacks::InclusionDirective( clang::SourceLocation HashLoc, const clang::Token &IncludeTok, llvm::StringRef FileName, bool IsAngled, clang::CharSourceRange FilenameRange, clang::OptionalFileEntryRef File, llvm::StringRef SearchPath, llvm::StringRef RelativePath, const clang::Module *SuggestedModule, bool ModuleImported, clang::SrcMgr::CharacteristicKind FileType) { … } // Handle file entry/exit. void PreprocessorCallbacks::FileChanged( clang::SourceLocation Loc, clang::PPCallbacks::FileChangeReason Reason, clang::SrcMgr::CharacteristicKind FileType, clang::FileID PrevFID) { … } // Handle macro expansion. void PreprocessorCallbacks::MacroExpands(const clang::Token &MacroNameTok, const clang::MacroDefinition &MD, clang::SourceRange Range, const clang::MacroArgs *Args) { … } void PreprocessorCallbacks::Defined(const clang::Token &MacroNameTok, const clang::MacroDefinition &MD, clang::SourceRange Range) { … } void PreprocessorCallbacks::If(clang::SourceLocation Loc, clang::SourceRange ConditionRange, clang::PPCallbacks::ConditionValueKind ConditionResult) { … } void PreprocessorCallbacks::Elif(clang::SourceLocation Loc, clang::SourceRange ConditionRange, clang::PPCallbacks::ConditionValueKind ConditionResult, clang::SourceLocation IfLoc) { … } void PreprocessorCallbacks::Ifdef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) { … } void PreprocessorCallbacks::Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) { … } } // end namespace Modularize