chromium/tools/clang/rewrite_scoped_refptr/RewriteScopedRefptr.cpp

// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This implements a Clang tool to rewrite all instances of
// scoped_refptr<T>'s implicit cast to T (operator T*) to an explicit call to
// the .get() method.

#include <assert.h>
#include <algorithm>
#include <memory>
#include <string>

#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersMacros.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Refactoring.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"

using namespace clang::ast_matchers;
using clang::tooling::CommonOptionsParser;
using clang::tooling::Replacement;
using clang::tooling::Replacements;
using llvm::StringRef;

namespace clang {
namespace ast_matchers {

const internal::VariadicDynCastAllOfMatcher<Decl, CXXConversionDecl>
    conversionDecl;

AST_MATCHER(QualType, isBoolean) {
  return Node->isBooleanType();
}

}  // namespace ast_matchers
}  // namespace clang

namespace {

// Returns true if expr needs to be put in parens (eg: when it is an operator
// syntactically).
bool NeedsParens(const clang::Expr* expr) {
  if (llvm::dyn_cast<clang::UnaryOperator>(expr) ||
      llvm::dyn_cast<clang::BinaryOperator>(expr) ||
      llvm::dyn_cast<clang::ConditionalOperator>(expr)) {
    return true;
  }
  // Calls to an overloaded operator also need parens, except for foo(...) and
  // foo[...] expressions.
  if (const clang::CXXOperatorCallExpr* op =
          llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
    return op->getOperator() != clang::OO_Call &&
           op->getOperator() != clang::OO_Subscript;
  }
  return false;
}

Replacement RewriteImplicitToExplicitConversion(
    const MatchFinder::MatchResult& result,
    const clang::Expr* expr) {
  clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
      result.SourceManager->getSpellingLoc(expr->getLocStart()),
      result.SourceManager->getSpellingLoc(expr->getLocEnd()));
  assert(range.isValid() && "Invalid range!");

  // Handle cases where an implicit cast is being done by dereferencing a
  // pointer to a scoped_refptr<> (sadly, it happens...)
  //
  // This rewrites both "*foo" and "*(foo)" as "foo->get()".
  if (const clang::UnaryOperator* op =
          llvm::dyn_cast<clang::UnaryOperator>(expr)) {
    if (op->getOpcode() == clang::UO_Deref) {
      const clang::Expr* const sub_expr =
          op->getSubExpr()->IgnoreParenImpCasts();
      clang::CharSourceRange sub_expr_range =
          clang::CharSourceRange::getTokenRange(
              result.SourceManager->getSpellingLoc(sub_expr->getLocStart()),
              result.SourceManager->getSpellingLoc(sub_expr->getLocEnd()));
      assert(sub_expr_range.isValid() && "Invalid subexpression range!");

      std::string inner_text = clang::Lexer::getSourceText(
          sub_expr_range, *result.SourceManager, result.Context->getLangOpts());
      assert(!inner_text.empty() && "No text for subexpression!");
      if (NeedsParens(sub_expr)) {
        inner_text.insert(0, "(");
        inner_text.append(")");
      }
      inner_text.append("->get()");
      return Replacement(*result.SourceManager, range, inner_text);
    }
  }

  std::string text = clang::Lexer::getSourceText(
      range, *result.SourceManager, result.Context->getLangOpts());
  assert(!text.empty() && "No text for expression!");

  // Unwrap any temporaries - for example, custom iterators that return
  // scoped_refptr<T> as part of operator*. Any such iterators should also
  // be declaring a scoped_refptr<T>* operator->, per C++03 24.4.1.1 (Table 72)
  if (const clang::CXXBindTemporaryExpr* op =
          llvm::dyn_cast<clang::CXXBindTemporaryExpr>(expr)) {
    expr = op->getSubExpr();
  }

  // Handle iterators (which are operator* calls, followed by implicit
  // conversions) by rewriting *it as it->get()
  if (const clang::CXXOperatorCallExpr* op =
          llvm::dyn_cast<clang::CXXOperatorCallExpr>(expr)) {
    if (op->getOperator() == clang::OO_Star) {
      // Note that this doesn't rewrite **it correctly, since it should be
      // rewritten using parens, e.g. (*it)->get(). However, this shouldn't
      // happen frequently, if at all, since it would likely indicate code is
      // storing pointers to a scoped_refptr in a container.
      text.erase(0, 1);
      text.append("->get()");
      return Replacement(*result.SourceManager, range, text);
    }
  }

  // The only remaining calls should be non-dereferencing calls (eg: member
  // calls), so a simple ".get()" appending should suffice.
  if (NeedsParens(expr)) {
    text.insert(0, "(");
    text.append(")");
  }
  text.append(".get()");
  return Replacement(*result.SourceManager, range, text);
}

Replacement RewriteRawPtrToScopedRefptr(const MatchFinder::MatchResult& result,
                                        clang::SourceLocation begin,
                                        clang::SourceLocation end) {
  clang::CharSourceRange range = clang::CharSourceRange::getTokenRange(
      result.SourceManager->getSpellingLoc(begin),
      result.SourceManager->getSpellingLoc(end));
  assert(range.isValid() && "Invalid range!");

  std::string text = clang::Lexer::getSourceText(
      range, *result.SourceManager, result.Context->getLangOpts());
  text.erase(text.rfind('*'));

  std::string replacement_text("scoped_refptr<");
  replacement_text += text;
  replacement_text += ">";

  return Replacement(*result.SourceManager, range, replacement_text);
}

class GetRewriterCallback : public MatchFinder::MatchCallback {
 public:
  explicit GetRewriterCallback(Replacements* replacements)
      : replacements_(replacements) {}
  virtual void run(const MatchFinder::MatchResult& result) override;

 private:
  Replacements* const replacements_;
};

void GetRewriterCallback::run(const MatchFinder::MatchResult& result) {
  const clang::Expr* arg = result.Nodes.getNodeAs<clang::Expr>("arg");
  assert(arg && "Unexpected match! No Expr captured!");
  auto err =
      replacements_->add(RewriteImplicitToExplicitConversion(result, arg));
  assert(!err);
}

class VarRewriterCallback : public MatchFinder::MatchCallback {
 public:
  explicit VarRewriterCallback(Replacements* replacements)
      : replacements_(replacements) {}
  virtual void run(const MatchFinder::MatchResult& result) override;

 private:
  Replacements* const replacements_;
};

void VarRewriterCallback::run(const MatchFinder::MatchResult& result) {
  const clang::DeclaratorDecl* const var_decl =
      result.Nodes.getNodeAs<clang::DeclaratorDecl>("var");
  assert(var_decl && "Unexpected match! No VarDecl captured!");

  const clang::TypeSourceInfo* tsi = var_decl->getTypeSourceInfo();

  // TODO(dcheng): This mishandles a case where a variable has multiple
  // declarations, e.g.:
  //
  // in .h:
  // Foo* my_global_magical_foo;
  //
  // in .cc:
  // Foo* my_global_magical_foo = CreateFoo();
  //
  // In this case, it will only rewrite the .cc definition. Oh well. This should
  // be rare enough that these cases can be manually handled, since the style
  // guide prohibits globals of non-POD type.
  auto err = replacements_->add(RewriteRawPtrToScopedRefptr(
      result, tsi->getTypeLoc().getBeginLoc(), tsi->getTypeLoc().getEndLoc()));
  assert(!err);
}

class FunctionRewriterCallback : public MatchFinder::MatchCallback {
 public:
  explicit FunctionRewriterCallback(Replacements* replacements)
      : replacements_(replacements) {}
  virtual void run(const MatchFinder::MatchResult& result) override;

 private:
  Replacements* const replacements_;
};

void FunctionRewriterCallback::run(const MatchFinder::MatchResult& result) {
  const clang::FunctionDecl* const function_decl =
      result.Nodes.getNodeAs<clang::FunctionDecl>("fn");
  assert(function_decl && "Unexpected match! No FunctionDecl captured!");

  // If matched against an implicit conversion to a DeclRefExpr, make sure the
  // referenced declaration is of class type, e.g. the tool skips trying to
  // chase pointers/references to determine if the pointee is a scoped_refptr<T>
  // with local storage. Instead, let a human manually handle those cases.
  const clang::VarDecl* const var_decl =
      result.Nodes.getNodeAs<clang::VarDecl>("var");
  if (var_decl && !var_decl->getTypeSourceInfo()->getType()->isClassType()) {
    return;
  }

  for (clang::FunctionDecl* f : function_decl->redecls()) {
    clang::SourceRange range = f->getReturnTypeSourceRange();
    auto err = replacements_->add(
        RewriteRawPtrToScopedRefptr(result, range.getBegin(), range.getEnd()));
    assert(!err);
  }
}

class MacroRewriterCallback : public MatchFinder::MatchCallback {
 public:
  explicit MacroRewriterCallback(Replacements* replacements)
      : replacements_(replacements) {}
  virtual void run(const MatchFinder::MatchResult& result) override;

 private:
  Replacements* const replacements_;
};

void MacroRewriterCallback::run(const MatchFinder::MatchResult& result) {
  const clang::Expr* const expr = result.Nodes.getNodeAs<clang::Expr>("expr");
  assert(expr && "Unexpected match! No Expr captured!");
  auto err =
      replacements_->add(RewriteImplicitToExplicitConversion(result, expr));
  assert(!err);
}

}  // namespace

static llvm::cl::extrahelp common_help(CommonOptionsParser::HelpMessage);

int main(int argc, const char* argv[]) {
  // TODO(dcheng): Clang tooling should do this itself.
  // http://llvm.org/bugs/show_bug.cgi?id=21627
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmParser();
  llvm::cl::OptionCategory category("Remove scoped_refptr conversions");
  CommonOptionsParser options(argc, argv, category);
  clang::tooling::ClangTool tool(options.getCompilations(),
                                 options.getSourcePathList());

  MatchFinder match_finder;
  Replacements replacements;

  auto is_scoped_refptr = cxxRecordDecl(isSameOrDerivedFrom("::scoped_refptr"),
                                        isTemplateInstantiation());

  // Finds all calls to conversion operator member function. This catches calls
  // to "operator T*", "operator Testable", and "operator bool" equally.
  auto base_matcher =
      cxxMemberCallExpr(thisPointerType(is_scoped_refptr),
                        callee(conversionDecl()), on(id("arg", expr())));

  // The heuristic for whether or not converting a temporary is 'unsafe'. An
  // unsafe conversion is one where a temporary scoped_refptr<T> is converted to
  // another type. The matcher provides an exception for a temporary
  // scoped_refptr that is the result of an operator call. In this case, assume
  // that it's the result of an iterator dereference, and the container itself
  // retains the necessary reference, since this is a common idiom to see in
  // loop bodies.
  auto is_unsafe_temporary_conversion =
      on(cxxBindTemporaryExpr(unless(has(cxxOperatorCallExpr()))));

  // Returning a scoped_refptr<T> as a T* is considered unsafe if either are
  // true:
  // - The scoped_refptr<T> is a temporary.
  // - The scoped_refptr<T> has local lifetime.
  auto returned_as_raw_ptr = hasParent(
      returnStmt(hasAncestor(id("fn", functionDecl(returns(pointerType()))))));
  // This matcher intentionally matches more than it should. For example, this
  // will match:
  //   scoped_refptr<Foo>& foo = some_other_foo;
  //   return foo;
  // The matcher callback filters out VarDecls that aren't a scoped_refptr<T>,
  // so those cases can be manually handled.
  auto is_local_variable =
      on(declRefExpr(to(id("var", varDecl(hasLocalStorage())))));
  auto is_unsafe_return =
      anyOf(allOf(hasParent(implicitCastExpr(returned_as_raw_ptr)),
                  is_local_variable),
            allOf(hasParent(implicitCastExpr(
                      hasParent(exprWithCleanups(returned_as_raw_ptr)))),
                  is_unsafe_temporary_conversion));

  // This catches both user-defined conversions (eg: "operator bool") and
  // standard conversion sequence (C++03 13.3.3.1.1), such as converting a
  // pointer to a bool.
  auto implicit_to_bool =
      implicitCastExpr(hasImplicitDestinationType(isBoolean()));

  // Avoid converting calls to of "operator Testable" -> "bool" and calls of
  // "operator T*" -> "bool".
  auto bool_conversion_matcher = hasParent(
      expr(anyOf(implicit_to_bool, expr(hasParent(implicit_to_bool)))));

  auto is_logging_helper =
      functionDecl(anyOf(hasName("CheckEQImpl"), hasName("CheckNEImpl")));
  auto is_gtest_helper = functionDecl(
      anyOf(cxxMethodDecl(ofClass(cxxRecordDecl(isSameOrDerivedFrom(
                              hasName("::testing::internal::EqHelper")))),
                          hasName("Compare")),
            hasName("::testing::internal::CmpHelperNE")));
  auto is_gtest_assertion_result_ctor =
      cxxConstructorDecl(ofClass(cxxRecordDecl(
          isSameOrDerivedFrom(hasName("::testing::AssertionResult")))));

  // Find all calls to an operator overload that are 'safe'.
  //
  // All bool conversions will be handled with the Testable trick, but that
  // can only be used once "operator T*" is removed, since otherwise it leaves
  // the call ambiguous.
  GetRewriterCallback get_callback(&replacements);
  match_finder.addMatcher(
      cxxMemberCallExpr(
          base_matcher,
          // Excluded since the conversion may be unsafe.
          unless(anyOf(is_unsafe_temporary_conversion, is_unsafe_return)),
          // Excluded since the conversion occurs inside a helper function that
          // the macro wraps. Letting this callback handle the rewrite would
          // result in an incorrect replacement that changes the helper function
          // itself. Instead, the right replacement is to rewrite the macro's
          // arguments.
          unless(hasAncestor(decl(anyOf(is_logging_helper, is_gtest_helper,
                                        is_gtest_assertion_result_ctor))))),
      &get_callback);

  // Find temporary scoped_refptr<T>'s being unsafely assigned to a T*.
  VarRewriterCallback var_callback(&replacements);
  auto initialized_with_temporary = has(ignoringImpCasts(
      cxxMemberCallExpr(base_matcher, is_unsafe_temporary_conversion)));
  match_finder.addMatcher(
      id("var", varDecl(hasInitializer(initialized_with_temporary),
                        hasType(pointerType()))),
      &var_callback);
  match_finder.addMatcher(
      cxxConstructorDecl(forEachConstructorInitializer(
          allOf(withInitializer(initialized_with_temporary),
                forField(id("var", fieldDecl(hasType(pointerType()))))))),
      &var_callback);

  // Rewrite functions that unsafely turn a scoped_refptr<T> into a T* when
  // returning a value.
  FunctionRewriterCallback fn_callback(&replacements);
  match_finder.addMatcher(cxxMemberCallExpr(base_matcher, is_unsafe_return),
                          &fn_callback);

  // Rewrite logging / gtest expressions that result in an implicit conversion.
  // Luckily, the matchers don't need to handle the case where one of the macro
  // arguments is NULL, such as:
  // CHECK_EQ(my_scoped_refptr, NULL)
  // because it simply doesn't compile--since NULL is actually of integral type,
  // this doesn't trigger scoped_refptr<T>'s implicit conversion. Since there is
  // no comparison overload for scoped_refptr<T> and int, this fails to compile.
  MacroRewriterCallback macro_callback(&replacements);
  // CHECK_EQ/CHECK_NE helpers.
  match_finder.addMatcher(
      callExpr(callee(is_logging_helper), argumentCountIs(3),
               hasAnyArgument(ignoringParenImpCasts(
                   id("expr", expr(hasType(is_scoped_refptr))))),
               hasAnyArgument(ignoringParenImpCasts(hasType(pointerType()))),
               hasArgument(2, stringLiteral())),
      &macro_callback);
  // ASSERT_EQ/ASSERT_NE/EXPECT_EQ/EXPECT_EQ, which use the same underlying
  // helper functions. Even though gtest has special handling for pointer to
  // NULL comparisons, it doesn't trigger in this case, so no special handling
  // is needed for the replacements.
  match_finder.addMatcher(
      callExpr(callee(is_gtest_helper),
               argumentCountIs(4),
               hasArgument(0, stringLiteral()),
               hasArgument(1, stringLiteral()),
               hasAnyArgument(id("expr", expr(hasType(is_scoped_refptr)))),
               hasAnyArgument(hasType(pointerType()))),
      &macro_callback);
  // ASSERT_TRUE/EXPECT_TRUE helpers. Note that this matcher doesn't need to
  // handle ASSERT_FALSE/EXPECT_FALSE, because it gets coerced to bool before
  // being passed as an argument to AssertionResult's constructor. As a result,
  // GetRewriterCallback handles this case properly since the conversion isn't
  // hidden inside AssertionResult, and the generated replacement properly
  // rewrites the macro argument.
  // However, the tool does need to handle the _TRUE counterparts, since the
  // conversion occurs inside the constructor in those cases.
  match_finder.addMatcher(
      cxxConstructExpr(
          argumentCountIs(2),
          hasArgument(0, id("expr", expr(hasType(is_scoped_refptr)))),
          hasDeclaration(is_gtest_assertion_result_ctor)),
      &macro_callback);

  std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
      clang::tooling::newFrontendActionFactory(&match_finder);
  int result = tool.run(factory.get());
  if (result != 0)
    return result;

  if (replacements.empty())
    return 0;

  // Serialization format is documented in tools/clang/scripts/run_tool.py
  llvm::outs() << "==== BEGIN EDITS ====\n";
  for (const auto& r : replacements) {
    std::string replacement_text = r.getReplacementText().str();
    std::replace(replacement_text.begin(), replacement_text.end(), '\n', '\0');
    llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset() << ":::"
                 << r.getLength() << ":::" << replacement_text << "\n";
  }
  llvm::outs() << "==== END EDITS ====\n";

  return 0;
}