//=- AnalysisBasedWarnings.cpp - Sema warnings based on libAnalysis -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines analysis_warnings::[Policy,Executor].
// Together they are used by Sema to issue warnings based on inexpensive
// static analysis algorithms in libAnalysis.
//
//===----------------------------------------------------------------------===//
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/AnalysisContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/Analyses/ReachableCode.h"
#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h"
#include "clang/Analysis/CFGStmtMap.h"
#include "clang/Analysis/Analyses/UninitializedValues.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <vector>
using namespace clang;
//===----------------------------------------------------------------------===//
// Unreachable code analysis.
//===----------------------------------------------------------------------===//
namespace {
class UnreachableCodeHandler : public reachable_code::Callback {
Sema &S;
public:
UnreachableCodeHandler(Sema &s) : S(s) {}
void HandleUnreachable(SourceLocation L, SourceRange R1, SourceRange R2) {
S.Diag(L, diag::warn_unreachable) << R1 << R2;
}
};
}
/// CheckUnreachable - Check for unreachable code.
static void CheckUnreachable(Sema &S, AnalysisContext &AC) {
UnreachableCodeHandler UC(S);
reachable_code::FindUnreachableCode(AC, UC);
}
//===----------------------------------------------------------------------===//
// Check for missing return value.
//===----------------------------------------------------------------------===//
enum ControlFlowKind {
UnknownFallThrough,
NeverFallThrough,
MaybeFallThrough,
AlwaysFallThrough,
NeverFallThroughOrReturn
};
/// CheckFallThrough - Check that we don't fall off the end of a
/// Statement that should return a value.
///
/// \returns AlwaysFallThrough iff we always fall off the end of the statement,
/// MaybeFallThrough iff we might or might not fall off the end,
/// NeverFallThroughOrReturn iff we never fall off the end of the statement or
/// return. We assume NeverFallThrough iff we never fall off the end of the
/// statement but we may return. We assume that functions not marked noreturn
/// will return.
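///
/// For illustration (an editorial sketch, not from the original source):
///
///   int f(bool b) {
///     if (b)
///       return 1;
///   }   // CheckFallThrough returns MaybeFallThrough: the 'b == false'
///       // path falls off the end of this value-returning function.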
static ControlFlowKind CheckFallThrough(AnalysisContext &AC) {
CFG *cfg = AC.getCFG();
if (cfg == 0) return UnknownFallThrough;
// The CFG leaves in dead things, and we don't want the dead code paths to
// confuse us, so we mark all live things first.
llvm::BitVector live(cfg->getNumBlockIDs());
unsigned count = reachable_code::ScanReachableFromBlock(&cfg->getEntry(),
live);
bool AddEHEdges = AC.getAddEHEdges();
if (!AddEHEdges && count != cfg->getNumBlockIDs())
// When there are things remaining dead, and we didn't add EH edges
// from CallExprs to the catch clauses, we have to go back and
// mark them as live.
for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
CFGBlock &b = **I;
if (!live[b.getBlockID()]) {
if (b.pred_begin() == b.pred_end()) {
if (b.getTerminator() && isa<CXXTryStmt>(b.getTerminator()))
// When not adding EH edges from calls, catch clauses
// can otherwise seem dead. Avoid noting them as dead.
count += reachable_code::ScanReachableFromBlock(&b, live);
continue;
}
}
}
// Now we know what is live, we check the live predecessors of the exit block
// and look for fall through paths, being careful to ignore normal returns,
// and exceptional paths.
bool HasLiveReturn = false;
bool HasFakeEdge = false;
bool HasPlainEdge = false;
bool HasAbnormalEdge = false;
// Ignore default cases that aren't likely to be reachable because all
// enums in a switch(X) have explicit case statements.
CFGBlock::FilterOptions FO;
FO.IgnoreDefaultsWithCoveredEnums = 1;
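// For example (illustrative only): in
//   enum Color { Red, Green };
//   switch (c) { case Red: return 1; case Green: return 2; }
// every enumerator is covered, so the implicit default edge into the exit
// block is filtered out here rather than reported as a fall-through path.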
for (CFGBlock::filtered_pred_iterator
I = cfg->getExit().filtered_pred_start_end(FO); I.hasMore(); ++I) {
const CFGBlock& B = **I;
if (!live[B.getBlockID()])
continue;
// Destructors can appear after the 'return' in the CFG. This is
// normal. We need to look past the destructors for the return
// statement (if it exists).
CFGBlock::const_reverse_iterator ri = B.rbegin(), re = B.rend();
bool hasNoReturnDtor = false;
for ( ; ri != re ; ++ri) {
CFGElement CE = *ri;
// FIXME: The right solution is to just sever the edges in the
// CFG itself.
if (const CFGImplicitDtor *iDtor = ri->getAs<CFGImplicitDtor>())
if (iDtor->isNoReturn(AC.getASTContext())) {
hasNoReturnDtor = true;
HasFakeEdge = true;
break;
}
if (isa<CFGStmt>(CE))
break;
}
if (hasNoReturnDtor)
continue;
// No more CFGElements in the block?
if (ri == re) {
if (B.getTerminator() && isa<CXXTryStmt>(B.getTerminator())) {
HasAbnormalEdge = true;
continue;
}
// A labeled empty statement, or the entry block...
HasPlainEdge = true;
continue;
}
CFGStmt CS = cast<CFGStmt>(*ri);
const Stmt *S = CS.getStmt();
if (isa<ReturnStmt>(S)) {
HasLiveReturn = true;
continue;
}
if (isa<ObjCAtThrowStmt>(S)) {
HasFakeEdge = true;
continue;
}
if (isa<CXXThrowExpr>(S)) {
HasFakeEdge = true;
continue;
}
if (const AsmStmt *AS = dyn_cast<AsmStmt>(S)) {
if (AS->isMSAsm()) {
HasFakeEdge = true;
HasLiveReturn = true;
continue;
}
}
if (isa<CXXTryStmt>(S)) {
HasAbnormalEdge = true;
continue;
}
bool NoReturnEdge = false;
if (const CallExpr *C = dyn_cast<CallExpr>(S)) {
if (std::find(B.succ_begin(), B.succ_end(), &cfg->getExit())
== B.succ_end()) {
HasAbnormalEdge = true;
continue;
}
const Expr *CEE = C->getCallee()->IgnoreParenCasts();
QualType calleeType = CEE->getType();
if (calleeType == AC.getASTContext().BoundMemberTy) {
calleeType = Expr::findBoundMemberType(CEE);
assert(!calleeType.isNull() && "analyzing unresolved call?");
}
if (getFunctionExtInfo(calleeType).getNoReturn()) {
NoReturnEdge = true;
HasFakeEdge = true;
} else if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(CEE)) {
const ValueDecl *VD = DRE->getDecl();
if (VD->hasAttr<NoReturnAttr>()) {
NoReturnEdge = true;
HasFakeEdge = true;
}
}
}
// FIXME: Add noreturn message sends.
if (NoReturnEdge == false)
HasPlainEdge = true;
}
if (!HasPlainEdge) {
if (HasLiveReturn)
return NeverFallThrough;
return NeverFallThroughOrReturn;
}
if (HasAbnormalEdge || HasFakeEdge || HasLiveReturn)
return MaybeFallThrough;
// This says AlwaysFallThrough for calls to functions that are not marked
// noreturn, that don't return. If people would like this warning to be more
// accurate, such functions should be marked as noreturn.
return AlwaysFallThrough;
}
struct CheckFallThroughDiagnostics {
unsigned diag_MaybeFallThrough_HasNoReturn;
unsigned diag_MaybeFallThrough_ReturnsNonVoid;
unsigned diag_AlwaysFallThrough_HasNoReturn;
unsigned diag_AlwaysFallThrough_ReturnsNonVoid;
unsigned diag_NeverFallThroughOrReturn;
bool funMode;
SourceLocation FuncLoc;
static CheckFallThroughDiagnostics MakeForFunction(const Decl *Func) {
CheckFallThroughDiagnostics D;
D.FuncLoc = Func->getLocation();
D.diag_MaybeFallThrough_HasNoReturn =
diag::warn_falloff_noreturn_function;
D.diag_MaybeFallThrough_ReturnsNonVoid =
diag::warn_maybe_falloff_nonvoid_function;
D.diag_AlwaysFallThrough_HasNoReturn =
diag::warn_falloff_noreturn_function;
D.diag_AlwaysFallThrough_ReturnsNonVoid =
diag::warn_falloff_nonvoid_function;
// Don't suggest that virtual functions be marked "noreturn", since they
// might be overridden by non-noreturn functions.
bool isVirtualMethod = false;
if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(Func))
isVirtualMethod = Method->isVirtual();
if (!isVirtualMethod)
D.diag_NeverFallThroughOrReturn =
diag::warn_suggest_noreturn_function;
else
D.diag_NeverFallThroughOrReturn = 0;
D.funMode = true;
return D;
}
static CheckFallThroughDiagnostics MakeForBlock() {
CheckFallThroughDiagnostics D;
D.diag_MaybeFallThrough_HasNoReturn =
diag::err_noreturn_block_has_return_expr;
D.diag_MaybeFallThrough_ReturnsNonVoid =
diag::err_maybe_falloff_nonvoid_block;
D.diag_AlwaysFallThrough_HasNoReturn =
diag::err_noreturn_block_has_return_expr;
D.diag_AlwaysFallThrough_ReturnsNonVoid =
diag::err_falloff_nonvoid_block;
D.diag_NeverFallThroughOrReturn =
diag::warn_suggest_noreturn_block;
D.funMode = false;
return D;
}
bool checkDiagnostics(Diagnostic &D, bool ReturnsVoid,
bool HasNoReturn) const {
if (funMode) {
return (ReturnsVoid ||
D.getDiagnosticLevel(diag::warn_maybe_falloff_nonvoid_function,
FuncLoc) == Diagnostic::Ignored)
&& (!HasNoReturn ||
D.getDiagnosticLevel(diag::warn_noreturn_function_has_return_expr,
FuncLoc) == Diagnostic::Ignored)
&& (!ReturnsVoid ||
D.getDiagnosticLevel(diag::warn_suggest_noreturn_block, FuncLoc)
== Diagnostic::Ignored);
}
// For blocks.
return ReturnsVoid && !HasNoReturn
&& (!ReturnsVoid ||
D.getDiagnosticLevel(diag::warn_suggest_noreturn_block, FuncLoc)
== Diagnostic::Ignored);
}
};
/// CheckFallThroughForBody - Check that we don't fall off the end of a
/// function that should return a value. Check that we don't fall off the end
/// of a noreturn function. We assume that functions and blocks not marked
/// noreturn will return.
static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
const BlockExpr *blkExpr,
const CheckFallThroughDiagnostics& CD,
AnalysisContext &AC) {
bool ReturnsVoid = false;
bool HasNoReturn = false;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
ReturnsVoid = FD->getResultType()->isVoidType();
HasNoReturn = FD->hasAttr<NoReturnAttr>() ||
FD->getType()->getAs<FunctionType>()->getNoReturnAttr();
}
else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
ReturnsVoid = MD->getResultType()->isVoidType();
HasNoReturn = MD->hasAttr<NoReturnAttr>();
}
else if (isa<BlockDecl>(D)) {
QualType BlockTy = blkExpr->getType();
if (const FunctionType *FT =
BlockTy->getPointeeType()->getAs<FunctionType>()) {
if (FT->getResultType()->isVoidType())
ReturnsVoid = true;
if (FT->getNoReturnAttr())
HasNoReturn = true;
}
}
Diagnostic &Diags = S.getDiagnostics();
// Short circuit for compilation speed.
if (CD.checkDiagnostics(Diags, ReturnsVoid, HasNoReturn))
return;
// FIXME: Function try block
if (const CompoundStmt *Compound = dyn_cast<CompoundStmt>(Body)) {
switch (CheckFallThrough(AC)) {
case UnknownFallThrough:
break;
case MaybeFallThrough:
if (HasNoReturn)
S.Diag(Compound->getRBracLoc(),
CD.diag_MaybeFallThrough_HasNoReturn);
else if (!ReturnsVoid)
S.Diag(Compound->getRBracLoc(),
CD.diag_MaybeFallThrough_ReturnsNonVoid);
break;
case AlwaysFallThrough:
if (HasNoReturn)
S.Diag(Compound->getRBracLoc(),
CD.diag_AlwaysFallThrough_HasNoReturn);
else if (!ReturnsVoid)
S.Diag(Compound->getRBracLoc(),
CD.diag_AlwaysFallThrough_ReturnsNonVoid);
break;
case NeverFallThroughOrReturn:
if (ReturnsVoid && !HasNoReturn && CD.diag_NeverFallThroughOrReturn) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
S.Diag(Compound->getLBracLoc(), CD.diag_NeverFallThroughOrReturn)
<< FD;
} else {
S.Diag(Compound->getLBracLoc(), CD.diag_NeverFallThroughOrReturn);
}
}
break;
case NeverFallThrough:
break;
}
}
}
//===----------------------------------------------------------------------===//
// -Wuninitialized
//===----------------------------------------------------------------------===//
namespace {
/// ContainsReference - A visitor class to search for references to
/// a particular declaration (the needle) within any evaluated component of an
/// expression (recursively).
class ContainsReference : public EvaluatedExprVisitor<ContainsReference> {
bool FoundReference;
const DeclRefExpr *Needle;
public:
ContainsReference(ASTContext &Context, const DeclRefExpr *Needle)
: EvaluatedExprVisitor<ContainsReference>(Context),
FoundReference(false), Needle(Needle) {}
void VisitExpr(Expr *E) {
// Stop evaluating if we already have a reference.
if (FoundReference)
return;
EvaluatedExprVisitor<ContainsReference>::VisitExpr(E);
}
void VisitDeclRefExpr(DeclRefExpr *E) {
if (E == Needle)
FoundReference = true;
else
EvaluatedExprVisitor<ContainsReference>::VisitDeclRefExpr(E);
}
bool doesContainReference() const { return FoundReference; }
};
}
/// DiagnoseUninitializedUse -- Helper function for diagnosing uses of an
/// uninitialized variable. This manages the different forms of diagnostic
/// emitted for particular types of uses. Returns true if the use was diagnosed
/// as a warning. If a particular use is one we omit warnings for, returns
/// false.
static bool DiagnoseUninitializedUse(Sema &S, const VarDecl *VD,
const Expr *E, bool isAlwaysUninit) {
bool isSelfInit = false;
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
if (isAlwaysUninit) {
// Inspect the initializer of the variable declaration which is
// being referenced prior to its initialization. We emit
// specialized diagnostics for self-initialization, and we
// specifically avoid warning about self references which take the
// form of:
//
// int x = x;
//
// This is used to indicate to GCC that 'x' is intentionally left
// uninitialized. Proven code paths which access 'x' in
// an uninitialized state after this will still warn.
//
// TODO: Should we suppress maybe-uninitialized warnings for
// variables initialized in this way?
if (const Expr *Initializer = VD->getInit()) {
if (DRE == Initializer->IgnoreParenImpCasts())
return false;
ContainsReference CR(S.Context, DRE);
CR.Visit(const_cast<Expr*>(Initializer));
isSelfInit = CR.doesContainReference();
}
if (isSelfInit) {
S.Diag(DRE->getLocStart(),
diag::warn_uninit_self_reference_in_init)
<< VD->getDeclName() << VD->getLocation() << DRE->getSourceRange();
} else {
S.Diag(DRE->getLocStart(), diag::warn_uninit_var)
<< VD->getDeclName() << DRE->getSourceRange();
}
} else {
S.Diag(DRE->getLocStart(), diag::warn_maybe_uninit_var)
<< VD->getDeclName() << DRE->getSourceRange();
}
} else {
const BlockExpr *BE = cast<BlockExpr>(E);
S.Diag(BE->getLocStart(),
isAlwaysUninit ? diag::warn_uninit_var_captured_by_block
: diag::warn_maybe_uninit_var_captured_by_block)
<< VD->getDeclName();
}
// Report where the variable was declared when the use wasn't within
// the initializer of that declaration.
if (!isSelfInit)
S.Diag(VD->getLocStart(), diag::note_uninit_var_def)
<< VD->getDeclName();
return true;
}
static void SuggestInitializationFixit(Sema &S, const VarDecl *VD) {
// Don't issue a fixit if there is already an initializer.
if (VD->getInit())
return;
// Suggest possible initialization (if any).
const char *initialization = 0;
QualType VariableTy = VD->getType().getCanonicalType();
if (VariableTy->isObjCObjectPointerType() ||
VariableTy->isBlockPointerType()) {
// Check if 'nil' is defined.
if (S.PP.getMacroInfo(&S.getASTContext().Idents.get("nil")))
initialization = " = nil";
else
initialization = " = 0";
}
else if (VariableTy->isRealFloatingType())
initialization = " = 0.0";
else if (VariableTy->isBooleanType() && S.Context.getLangOptions().CPlusPlus)
initialization = " = false";
else if (VariableTy->isEnumeralType())
return;
else if (VariableTy->isPointerType() || VariableTy->isMemberPointerType()) {
if (S.Context.getLangOptions().CPlusPlus0x)
initialization = " = nullptr";
// Check if 'NULL' is defined.
else if (S.PP.getMacroInfo(&S.getASTContext().Idents.get("NULL")))
initialization = " = NULL";
else
initialization = " = 0";
}
else if (VariableTy->isScalarType())
initialization = " = 0";
if (initialization) {
SourceLocation loc = S.PP.getLocForEndOfToken(VD->getLocEnd());
S.Diag(loc, diag::note_var_fixit_add_initialization)
<< FixItHint::CreateInsertion(loc, initialization);
}
}
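// For illustration (an editorial sketch, not from the original source): for
//
//   int *p;        // 'p' later used while uninitialized
//
// the note emitted above carries a fixit that inserts " = 0" (or " = NULL" /
// " = nullptr", depending on language mode and which macros are defined)
// right after the declarator:
//
//   int *p = 0;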
typedef std::pair<const Expr*, bool> UninitUse;
namespace {
struct SLocSort {
bool operator()(const UninitUse &a, const UninitUse &b) {
SourceLocation aLoc = a.first->getLocStart();
SourceLocation bLoc = b.first->getLocStart();
return aLoc.getRawEncoding() < bLoc.getRawEncoding();
}
};
class UninitValsDiagReporter : public UninitVariablesHandler {
Sema &S;
typedef SmallVector<UninitUse, 2> UsesVec;
typedef llvm::DenseMap<const VarDecl *, UsesVec*> UsesMap;
UsesMap *uses;
public:
UninitValsDiagReporter(Sema &S) : S(S), uses(0) {}
~UninitValsDiagReporter() {
flushDiagnostics();
}
void handleUseOfUninitVariable(const Expr *ex, const VarDecl *vd,
bool isAlwaysUninit) {
if (!uses)
uses = new UsesMap();
UsesVec *&vec = (*uses)[vd];
if (!vec)
vec = new UsesVec();
vec->push_back(std::make_pair(ex, isAlwaysUninit));
}
void flushDiagnostics() {
if (!uses)
return;
for (UsesMap::iterator i = uses->begin(), e = uses->end(); i != e; ++i) {
const VarDecl *vd = i->first;
UsesVec *vec = i->second;
// Sort the uses by their SourceLocations. While not strictly
// guaranteed to produce them in line/column order, this will provide
// a stable ordering.
std::sort(vec->begin(), vec->end(), SLocSort());
for (UsesVec::iterator vi = vec->begin(), ve = vec->end(); vi != ve;
++vi) {
if (!DiagnoseUninitializedUse(S, vd, vi->first,
/*isAlwaysUninit=*/vi->second))
continue;
SuggestInitializationFixit(S, vd);
// Skip further diagnostics for this variable. We try to warn only on
// the first point at which a variable is used uninitialized.
break;
}
delete vec;
}
delete uses;
  }
};
}
//===----------------------------------------------------------------------===//
// -Wthread-safety
//===----------------------------------------------------------------------===//
namespace {
/// \brief Implements a set of CFGBlocks using a BitVector.
///
/// This class contains a minimal interface, primarily dictated by the SetType
/// template parameter of the llvm::po_iterator template, as used with external
/// storage. We also use this set to keep track of which CFGBlocks we visit
/// during the analysis.
class CFGBlockSet {
llvm::BitVector VisitedBlockIDs;
public:
// po_iterator requires this iterator, but the only interface needed is the
// value_type typedef.
struct iterator {
typedef const CFGBlock *value_type;
};
CFGBlockSet() {}
CFGBlockSet(const CFG *G) : VisitedBlockIDs(G->getNumBlockIDs(), false) {}
/// \brief Set the bit associated with a particular CFGBlock.
/// This is the important method for the SetType template parameter.
bool insert(const CFGBlock *Block) {
// Note that insert() is called by po_iterator, which doesn't check to make
// sure that Block is non-null. Moreover, the CFGBlock iterator will
// occasionally hand out null pointers for pruned edges, so we catch those
// here.
if (Block == 0)
return false; // if an edge is trivially false.
if (VisitedBlockIDs.test(Block->getBlockID()))
return false;
VisitedBlockIDs.set(Block->getBlockID());
return true;
}
/// \brief Check if the bit for a CFGBlock has been already set.
/// This method is for tracking visited blocks in the main threadsafety loop.
/// Block must not be null.
bool alreadySet(const CFGBlock *Block) {
return VisitedBlockIDs.test(Block->getBlockID());
}
};
/// \brief We create a helper class which we use to iterate through CFGBlocks in
/// the topological order.
class TopologicallySortedCFG {
typedef llvm::po_iterator<const CFG*, CFGBlockSet, true> po_iterator;
std::vector<const CFGBlock*> Blocks;
public:
typedef std::vector<const CFGBlock*>::reverse_iterator iterator;
TopologicallySortedCFG(const CFG *CFGraph) {
Blocks.reserve(CFGraph->getNumBlockIDs());
CFGBlockSet BSet(CFGraph);
for (po_iterator I = po_iterator::begin(CFGraph, BSet),
E = po_iterator::end(CFGraph, BSet); I != E; ++I) {
Blocks.push_back(*I);
}
}
iterator begin() {
return Blocks.rbegin();
}
iterator end() {
return Blocks.rend();
}
};
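// A sketch of the intended use (editorial illustration; names are local to
// this example): visiting blocks in topological order guarantees that, for
// an acyclic CFG, all predecessors of a block are processed before the block
// itself, so their locksets are available when the block is analyzed:
//
//   TopologicallySortedCFG SortedGraph(CFGraph);
//   for (TopologicallySortedCFG::iterator I = SortedGraph.begin(),
//        E = SortedGraph.end(); I != E; ++I) {
//     const CFGBlock *Block = *I;
//     // ... merge the entry lockset from Block's predecessors, then walk
//     // Block's statements with BuildLockset ...
//   }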
/// \brief A MutexID object uniquely identifies a particular mutex, and
/// is built from an Expr* (i.e. calling a lock function).
///
/// Thread-safety analysis works by comparing lock expressions. Within the
/// body of a function, an expression such as "x->foo->bar.mu" will resolve to
/// a particular mutex object at run-time. Subsequent occurrences of the same
/// expression (where "same" means syntactic equality) will refer to the same
/// run-time object if three conditions hold:
/// (1) Local variables in the expression, such as "x" have not changed.
/// (2) Values on the heap that affect the expression have not changed.
/// (3) The expression involves only pure function calls.
/// The current implementation assumes, but does not verify, that multiple uses
/// of the same lock expression satisfy these criteria.
///
/// Clang introduces an additional wrinkle, which is that it is difficult to
/// derive canonical expressions, or compare expressions directly for equality.
/// Thus, we identify a mutex not by an Expr, but by the set of named
/// declarations that are referenced by the Expr. In other words,
/// x->foo->bar.mu will be a four-element vector with the Decls for
/// mu, bar, foo, and x. The vector will uniquely identify the expression
/// for all practical purposes.
///
/// Note we will need to perform substitution on "this" and function parameter
/// names when constructing a lock expression.
///
/// For example:
/// class C { Mutex Mu; void lock() EXCLUSIVE_LOCK_FUNCTION(this->Mu); };
/// void myFunc(C *X) { ... X->lock() ... }
/// The original expression for the mutex acquired by myFunc is "this->Mu", but
/// "X" is substituted for "this" so we get X->Mu();
///
/// For another example:
/// foo(MyList *L) EXCLUSIVE_LOCKS_REQUIRED(L->Mu) { ... }
/// MyList *MyL;
/// foo(MyL); // requires lock MyL->Mu to be held
class MutexID {
SmallVector<NamedDecl*, 2> DeclSeq;
/// Build a Decl sequence representing the lock from the given expression.
/// Recursive function that bottoms out when the final DeclRefExpr is reached.
void buildMutexID(Expr *Exp) {
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Exp)) {
NamedDecl *ND = cast<NamedDecl>(DRE->getDecl()->getCanonicalDecl());
DeclSeq.push_back(ND);
} else if (MemberExpr *ME = dyn_cast<MemberExpr>(Exp)) {
NamedDecl *ND = ME->getMemberDecl();
DeclSeq.push_back(ND);
buildMutexID(ME->getBase());
} else if (isa<CXXThisExpr>(Exp)) {
return;
} else {
// FIXME: add diagnostic
llvm::report_fatal_error("Expected lock expression!");
}
}
public:
MutexID(Expr *LExpr) {
buildMutexID(LExpr);
assert(!DeclSeq.empty());
}
bool operator==(const MutexID &other) const {
return DeclSeq == other.DeclSeq;
}
bool operator!=(const MutexID &other) const {
return !(*this == other);
}
// SmallVector overloads operator< to do lexicographic ordering. Note that
// we use pointer equality (and <) to compare NamedDecls. This means the order
// of MutexIDs in a lockset is nondeterministic. In order to output
// diagnostics in a deterministic ordering, we must order all diagnostics to
// output by SourceLocation when iterating through this lockset.
bool operator<(const MutexID &other) const {
return DeclSeq < other.DeclSeq;
}
/// \brief Returns the name of the first Decl in the list for a given MutexID;
/// e.g. the lock expression foo.bar() has name "bar".
/// The caret will point unambiguously to the lock expression, so using this
/// name in diagnostics is a way to get simple, and consistent, mutex names.
/// We do not want to output the entire expression text for security reasons.
StringRef getName() const {
return DeclSeq.front()->getName();
}
void Profile(llvm::FoldingSetNodeID &ID) const {
for (SmallVectorImpl<NamedDecl*>::const_iterator I = DeclSeq.begin(),
E = DeclSeq.end(); I != E; ++I) {
ID.AddPointer(*I);
}
}
};
enum LockKind {
LK_Shared,
LK_Exclusive
};
enum AccessKind {
AK_Read,
AK_Written
};
/// \brief This is a helper class that stores info about the most recent
/// acquire of a lock.
///
/// The main body of the analysis maps MutexIDs to LockDatas.
struct LockData {
SourceLocation AcquireLoc;
/// \brief LKind stores whether a lock is held shared or exclusively.
/// Note that this analysis does not currently support either re-entrant
/// locking or lock "upgrading" and "downgrading" between exclusive and
/// shared.
///
/// FIXME: add support for re-entrant locking and lock up/downgrading
LockKind LKind;
LockData(SourceLocation AcquireLoc, LockKind LKind)
: AcquireLoc(AcquireLoc), LKind(LKind) {}
bool operator==(const LockData &other) const {
return AcquireLoc == other.AcquireLoc && LKind == other.LKind;
}
bool operator!=(const LockData &other) const {
return !(*this == other);
}
void Profile(llvm::FoldingSetNodeID &ID) const {
ID.AddInteger(AcquireLoc.getRawEncoding());
ID.AddInteger(LKind);
}
};
/// A Lockset maps each MutexID (defined above) to information about how it has
/// been locked.
typedef llvm::ImmutableMap<MutexID, LockData> Lockset;
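// Illustrative example (editorial, not from the original source) of how a
// Lockset evolves across statements; 'mu' and 'x' are hypothetical names,
// with 'mu' a mutex exposed through the usual lock/unlock annotations:
//
//   mu.Lock();    // Lockset: { mu -> LockData(loc, LK_Exclusive) }
//   x = 1;        // checkAccess consults the lockset for x's guarded_by mutex
//   mu.Unlock();  // Lockset: { }  (a second Unlock here would warn)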
/// \brief We use this class to visit different types of expressions in
/// CFGBlocks, and build up the lockset.
/// An expression may cause us to add or remove locks from the lockset, or else
/// output error messages related to missing locks.
/// FIXME: In future, we may be able to not inherit from a visitor.
class BuildLockset : public StmtVisitor<BuildLockset> {
Sema &S;
Lockset LSet;
Lockset::Factory &LocksetFactory;
// Helper functions
void removeLock(SourceLocation UnlockLoc, Expr *LockExp);
void addLock(SourceLocation LockLoc, Expr *LockExp, LockKind LK);
const ValueDecl *getValueDecl(Expr *Exp);
void warnIfMutexNotHeld (const NamedDecl *D, Expr *Exp, AccessKind AK,
MutexID &Mutex, unsigned DiagID);
void checkAccess(Expr *Exp, AccessKind AK);
void checkDereference(Expr *Exp, AccessKind AK);
template <class AttrType>
void addLocksToSet(LockKind LK, Attr *Attr, CXXMemberCallExpr *Exp);
/// \brief Returns true if the lockset contains a lock, regardless of whether
/// the lock is held exclusively or shared.
bool locksetContains(MutexID Lock) {
return LSet.lookup(Lock);
}
/// \brief Returns true if the lockset contains a lock with the passed in
/// locktype.
bool locksetContains(MutexID Lock, LockKind KindRequested) const {
const LockData *LockHeld = LSet.lookup(Lock);
return (LockHeld && KindRequested == LockHeld->LKind);
}
public:
BuildLockset(Sema &S, Lockset LS, Lockset::Factory &F)
: StmtVisitor<BuildLockset>(), S(S), LSet(LS),
LocksetFactory(F) {}
Lockset getLockset() {
return LSet;
}
void VisitUnaryOperator(UnaryOperator *UO);
void VisitBinaryOperator(BinaryOperator *BO);
void VisitCastExpr(CastExpr *CE);
void VisitCXXMemberCallExpr(CXXMemberCallExpr *Exp);
};
/// \brief Add a new lock to the lockset, warning if the lock is already there.
/// \param LockLoc The source location of the acquire
/// \param LockExp The lock expression corresponding to the lock to be added
void BuildLockset::addLock(SourceLocation LockLoc, Expr *LockExp,
LockKind LK) {
// FIXME: deal with acquired before/after annotations
MutexID Mutex(LockExp);
LockData NewLock(LockLoc, LK);
// FIXME: Don't always warn when we have support for reentrant locks.
if (locksetContains(Mutex))
S.Diag(LockLoc, diag::warn_double_lock) << Mutex.getName();
LSet = LocksetFactory.add(LSet, Mutex, NewLock);
}
/// \brief Remove a lock from the lockset, warning if the lock is not there.
/// \param LockExp The lock expression corresponding to the lock to be removed
/// \param UnlockLoc The source location of the unlock (only used in error msg)
void BuildLockset::removeLock(SourceLocation UnlockLoc, Expr *LockExp) {
MutexID Mutex(LockExp);
Lockset NewLSet = LocksetFactory.remove(LSet, Mutex);
if(NewLSet == LSet)
S.Diag(UnlockLoc, diag::warn_unlock_but_no_lock) << Mutex.getName();
LSet = NewLSet;
}
/// \brief Gets the value decl pointer from DeclRefExprs or MemberExprs
const ValueDecl *BuildLockset::getValueDecl(Expr *Exp) {
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Exp))
return DR->getDecl();
if (const MemberExpr *ME = dyn_cast<MemberExpr>(Exp))
return ME->getMemberDecl();
return 0;
}
/// \brief Warn if the LSet does not contain a lock sufficient to protect access
/// of at least the passed in AccessKind.
void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, Expr *Exp,
AccessKind AK, MutexID &Mutex,
unsigned DiagID) {
switch (AK) {
case AK_Read:
if (!locksetContains(Mutex))
S.Diag(Exp->getExprLoc(), DiagID)
<< D->getName() << Mutex.getName() << LK_Shared;
break;
case AK_Written :
if (!locksetContains(Mutex, LK_Exclusive))
S.Diag(Exp->getExprLoc(), DiagID)
<< D->getName() << Mutex.getName() << LK_Exclusive;
    break;
  }
}
/// \brief This method identifies variable dereferences and checks pt_guarded_by
/// and pt_guarded_var annotations. Note that we only check these annotations
/// at the time a pointer is dereferenced.
/// FIXME: We need to check for other types of pointer dereferences
/// (e.g. [], ->) and deal with them here.
/// \param Exp An expression that has been read or written.
void BuildLockset::checkDereference(Expr *Exp, AccessKind AK) {
UnaryOperator *UO = dyn_cast<UnaryOperator>(Exp);
if (!UO || UO->getOpcode() != clang::UO_Deref)
return;
Exp = UO->getSubExpr()->IgnoreParenCasts();
const ValueDecl *D = getValueDecl(Exp);
if(!D || !D->hasAttrs())
return;
if (D->getAttr<PtGuardedVarAttr>() && LSet.isEmpty())
S.Diag(Exp->getExprLoc(), diag::warn_var_deref_requires_any_lock)
<< D->getName();
const AttrVec &ArgAttrs = D->getAttrs();
for(unsigned i = 0, Size = ArgAttrs.size(); i < Size; ++i) {
if (ArgAttrs[i]->getKind() != attr::PtGuardedBy)
continue;
const PtGuardedByAttr *PGBAttr = cast<PtGuardedByAttr>(ArgAttrs[i]);
MutexID Mutex(PGBAttr->getArg());
warnIfMutexNotHeld(D, Exp, AK, Mutex, diag::warn_var_deref_requires_lock);
}
}
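// For example (hypothetical annotated code, assuming a PT_GUARDED_BY macro
// wrapping the pt_guarded_by attribute):
//
//   Mutex mu;
//   int *p PT_GUARDED_BY(mu);
//   void f() { *p = 1; }   // warns: writing '*p' requires 'mu' exclusively
//
// Note the check fires on the dereference '*p'; merely copying the pointer
// value 'p' itself is not checked by this method.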
/// \brief Checks guarded_by and guarded_var attributes.
/// Whenever we identify an access (read or write) of a DeclRefExpr or
/// MemberExpr, we need to check whether there are any guarded_by or
/// guarded_var attributes, and make sure we hold the appropriate mutexes.
void BuildLockset::checkAccess(Expr *Exp, AccessKind AK) {
const ValueDecl *D = getValueDecl(Exp);
if(!D || !D->hasAttrs())
return;
if (D->getAttr<GuardedVarAttr>() && LSet.isEmpty())
S.Diag(Exp->getExprLoc(), diag::warn_variable_requires_any_lock)
<< D->getName();
const AttrVec &ArgAttrs = D->getAttrs();
for(unsigned i = 0, Size = ArgAttrs.size(); i < Size; ++i) {
if (ArgAttrs[i]->getKind() != attr::GuardedBy)
continue;
const GuardedByAttr *GBAttr = cast<GuardedByAttr>(ArgAttrs[i]);
MutexID Mutex(GBAttr->getArg());
warnIfMutexNotHeld(D, Exp, AK, Mutex, diag::warn_variable_requires_lock);
}
}
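// For example (hypothetical annotated code, assuming a GUARDED_BY macro
// wrapping the guarded_by attribute):
//
//   Mutex mu;
//   int x GUARDED_BY(mu);
//   void f() { x = 1; }    // warns unless 'mu' is held exclusively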
/// \brief For unary operations which read and write a variable, we need to
/// check whether we hold any required mutexes. Reads are checked in
/// VisitCastExpr.
void BuildLockset::VisitUnaryOperator(UnaryOperator *UO) {
switch (UO->getOpcode()) {
case clang::UO_PostDec:
case clang::UO_PostInc:
case clang::UO_PreDec:
case clang::UO_PreInc: {
Expr *SubExp = UO->getSubExpr()->IgnoreParenCasts();
checkAccess(SubExp, AK_Written);
checkDereference(SubExp, AK_Written);
break;
}
default:
break;
}
}
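// For example (hypothetical annotated code): given 'int x GUARDED_BY(mu);',
// the expression '++x' both reads and writes 'x', so it is checked here as
// an AK_Written access; a plain read of 'x' is instead caught by
// VisitCastExpr when the lvalue is converted to an rvalue.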