Newer
Older
//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements extra semantic analysis beyond what is enforced
// by the C type system.
//
//===----------------------------------------------------------------------===//
#include "Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
using namespace clang;
Chris Lattner
committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/// getLocationOfStringLiteralByte - Return a source location that points to the
/// specified byte of the specified string literal.
///
/// Strings are amazingly complex. They can be formed from multiple tokens and
/// can have escape sequences in them in addition to the usual trigraph and
/// escaped newline business. This routine handles this complexity.
///
SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
unsigned ByteNo) const {
assert(!SL->isWide() && "This doesn't work for wide strings yet");
// Loop over all of the tokens in this string until we find the one that
// contains the byte we're looking for.
unsigned TokNo = 0;
while (1) {
assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
// Get the spelling of the string so that we can get the data that makes up
// the string literal, not the identifier for the macro it is potentially
// expanded through.
SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
// Re-lex the token to get its length and original spelling.
std::pair<FileID, unsigned> LocInfo =
SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
std::pair<const char *,const char *> Buffer =
SourceMgr.getBufferData(LocInfo.first);
const char *StrData = Buffer.first+LocInfo.second;
// Create a langops struct and enable trigraphs. This is sufficient for
// relexing tokens.
LangOptions LangOpts;
LangOpts.Trigraphs = true;
// Create a lexer starting at the beginning of this token.
Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
Buffer.second);
Token TheTok;
TheLexer.LexFromRawLexer(TheTok);
// Use the StringLiteralParser to compute the length of the string in bytes.
StringLiteralParser SLP(&TheTok, 1, PP);
unsigned TokNumBytes = SLP.GetStringLength();
Chris Lattner
committed
// If the byte is in this token, return the location of the byte.
Chris Lattner
committed
if (ByteNo < TokNumBytes ||
(ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
unsigned Offset =
StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
// Now that we know the offset of the token in the spelling, use the
// preprocessor to get the offset in the original source.
return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
Chris Lattner
committed
}
// Move to the next string token.
++TokNo;
ByteNo -= TokNumBytes;
}
}
/// CheckFunctionCall - Check a direct function call for various correctness
/// and safety properties not strictly enforced by the C type system.
Action::OwningExprResult
Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
OwningExprResult TheCallResult(Owned(TheCall));
// Get the IdentifierInfo* for the called function.
IdentifierInfo *FnInfo = FDecl->getIdentifier();
// None of the checks below are needed for functions that don't have
// simple names (e.g., C++ conversion functions).
if (!FnInfo)
return move(TheCallResult);
switch (FDecl->getBuiltinID(Context)) {
case Builtin::BI__builtin___CFStringMakeConstantString:
assert(TheCall->getNumArgs() == 1 &&
"Wrong # arguments to builtin CFStringMakeConstantString");
if (CheckObjCString(TheCall->getArg(0)))
return ExprError();
return move(TheCallResult);
case Builtin::BI__builtin_stdarg_start:
case Builtin::BI__builtin_va_start:
if (SemaBuiltinVAStart(TheCall))
return ExprError();
return move(TheCallResult);
case Builtin::BI__builtin_isgreater:
case Builtin::BI__builtin_isgreaterequal:
case Builtin::BI__builtin_isless:
case Builtin::BI__builtin_islessequal:
case Builtin::BI__builtin_islessgreater:
case Builtin::BI__builtin_isunordered:
if (SemaBuiltinUnorderedCompare(TheCall))
return ExprError();
return move(TheCallResult);
case Builtin::BI__builtin_return_address:
case Builtin::BI__builtin_frame_address:
if (SemaBuiltinStackAddress(TheCall))
return ExprError();
return move(TheCallResult);
case Builtin::BI__builtin_shufflevector:
return SemaBuiltinShuffleVector(TheCall);
// TheCall will be freed by the smart pointer here, but that's fine, since
// SemaBuiltinShuffleVector guts it, but then doesn't release it.
case Builtin::BI__builtin_prefetch:
if (SemaBuiltinPrefetch(TheCall))
return ExprError();
return move(TheCallResult);
case Builtin::BI__builtin_object_size:
if (SemaBuiltinObjectSize(TheCall))
return ExprError();
// FIXME: This mechanism should be abstracted to be less fragile and
// more efficient. For example, just map function ids to custom
// handlers.
// Printf checking.
if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
if (Format->getType() == "printf") {
Ted Kremenek
committed
bool HasVAListArg = Format->getFirstArg() == 0;
if (!HasVAListArg) {
if (const FunctionProtoType *Proto
= FDecl->getType()->getAsFunctionProtoType())
HasVAListArg = !Proto->isVariadic();
Ted Kremenek
committed
}
CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
Ted Kremenek
committed
HasVAListArg ? 0 : Format->getFirstArg() - 1);
return move(TheCallResult);
/// CheckObjCString - Checks that the argument to the builtin
/// CFString constructor is correct
bool Sema::CheckObjCString(Expr *Arg) {
Arg = Arg->IgnoreParenCasts();
StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
if (!Literal || Literal->isWide()) {
Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
<< Arg->getSourceRange();
return true;
}
const char *Data = Literal->getStrData();
unsigned Length = Literal->getByteLength();
for (unsigned i = 0; i < Length; ++i) {
if (!Data[i]) {
Chris Lattner
committed
Diag(getLocationOfStringLiteralByte(Literal, i),
diag::warn_cfstring_literal_contains_nul_character)
<< Arg->getSourceRange();
return false;
}
/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
/// Emit an error and return true on failure, return false on success.
bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
Expr *Fn = TheCall->getCallee();
if (TheCall->getNumArgs() > 2) {
Diag(TheCall->getArg(2)->getLocStart(),
diag::err_typecheck_call_too_many_args)
<< 0 /*function call*/ << Fn->getSourceRange()
<< SourceRange(TheCall->getArg(2)->getLocStart(),
(*(TheCall->arg_end()-1))->getLocEnd());
return true;
}
if (TheCall->getNumArgs() < 2) {
return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
<< 0 /*function call*/;
}
// Determine whether the current function is variadic or not.
bool isVariadic;
if (getCurFunctionDecl()) {
if (FunctionProtoType* FTP =
dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType()))
isVariadic = FTP->isVariadic();
else
isVariadic = false;
} else {
Argyrios Kyrtzidis
committed
isVariadic = getCurMethodDecl()->isVariadic();
Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
return true;
}
// Verify that the second argument to the builtin is the last argument of the
// current function or method.
bool SecondArgIsLastNamedArgument = false;
const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
Anders Carlsson
committed
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
// FIXME: This isn't correct for methods (results in bogus warning).
// Get the last formal in the current function.
Anders Carlsson
committed
const ParmVarDecl *LastArg;
if (FunctionDecl *FD = getCurFunctionDecl())
LastArg = *(FD->param_end()-1);
Argyrios Kyrtzidis
committed
LastArg = *(getCurMethodDecl()->param_end()-1);
SecondArgIsLastNamedArgument = PV == LastArg;
}
}
if (!SecondArgIsLastNamedArgument)
Diag(TheCall->getArg(1)->getLocStart(),
diag::warn_second_parameter_of_va_start_not_last_named_argument);
return false;
}
/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
/// friends. This is declared to take (...), so we have to check everything.
bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
if (TheCall->getNumArgs() < 2)
return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
<< 0 /*function call*/;
if (TheCall->getNumArgs() > 2)
return Diag(TheCall->getArg(2)->getLocStart(),
diag::err_typecheck_call_too_many_args)
<< SourceRange(TheCall->getArg(2)->getLocStart(),
(*(TheCall->arg_end()-1))->getLocEnd());
Expr *OrigArg0 = TheCall->getArg(0);
Expr *OrigArg1 = TheCall->getArg(1);
// Do standard promotions between the two arguments, returning their common
// type.
QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
// Make sure any conversions are pushed back into the call; this is
// type safe since unordered compare builtins are declared as "_Bool
// foo(...)".
TheCall->setArg(0, OrigArg0);
TheCall->setArg(1, OrigArg1);
// If the common type isn't a real floating type, then the arguments were
// invalid for this operation.
if (!Res->isRealFloatingType())
return Diag(OrigArg0->getLocStart(),
diag::err_typecheck_call_invalid_ordered_compare)
<< OrigArg0->getType() << OrigArg1->getType()
<< SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
return false;
}
bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
// The signature for these builtins is exact; the only thing we need
// to check is that the argument is a constant.
SourceLocation Loc;
if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
return false;
}
/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
// This is declared to take (...), so we have to check everything.
Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
if (TheCall->getNumArgs() < 3)
return ExprError(Diag(TheCall->getLocEnd(),
diag::err_typecheck_call_too_few_args)
<< 0 /*function call*/ << TheCall->getSourceRange());
QualType FAType = TheCall->getArg(0)->getType();
QualType SAType = TheCall->getArg(1)->getType();
if (!FAType->isVectorType() || !SAType->isVectorType()) {
Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
<< SourceRange(TheCall->getArg(0)->getLocStart(),
TheCall->getArg(1)->getLocEnd());
}
if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
Context.getCanonicalType(SAType).getUnqualifiedType()) {
Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
<< SourceRange(TheCall->getArg(0)->getLocStart(),
TheCall->getArg(1)->getLocEnd());
}
unsigned numElements = FAType->getAsVectorType()->getNumElements();
if (TheCall->getNumArgs() != numElements+2) {
if (TheCall->getNumArgs() < numElements+2)
return ExprError(Diag(TheCall->getLocEnd(),
diag::err_typecheck_call_too_few_args)
<< 0 /*function call*/ << TheCall->getSourceRange());
return ExprError(Diag(TheCall->getLocEnd(),
diag::err_typecheck_call_too_many_args)
<< 0 /*function call*/ << TheCall->getSourceRange());
}
for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
llvm::APSInt Result(32);
if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
return ExprError(Diag(TheCall->getLocStart(),
diag::err_shufflevector_nonconstant_argument)
<< TheCall->getArg(i)->getSourceRange());
if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
return ExprError(Diag(TheCall->getLocStart(),
diag::err_shufflevector_argument_too_large)
<< TheCall->getArg(i)->getSourceRange());
}
llvm::SmallVector<Expr*, 32> exprs;
for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
exprs.push_back(TheCall->getArg(i));
TheCall->setArg(i, 0);
}
return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2,
FAType,
TheCall->getCallee()->getLocStart(),
TheCall->getRParenLoc()));
}
/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
// This is declared to take (const void*, ...) and can take two
// optional constant int args.
bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
unsigned NumArgs = TheCall->getNumArgs();
if (NumArgs > 3)
return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
<< 0 /*function call*/ << TheCall->getSourceRange();
// Argument 0 is checked for us and the remaining arguments must be
// constant integers.
for (unsigned i = 1; i != NumArgs; ++i) {
Expr *Arg = TheCall->getArg(i);
QualType RWType = Arg->getType();
const BuiltinType *BT = RWType->getAsBuiltinType();
if (!BT || BT->getKind() != BuiltinType::Int ||
!Arg->isIntegerConstantExpr(Result, Context))
return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
<< SourceRange(Arg->getLocStart(), Arg->getLocEnd());
// FIXME: gcc issues a warning and rewrites these to 0. These
// seems especially odd for the third argument since the default
// is 3.
if (i == 1) {
if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
<< "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
} else {
if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
<< "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
return false;
/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
/// int type). This simply type checks that type is one of the defined
/// constants (0-3).
bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
Expr *Arg = TheCall->getArg(1);
QualType ArgType = Arg->getType();
const BuiltinType *BT = ArgType->getAsBuiltinType();
llvm::APSInt Result(32);
if (!BT || BT->getKind() != BuiltinType::Int ||
!Arg->isIntegerConstantExpr(Result, Context)) {
return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
<< SourceRange(Arg->getLocStart(), Arg->getLocEnd());
}
if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
<< "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Ted Kremenek
committed
bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
switch (E->getStmtClass()) {
case Stmt::ConditionalOperatorClass: {
Ted Kremenek
committed
const ConditionalOperator *C = cast<ConditionalOperator>(E);
return SemaCheckStringLiteral(C->getLHS(), TheCall,
HasVAListArg, format_idx, firstDataArg)
HasVAListArg, format_idx, firstDataArg);
Ted Kremenek
committed
const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
format_idx, firstDataArg);
Ted Kremenek
committed
const ParenExpr *Expr = cast<ParenExpr>(E);
return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
format_idx, firstDataArg);
Ted Kremenek
committed
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
case Stmt::DeclRefExprClass: {
const DeclRefExpr *DR = cast<DeclRefExpr>(E);
// As an exception, do not flag errors for variables binding to
// const string literals.
if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
bool isConstant = false;
QualType T = DR->getType();
if (const ArrayType *AT = Context.getAsArrayType(T)) {
isConstant = AT->getElementType().isConstant(Context);
}
else if (const PointerType *PT = T->getAsPointerType()) {
isConstant = T.isConstant(Context) &&
PT->getPointeeType().isConstant(Context);
}
if (isConstant) {
const VarDecl *Def = 0;
if (const Expr *Init = VD->getDefinition(Def))
return SemaCheckStringLiteral(Init, TheCall,
HasVAListArg, format_idx, firstDataArg);
}
}
return false;
}
Ted Kremenek
committed
case Stmt::ObjCStringLiteralClass:
case Stmt::StringLiteralClass: {
const StringLiteral *StrE = NULL;
if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))
Ted Kremenek
committed
StrE = cast<StringLiteral>(E);
CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
firstDataArg);
Ted Kremenek
committed
default:
return false;
/// CheckPrintfArguments - Check calls to printf (and similar functions) for
/// correct use of format strings.
///
/// HasVAListArg - A predicate indicating whether the printf-like
/// function is passed an explicit va_arg argument (e.g., vprintf)
///
/// format_idx - The index into Args for the format string.
///
/// Improper format strings to functions in the printf family can be
/// the source of bizarre bugs and very serious security holes. A
/// good source of information is available in the following paper
/// (which includes additional references):
///
/// FormatGuard: Automatic Protection From printf Format String
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
///
/// Functionality implemented:
///
/// We can statically check the following properties for string
/// literal format strings for non v.*printf functions (where the
/// arguments are passed directly):
//
/// (1) Are the number of format conversions equal to the number of
/// data arguments?
///
/// (2) Does each format conversion correctly match the type of the
/// corresponding data argument? (TODO)
///
/// Moreover, for all printf functions we can:
///
/// (3) Check for a missing format string (when not caught by type checking).
///
/// (4) Check for no-operation flags; e.g. using "#" with format
/// conversion 'c' (TODO)
///
/// (5) Check the use of '%n', a major source of security holes.
///
/// (6) Check for malformed format conversions that don't specify anything.
///
/// (7) Check for empty format strings. e.g: printf("");
///
/// (8) Check that the format string is a wide literal.
///
/// (9) Also check the arguments of functions with the __format__ attribute.
/// (TODO).
///
/// All of these checks can be done by parsing the format string.
///
/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
void
Ted Kremenek
committed
Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
Ted Kremenek
committed
const Expr *Fn = TheCall->getCallee();
// CHECK: printf-like function is called with no format string.
if (format_idx >= TheCall->getNumArgs()) {
Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
<< Fn->getSourceRange();
return;
}
Ted Kremenek
committed
const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
Chris Lattner
committed
// CHECK: format string is not a string literal.
//
// Dynamically generated format strings are difficult to
// automatically vet at compile time. Requiring that format strings
// are string literals: (1) permits the checking of format strings by
// the compiler and thereby (2) can practically remove the source of
// many format string exploits.
// Format string can be either ObjC string (e.g. @"%d") or
// C string (e.g. "%d")
// ObjC string uses the same format specifiers as C string, so we can use
// the same format string checking logic for both ObjC and C strings.
bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall,
HasVAListArg, format_idx,
firstDataArg);
// For vprintf* functions (i.e., HasVAListArg==true), we add a
// special check to see if the format string is a function parameter
// of the function calling the printf function. If the function
// has an attribute indicating it is a printf-like function, then we
// should suppress warnings concerning non-literals being used in a call
// to a vprintf function. For example:
//
// void
// logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
// va_list ap;
// va_start(ap, fmt);
// vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
// ...
//
//
// FIXME: We don't have full attribute support yet, so just check to see
// if the argument is a DeclRefExpr that references a parameter. We'll
// add proper support for checking the attribute later.
if (HasVAListArg)
Ted Kremenek
committed
if (const DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
if (isa<ParmVarDecl>(DR->getDecl()))
return;
Diag(TheCall->getArg(format_idx)->getLocStart(),
diag::warn_printf_not_string_constant)
Ted Kremenek
committed
<< OrigFormatExpr->getSourceRange();
return;
}
Ted Kremenek
committed
void Sema::CheckPrintfString(const StringLiteral *FExpr,
const Expr *OrigFormatExpr,
const CallExpr *TheCall, bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
const ObjCStringLiteral *ObjCFExpr =
dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
// CHECK: is the format string a wide literal?
if (FExpr->isWide()) {
Diag(FExpr->getLocStart(),
diag::warn_printf_format_string_is_wide_literal)
<< OrigFormatExpr->getSourceRange();
return;
}
// Str - The format string. NOTE: this is NOT null-terminated!
const char * const Str = FExpr->getStrData();
// CHECK: empty format string?
const unsigned StrLen = FExpr->getByteLength();
if (StrLen == 0) {
Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
<< OrigFormatExpr->getSourceRange();
return;
}
// We process the format string using a binary state machine. The
// current state is stored in CurrentState.
enum {
state_OrdChr,
state_Conversion
} CurrentState = state_OrdChr;
// numConversions - The number of conversions seen so far. This is
// incremented as we traverse the format string.
unsigned numConversions = 0;
// numDataArgs - The number of data arguments after the format
// string. This can only be determined for non vprintf-like
// functions. For those functions, this value is 1 (the sole
// va_arg argument).
unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
// Inspect the format string.
unsigned StrIdx = 0;
// LastConversionIdx - Index within the format string where we last saw
// a '%' character that starts a new format conversion.
unsigned LastConversionIdx = 0;
for (; StrIdx < StrLen; ++StrIdx) {
// Is the number of detected conversion conversions greater than
// the number of matching data arguments? If so, stop.
if (!HasVAListArg && numConversions > numDataArgs) break;
// Handle "\0"
if (Str[StrIdx] == '\0') {
// The string returned by getStrData() is not null-terminated,
// so the presence of a null character is likely an error.
Chris Lattner
committed
Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
diag::warn_printf_format_string_contains_null_char)
<< OrigFormatExpr->getSourceRange();
return;
}
// Ordinary characters (not processing a format conversion).
if (CurrentState == state_OrdChr) {
if (Str[StrIdx] == '%') {
CurrentState = state_Conversion;
LastConversionIdx = StrIdx;
}
continue;
}
// Seen '%'. Now processing a format conversion.
switch (Str[StrIdx]) {
// Handle dynamic precision or width specifier.
case '*': {
++numConversions;
if (!HasVAListArg && numConversions > numDataArgs) {
Chris Lattner
committed
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
<< OrigFormatExpr->getSourceRange();
Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
<< OrigFormatExpr->getSourceRange();
// Don't do any more checking. We'll just emit spurious errors.
return;
// Perform type checking on width/precision specifier.
Ted Kremenek
committed
const Expr *E = TheCall->getArg(format_idx+numConversions);
if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
if (BT->getKind() == BuiltinType::Int)
break;
Chris Lattner
committed
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
<< E->getType() << E->getSourceRange();
Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
<< E->getType() << E->getSourceRange();
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
break;
}
// Characters which can terminate a format conversion
// (e.g. "%d"). Characters that specify length modifiers or
// other flags are handled by the default case below.
//
// FIXME: additional checks will go into the following cases.
case 'i':
case 'd':
case 'o':
case 'u':
case 'x':
case 'X':
case 'D':
case 'O':
case 'U':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
case 'a':
case 'A':
case 'c':
case 'C':
case 'S':
case 's':
case 'p':
++numConversions;
CurrentState = state_OrdChr;
break;
// CHECK: Are we using "%n"? Issue a warning.
case 'n': {
++numConversions;
CurrentState = state_OrdChr;
Chris Lattner
committed
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
LastConversionIdx);
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
// Handle "%@"
case '@':
// %@ is allowed in ObjC format strings only.
if(ObjCFExpr != NULL)
CurrentState = state_OrdChr;
else {
// Issue a warning: invalid format conversion.
Chris Lattner
committed
SourceLocation Loc =
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx,
Str+std::min(LastConversionIdx+2, StrLen))
<< OrigFormatExpr->getSourceRange();
}
++numConversions;
break;
// Handle "%%"
case '%':
// Sanity check: Was the first "%" character the previous one?
// If not, we will assume that we have a malformed format
// conversion, and that the current "%" character is the start
// of a new conversion.
if (StrIdx - LastConversionIdx == 1)
CurrentState = state_OrdChr;
else {
// Issue a warning: invalid format conversion.
Chris Lattner
committed
SourceLocation Loc =
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx, Str+StrIdx)
<< OrigFormatExpr->getSourceRange();
// This conversion is broken. Advance to the next format
// conversion.
LastConversionIdx = StrIdx;
++numConversions;
default:
// This case catches all other characters: flags, widths, etc.
// We should eventually process those as well.
break;
}
}
if (CurrentState == state_Conversion) {
// Issue a warning: invalid format conversion.
Chris Lattner
committed
SourceLocation Loc =
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx,
Str+std::min(LastConversionIdx+2, StrLen))
<< OrigFormatExpr->getSourceRange();
return;
}
if (!HasVAListArg) {
// CHECK: Does the number of format conversions exceed the number
// of data arguments?
if (numConversions > numDataArgs) {
Chris Lattner
committed
SourceLocation Loc =
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_insufficient_data_args)
<< OrigFormatExpr->getSourceRange();
}
// CHECK: Does the number of data arguments exceed the number of
// format conversions in the format string?
else if (numConversions < numDataArgs)
Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
diag::warn_printf_too_many_data_args)
<< OrigFormatExpr->getSourceRange();
}
}
//===--- CHECK: Return Address of Stack Variable --------------------------===//
static DeclRefExpr* EvalVal(Expr *E);
static DeclRefExpr* EvalAddr(Expr* E);
/// CheckReturnStackAddr - Check if a return statement returns the address
/// of a stack variable.
void
Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
SourceLocation ReturnLoc) {
// Perform checking for returned stack addresses.
Steve Naroff
committed
if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
if (DeclRefExpr *DR = EvalAddr(RetValExp))
Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
<< DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
Steve Naroff
committed
// Skip over implicit cast expressions when checking for block expressions.
if (ImplicitCastExpr *IcExpr =
dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
RetValExp = IcExpr->getSubExpr();
if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
Diag(C->getLocStart(), diag::err_ret_local_block)
<< C->getSourceRange();
}
// Perform checking for stack values returned by reference.
else if (lhsType->isReferenceType()) {
// Check for a reference to the stack
if (DeclRefExpr *DR = EvalVal(RetValExp))
Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
<< DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
}
}
/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
/// check if the expression in a return statement evaluates to an address
/// to a location on the stack. The recursion is used to traverse the
/// AST of the return expression, with recursion backtracking when we
/// encounter a subexpression that (1) clearly does not lead to the address
/// of a stack variable or (2) is something we cannot determine leads to
/// the address of a stack variable based on such local checking.
///
/// EvalAddr processes expressions that are pointers that are used as
/// references (and not L-values). EvalVal handles all other values.
/// At the base case of the recursion is a check for a DeclRefExpr* in
/// the refers to a stack variable.
///
/// This implementation handles:
///
/// * pointer-to-pointer casts
/// * implicit conversions from array references to pointers
/// * taking the address of fields
/// * arbitrary interplay between "&" and "*" operators
/// * pointer arithmetic from an address of a stack variable
/// * taking the address of an array element where the array is on the stack
static DeclRefExpr* EvalAddr(Expr *E) {
// We should only be called for evaluating pointer expressions.
Steve Naroff
committed
assert((E->getType()->isPointerType() ||
E->getType()->isBlockPointerType() ||
E->getType()->isObjCQualifiedIdType()) &&
"EvalAddr only works on pointers");
// Our "symbolic interpreter" is just a dispatch off the currently
// viewed AST node. We then recursively traverse the AST by calling
// EvalAddr and EvalVal appropriately.
switch (E->getStmtClass()) {
case Stmt::ParenExprClass:
// Ignore parentheses.
return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
case Stmt::UnaryOperatorClass: {
// The only unary operator that make sense to handle here
// is AddrOf. All others don't make sense as pointers.
UnaryOperator *U = cast<UnaryOperator>(E);
if (U->getOpcode() == UnaryOperator::AddrOf)
return EvalVal(U->getSubExpr());
else
return NULL;
}
case Stmt::BinaryOperatorClass: {
// Handle pointer arithmetic. All other binary operators are not valid
// in this context.
BinaryOperator *B = cast<BinaryOperator>(E);
BinaryOperator::Opcode op = B->getOpcode();
if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
return NULL;
Expr *Base = B->getLHS();
Anders Carlsson
committed
// Determine which argument is the real pointer base. It could be
// the RHS argument instead of the LHS.
if (!Base->getType()->isPointerType()) Base = B->getRHS();
assert (Base->getType()->isPointerType());
return EvalAddr(Base);
}
// For conditional operators we need to see if either the LHS or RHS are
// valid DeclRefExpr*s. If one of them is valid, we return it.
case Stmt::ConditionalOperatorClass: {
ConditionalOperator *C = cast<ConditionalOperator>(E);
// Handle the GNU extension for missing LHS.
if (Expr *lhsExpr = C->getLHS())
if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
return LHS;
return EvalAddr(C->getRHS());
}
// For casts, we need to handle conversions from arrays to
// pointer values, and pointer-to-pointer conversions.
case Stmt::ImplicitCastExprClass:
case Stmt::CStyleCastExprClass:
case Stmt::CXXFunctionalCastExprClass: {
Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
Steve Naroff
committed
if (SubExpr->getType()->isPointerType() ||
SubExpr->getType()->isBlockPointerType() ||
SubExpr->getType()->isObjCQualifiedIdType())
return EvalAddr(SubExpr);
else if (T->isArrayType())
return EvalVal(SubExpr);
}
// C++ casts. For dynamic casts, static casts, and const casts, we
// are always converting from a pointer-to-pointer, so we just blow
// through the cast. In the case the dynamic cast doesn't fail (and
// return NULL), we take the conservative route and report cases
// where we return the address of a stack variable. For Reinterpre
// FIXME: The comment about is wrong; we're not always converting
// from pointer to pointer. I'm guessing that this code should also
// handle references to objects.
case Stmt::CXXStaticCastExprClass:
case Stmt::CXXDynamicCastExprClass:
case Stmt::CXXConstCastExprClass:
case Stmt::CXXReinterpretCastExprClass: {
Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
Steve Naroff
committed
if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
return EvalAddr(S);
else
return NULL;
}
// Everything else: we simply don't reason about them.
default:
return NULL;