"llvm/lib/Target/git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "808dca12f8aac4f14784673bfcf252d0d31cefb4"
//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Ted Kremenek and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements extra semantic analysis beyond what is enforced
// by the C type system.
//
//===----------------------------------------------------------------------===//
#include "Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
using namespace clang;
/// CheckFunctionCall - Check a direct function call for various correctness
/// and safety properties not strictly enforced by the C type system.
///
/// The callee's identifier is looked up in KnownFunctionIDs; calls to
/// functions that are not in that table are ignored.  Calls whose table index
/// is at most id_vprintf are treated as members of the printf family and are
/// forwarded to CheckPrintfArguments together with the index of the format
/// string argument and a flag saying whether the data arguments are passed
/// through a va_list.
void
Sema::CheckFunctionCall(Expr *Fn,
                        SourceLocation LParenLoc, SourceLocation RParenLoc,
                        FunctionDecl *FDecl,
                        Expr** Args, unsigned NumArgsInCall) {
  // Get the IdentifierInfo* for the called function.
  IdentifierInfo *FnInfo = FDecl->getIdentifier();

  // Search the KnownFunctionIDs for the identifier.
  unsigned i = 0, e = id_num_known_functions;
  for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
  if (i == e) return;

  // Printf checking.
  if (i <= id_vprintf) {
    // Retrieve the index of the format string parameter and determine
    // if the function is passed a va_list argument.
    unsigned format_idx = 0;
    bool HasVAListArg = false;

    switch (i) {
    default:
      assert(false && "No format string argument index.");
      // Without assertions, do not fall through and check an unknown
      // function as if it were printf.
      return;
    case id_printf:    format_idx = 0; break;
    case id_fprintf:   format_idx = 1; break;
    case id_sprintf:   format_idx = 1; break;
    case id_snprintf:  format_idx = 2; break;
    // asprintf(char **, const char *, ...) receives its data arguments
    // directly, so it is NOT a va_list function (unlike vasprintf).
    case id_asprintf:  format_idx = 1; break;
    case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
    case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
    case id_vfprintf:  format_idx = 1; HasVAListArg = true; break;
    case id_vsprintf:  format_idx = 1; HasVAListArg = true; break;
    case id_vprintf:   format_idx = 0; HasVAListArg = true; break;
    }

    CheckPrintfArguments(Fn, LParenLoc, RParenLoc, HasVAListArg,
                         FDecl, format_idx, Args, NumArgsInCall);
  }
}
/// CheckPrintfArguments - Check calls to printf (and similar functions) for
/// correct use of format strings.
///
/// HasVAListArg - A predicate indicating whether the printf-like
/// function is passed an explicit va_list argument (e.g., vprintf)
///
/// format_idx - The index into Args for the format string.
///
/// Improper format strings to functions in the printf family can be
/// the source of bizarre bugs and very serious security holes. A
/// good source of information is available in the following paper
/// (which includes additional references):
///
/// FormatGuard: Automatic Protection From printf Format String
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
///
/// Functionality implemented:
///
/// We can statically check the following properties for string
/// literal format strings for non v.*printf functions (where the
/// arguments are passed directly):
///
/// (1) Are the number of format conversions equal to the number of
/// data arguments?
///
/// (2) Does each format conversion correctly match the type of the
/// corresponding data argument? (TODO)
///
/// Moreover, for all printf functions we can:
///
/// (3) Check for a missing format string (when not caught by type checking).
///
/// (4) Check for no-operation flags; e.g. using "#" with format
/// conversion 'c' (TODO)
///
/// (5) Check the use of '%n', a major source of security holes.
///
/// (6) Check for malformed format conversions that don't specify anything.
///
/// (7) Check for empty format strings. e.g: printf("");
///
/// (8) Check whether the format string is a wide literal (and warn if so).
///
/// All of these checks can be done by parsing the format string.
///
/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
///
/// Additional parameters:
///
///  Fn - The callee expression; its source range is attached to every
///    diagnostic issued here.
///  RParenLoc - Location at which the missing-format-string warning is
///    reported.
///  Args, NumArgsInCall - The call's argument expressions and their count.
///  LParenLoc, FDecl - Not used by this function.
void
Sema::CheckPrintfArguments(Expr *Fn,
                           SourceLocation LParenLoc, SourceLocation RParenLoc,
                           bool HasVAListArg, FunctionDecl *FDecl,
                           unsigned format_idx, Expr** Args,
                           unsigned NumArgsInCall) {
  // CHECK: printf-like function is called with no format string.
  if (format_idx >= NumArgsInCall) {
    Diag(RParenLoc, diag::warn_printf_missing_format_string,
         Fn->getSourceRange());
    return;
  }

  // CHECK: format string is not a string literal.
  //
  // Dynamically generated format strings are difficult to
  // automatically vet at compile time.  Requiring that format strings
  // are string literals: (1) permits the checking of format strings by
  // the compiler and thereby (2) can practically remove the source of
  // many format string exploits.
  StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]);

  if (!FExpr) {
    Diag(Args[format_idx]->getLocStart(),
         diag::warn_printf_not_string_constant, Fn->getSourceRange());
    return;
  }

  // CHECK: is the format string a wide literal?
  if (FExpr->isWide()) {
    Diag(Args[format_idx]->getLocStart(),
         diag::warn_printf_format_string_is_wide_literal,
         Fn->getSourceRange());
    return;
  }

  // Str - The format string.  NOTE: this is NOT null-terminated!
  const char * const Str = FExpr->getStrData();

  // CHECK: empty format string?
  const unsigned StrLen = FExpr->getByteLength();

  if (StrLen == 0) {
    Diag(Args[format_idx]->getLocStart(),
         diag::warn_printf_empty_format_string, Fn->getSourceRange());
    return;
  }

  // We process the format string using a binary state machine.  The
  // current state is stored in CurrentState.
  enum {
    state_OrdChr,
    state_Conversion
  } CurrentState = state_OrdChr;

  // numConversions - The number of data arguments consumed by the format
  //  string so far: one per format conversion, plus one per '*' field
  //  width/precision.  This is incremented as we traverse the format string.
  unsigned numConversions = 0;

  // numDataArgs - The number of data arguments after the format
  //  string.  This can only be determined for non vprintf-like
  //  functions.  For those functions, this value is 1 (the sole
  //  va_list argument).
  unsigned numDataArgs = NumArgsInCall-(format_idx+1);

  // Inspect the format string.
  unsigned StrIdx = 0;

  // LastConversionIdx - Index within the format string where we last saw
  //  a '%' character that starts a new format conversion.
  unsigned LastConversionIdx = 0;

  for ( ; StrIdx < StrLen ; ++StrIdx ) {

    // Is the number of detected conversions greater than the number of
    // matching data arguments?  If so, stop.
    if (!HasVAListArg && numConversions > numDataArgs) break;

    // Handle "\0"
    if (Str[StrIdx] == '\0') {
      // The string returned by getStrData() is not null-terminated,
      // so the presence of a null character is likely an error.
      SourceLocation Loc =
        PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),StrIdx+1);

      Diag(Loc, diag::warn_printf_format_string_contains_null_char,
           Fn->getSourceRange());

      return;
    }

    // Ordinary characters (not processing a format conversion).
    if (CurrentState == state_OrdChr) {
      if (Str[StrIdx] == '%') {
        CurrentState = state_Conversion;
        LastConversionIdx = StrIdx;
      }
      continue;
    }

    // Seen '%'.  Now processing a format conversion.
    switch (Str[StrIdx]) {
      // Characters which can terminate a format conversion
      // (e.g. "%d").  Characters that specify length modifiers or
      // other flags are handled by the default case below.
      //
      // TODO: additional checks will go into the following cases.
      case 'i':
      case 'd':
      case 'o':
      case 'u':
      case 'x':
      case 'X':
      case 'D':
      case 'O':
      case 'U':
      case 'e':
      case 'E':
      case 'f':
      case 'F':
      case 'g':
      case 'G':
      case 'a':
      case 'A':
      case 'c':
      case 'C':
      case 'S':
      case 's':
      case 'p': // Pointer conversion ("%p").
      case 'P': // Not a standard conversion; kept so existing behavior for
                // "%P" is unchanged.
        ++numConversions;
        CurrentState = state_OrdChr;
        break;

      // A '*' field width or precision takes its value from a data
      // argument, so it consumes one argument without terminating the
      // conversion.
      case '*':
        ++numConversions;
        break;

      // CHECK: Are we using "%n"?  Issue a warning.
      case 'n': {
        ++numConversions;
        CurrentState = state_OrdChr;
        SourceLocation Loc =
          PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
                                     LastConversionIdx+1);

        Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
        break;
      }

      // Handle "%%"
      case '%':
        // Sanity check: Was the first "%" character the previous one?
        // If not, we will assume that we have a malformed format
        // conversion, and that the current "%" character is the start
        // of a new conversion.
        if (StrIdx - LastConversionIdx == 1)
          CurrentState = state_OrdChr;
        else {
          // Issue a warning: invalid format conversion.
          SourceLocation Loc =
            PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
                                       LastConversionIdx+1);

          Diag(Loc, diag::warn_printf_invalid_conversion,
               std::string(Str+LastConversionIdx, Str+StrIdx),
               Fn->getSourceRange());

          // This conversion is broken.  Advance to the next format
          // conversion.
          LastConversionIdx = StrIdx;
          ++numConversions;
        }
        break;

      default:
        // This case catches all other characters: flags, widths, etc.
        // We should eventually process those as well.
        break;
    }
  }

  // The string ended in the middle of a conversion (e.g. "%" or "%l").
  if (CurrentState == state_Conversion) {
    // Issue a warning: invalid format conversion.
    SourceLocation Loc =
      PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
                                 LastConversionIdx+1);

    Diag(Loc, diag::warn_printf_invalid_conversion,
         std::string(Str+LastConversionIdx, Str+StrIdx),
         Fn->getSourceRange());
    return;
  }

  if (!HasVAListArg) {
    // CHECK: Does the number of format conversions exceed the number
    //        of data arguments?
    if (numConversions > numDataArgs) {
      // Use the same "+1" offset as every other diagnostic in this function
      // (presumably it skips the literal's opening quote) so the warning
      // lands on the '%' itself.
      SourceLocation Loc =
        PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
                                   LastConversionIdx+1);

      Diag(Loc, diag::warn_printf_insufficient_data_args,
           Fn->getSourceRange());
    }
    // CHECK: Does the number of data arguments exceed the number of
    //        format conversions in the format string?
    else if (numConversions < numDataArgs)
      // Point at the first data argument that no conversion consumes.
      Diag(Args[format_idx+numConversions+1]->getLocStart(),
           diag::warn_printf_too_many_data_args, Fn->getSourceRange());
  }
}