From 87c43537df4bd83b26ebab877721ba24a9ee3685 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 8 Feb 2013 11:17:20 +0000 Subject: [PATCH] [sanitizer] Improve scanf interceptor This a rewrite of the scanf parser. The new implementation is pretty close to the spec, with a few shortcuts taken here and there. It is conservative, i.e. it gives up parsing if it does not understand some part of the format string, or runs into an ambiguous % spec. It does not handle some rarely used parts of the spec, like %n$ - for now. I'm also moving parser call to after the original *scanf function completes, so that we can find out the store size of %s directive by the use of strlen() on the target buffer. llvm-svn: 174704 --- .../sanitizer_common_interceptors.inc | 15 +- .../sanitizer_common_interceptors_scanf.inc | 356 +++++++++++------- .../tests/sanitizer_scanf_interceptor_test.cc | 30 +- 3 files changed, 261 insertions(+), 140 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 1d7d68a236b3..e016abdcc1d8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -150,8 +150,11 @@ INTERCEPTOR(int, prctl, int option, INTERCEPTOR(int, vscanf, const char *format, va_list ap) { // NOLINT void* ctx; COMMON_INTERCEPTOR_ENTER(ctx, vscanf, format, ap); - scanf_common(ctx, format, ap); + va_list aq; + va_copy(aq, ap); int res = REAL(vscanf)(format, ap); // NOLINT + scanf_common(ctx, format, aq); + va_end(aq); return res; } @@ -159,8 +162,11 @@ INTERCEPTOR(int, vsscanf, const char *str, const char *format, // NOLINT va_list ap) { void* ctx; COMMON_INTERCEPTOR_ENTER(ctx, vsscanf, str, format, ap); - scanf_common(ctx, format, ap); + va_list aq; + va_copy(aq, ap); int res = REAL(vsscanf)(str, format, ap); // NOLINT + scanf_common(ctx, format, aq); + va_end(aq); // FIXME: read of str return res; } @@ -169,8 +175,11 @@ INTERCEPTOR(int, vfscanf, void *stream, const char *format, // NOLINT va_list ap) { void* ctx; COMMON_INTERCEPTOR_ENTER(ctx, vfscanf, stream, format, ap); - scanf_common(ctx, format, ap); + va_list aq; + va_copy(aq, ap); int res = REAL(vfscanf)(stream, format, ap); // NOLINT + scanf_common(ctx, format, aq); + va_end(aq); return res; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc index f6433f9ed6e6..04c5543b9d31 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc @@ -8,90 +8,43 @@ //===----------------------------------------------------------------------===// // // Scanf implementation for use in *Sanitizer interceptors. +// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html +// with a few common GNU extensions. // //===----------------------------------------------------------------------===// #include #ifdef _WIN32 #define va_copy(dst, src) ((dst) = (src)) -#endif // _WIN32 +#endif // _WIN32 -struct ScanfSpec { - char c; - unsigned size; +struct ScanfDirective { + int argIdx; // argument index, or -1 of not specified ("%n$") + bool suppressed; // suppress assignment ("*") + int fieldWidth; + bool allocate; // allocate space ("m") + char lengthModifier[2]; + char convSpecifier; }; -// One-letter specs. -static const ScanfSpec scanf_specs[] = { - {'s', 1}, // FIXME: This is incorrect, we should check the actual number - // of bytes written to the string. - {'c', sizeof(char)}, - {'p', sizeof(void *)}, - {'e', sizeof(float)}, - {'E', sizeof(float)}, - {'a', sizeof(float)}, - {'f', sizeof(float)}, - {'g', sizeof(float)}, - {'d', sizeof(int)}, - {'i', sizeof(int)}, - {'o', sizeof(int)}, - {'u', sizeof(int)}, - {'x', sizeof(int)}, - {'X', sizeof(int)}, - {'n', sizeof(int)}, - {'t', sizeof(PTRDIFF_T)}, - {'z', sizeof(SIZE_T)}, - {'j', sizeof(INTMAX_T)}, - {'h', sizeof(short)} -}; - -static const unsigned scanf_specs_cnt = - sizeof(scanf_specs) / sizeof(scanf_specs[0]); - -// %ll?, %L?, %q? specs -static const ScanfSpec scanf_llspecs[] = { - {'e', sizeof(long double)}, - {'f', sizeof(long double)}, - {'g', sizeof(long double)}, - {'d', sizeof(long long)}, - {'i', sizeof(long long)}, - {'o', sizeof(long long)}, - {'u', sizeof(long long)}, - {'x', sizeof(long long)} -}; - -static const unsigned scanf_llspecs_cnt = - sizeof(scanf_llspecs) / sizeof(scanf_llspecs[0]); - -// %l? specs -static const ScanfSpec scanf_lspecs[] = { - {'e', sizeof(double)}, - {'f', sizeof(double)}, - {'g', sizeof(double)}, - {'d', sizeof(long)}, - {'i', sizeof(long)}, - {'o', sizeof(long)}, - {'u', sizeof(long)}, - {'x', sizeof(long)}, - {'X', sizeof(long)}, -}; - -static const unsigned scanf_lspecs_cnt = - sizeof(scanf_lspecs) / sizeof(scanf_lspecs[0]); - -static unsigned match_spec(const struct ScanfSpec *spec, unsigned n, char c) { - for (unsigned i = 0; i < n; ++i) - if (spec[i].c == c) - return spec[i].size; - return 0; +static const char *parse_number(const char *p, int *out) { + *out = internal_atoll(p); + while (*p >= '0' && *p <= '9') + ++p; + return p; } -static void scanf_common(void *ctx, const char *format, va_list ap_const) { - va_list aq; - va_copy(aq, ap_const); +static bool char_is_one_of(char c, const char *s) { + return !!internal_strchr(s, c); +} - const char *p = format; - unsigned size; +// Parse scanf format string. If a valid directive in encountered, it is +// returned in dir. This function returns the pointer to the first +// unprocessed character, or 0 in case of error. +// In case of the end-of-string, a pointer to the closing \0 is returned. +static const char *scanf_parse_next(const char *p, ScanfDirective *dir) { + internal_memset(dir, 0, sizeof(*dir)); + dir->argIdx = -1; while (*p) { if (*p != '%') { @@ -99,81 +52,224 @@ static void scanf_common(void *ctx, const char *format, va_list ap_const) { continue; } ++p; - if (*p == '*' || *p == '%' || *p == '\0') { - // FIXME: Bailing out for (p == "*") is wrong, we should parse the - // directive to the end. - if (*p != '\0') - ++p; + // %% + if (*p == '%') { + ++p; continue; } - - unsigned field_width = 0; - if (*p >= '0' && *p <= '9') { - field_width = internal_atoll(p); - while (*p >= '0' && *p <= '9') - p++; + if (*p == '\0') { + return 0; } - if (field_width > 0) { - // +1 for the \0 at the end. - if (*p == 's') - field_width++; - if (*p == 's' || *p == 'c') { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width); - ++p; - continue; + // %n$ + if (*p >= '0' && *p <= '9') { + int number; + const char *q = parse_number(p, &number); + if (*q == '$') { + dir->argIdx = number; + p = q + 1; } + // Otherwise, do not change p. This will be re-parsed later as the field + // width. } - - if (*p == '[') { - // Search for the closing bracket. It is ignored if it goes right after - // the opening bracket or after ^. - p++; - if (*p == ']') { - p++; - } else if (*p == '^' && *(p+1) == ']') { - p += 2; - } - while (*p != ']') - p++; - // +1 for the \0 at the end. - field_width++; - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void*), field_width); - continue; + // * + if (*p == '*') { + dir->suppressed = true; + ++p; } - - if (*p == 'L' || *p == 'q') { + // Field width. + if (*p >= '0' && *p <= '9') { + p = parse_number(p, &dir->fieldWidth); + if (dir->fieldWidth <= 0) + return 0; + } + // m + if (*p == 'm') { + dir->allocate = true; ++p; - size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p); - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size); - continue; } - - if (*p == 'l') { + // Length modifier. + if (char_is_one_of(*p, "jztLq")) { + dir->lengthModifier[0] = *p; + ++p; + } else if (*p == 'h') { + dir->lengthModifier[0] = 'h'; + ++p; + if (*p == 'h') { + dir->lengthModifier[1] = 'h'; + ++p; + } + } else if (*p == 'l') { + dir->lengthModifier[0] = 'l'; ++p; if (*p == 'l') { + dir->lengthModifier[1] = 'l'; ++p; - size = match_spec(scanf_llspecs, scanf_llspecs_cnt, *p); - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size); - continue; - } else { - size = match_spec(scanf_lspecs, scanf_lspecs_cnt, *p); - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size); - continue; } } + // Conversion specifier. + dir->convSpecifier = *p++; + // Consume %[...] expression. + if (dir->convSpecifier == '[') { + if (*p == '^') + ++p; + if (*p == ']') + ++p; + while (*p && *p != ']') + ++p; + if (*p == 0) + return 0; // unexpected end of string + // Consume the closing ']'. + ++p; + } + break; + } + return p; +} + +// Returns true if the character is an integer conversion specifier. +static bool scanf_is_integer_conv(char c) { + return char_is_one_of(c, "diouxXn"); +} + +// Returns true if the character is an floating point conversion specifier. +static bool scanf_is_float_conv(char c) { + return char_is_one_of(c, "AeEfFgG"); + // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore, + // unsupported. +} + +// Returns string output character size for string-like conversions, +// or 0 if the conversion is invalid. +static int scanf_get_char_size(ScanfDirective *dir) { + if (char_is_one_of(dir->convSpecifier, "CS")) { + // wchar_t + return 0; + } + + if (char_is_one_of(dir->convSpecifier, "cs[")) { + if (dir->lengthModifier[0] == 'l') + // wchar_t + return 0; + else if (dir->lengthModifier[0] == 0) + return sizeof(char); + else + return 0; + } + + return 0; +} + +enum ScanfStoreSize { + // Store size not known in advance; can be calculated as strlen() of the + // destination buffer. + SSS_STRLEN = -1, + // Invalid conversion specifier. + SSS_INVALID = 0 +}; + +// Returns the store size of a scanf directive (if >0), or a value of +// ScanfStoreSize. +static int scanf_get_store_size(ScanfDirective *dir) { + if (scanf_is_integer_conv(dir->convSpecifier)) { + switch (dir->lengthModifier[0]) { + case 'h': + return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); + case 'l': + return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); + case 'L': + return sizeof(long long); + case 'j': + return sizeof(INTMAX_T); + case 'z': + return sizeof(SIZE_T); + case 't': + return sizeof(PTRDIFF_T); + case 0: + return sizeof(int); + default: + return SSS_INVALID; + } + } - if (*p == 'h' && *(p + 1) == 'h') { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), sizeof(char)); - p += 2; - continue; + if (scanf_is_float_conv(dir->convSpecifier)) { + switch (dir->lengthModifier[0]) { + case 'L': + case 'q': + return sizeof(long double); + case 'l': + return dir->lengthModifier[1] == 'l' ? sizeof(long double) + : sizeof(double); + case 0: + return sizeof(float); + default: + return SSS_INVALID; } + } - size = match_spec(scanf_specs, scanf_specs_cnt, *p); - if (size) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, va_arg(aq, void *), size); - ++p; + if (char_is_one_of(dir->convSpecifier, "sS[")) { + unsigned charSize = scanf_get_char_size(dir); + if (charSize == 0) + return SSS_INVALID; + if (dir->fieldWidth == 0) + return SSS_STRLEN; + return (dir->fieldWidth + 1) * charSize; + } + + if (char_is_one_of(dir->convSpecifier, "cC")) { + unsigned charSize = scanf_get_char_size(dir); + if (charSize == 0) + return SSS_INVALID; + if (dir->fieldWidth == 0) + return charSize; + return dir->fieldWidth * charSize; + } + + if (dir->convSpecifier == 'p') { + if (dir->lengthModifier[1] != 0) + return SSS_INVALID; + return sizeof(void *); + } + + return SSS_INVALID; +} + +// Common part of *scanf interceptors. +// Process format string and va_list, and report all store ranges. +static void scanf_common(void *ctx, const char *format, va_list ap_const) { + va_list aq; + va_copy(aq, ap_const); + + const char *p = format; + + while (p) { + ScanfDirective dir; + p = scanf_parse_next(p, &dir); + if (!p) + break; + if (dir.convSpecifier == 0) { + // This can only happen at the end of the format string. + CHECK_EQ(*p, 0); + break; + } + // Here the directive is valid. Do what it says. + if (dir.argIdx != -1) { + // Unsupported. + break; + } + if (dir.suppressed) + continue; + if (dir.allocate) { + // Unsupported; continue; } + + int size = scanf_get_store_size(&dir); + if (size == SSS_INVALID) + break; + void *p = va_arg(aq, void *); + if (size == SSS_STRLEN) { + size = internal_strlen((const char *)p) + 1; + } + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, size); } - va_end(aq); } diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc b/compiler-rt/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc index a1daa4ec4457..acf16c2718c3 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc @@ -31,14 +31,17 @@ static void testScanf2(void *ctx, const char *format, ...) { va_end(ap); } +static const char scanf_buf[] = "Test string."; +static size_t scanf_buf_size = sizeof(scanf_buf); + static void testScanf(const char *format, unsigned n, ...) { std::vector scanf_sizes; // 16 args should be enough. testScanf2((void *)&scanf_sizes, format, - (void*)0, (void*)0, (void*)0, (void*)0, - (void*)0, (void*)0, (void*)0, (void*)0, - (void*)0, (void*)0, (void*)0, (void*)0, - (void*)0, (void*)0, (void*)0, (void*)0); + scanf_buf, scanf_buf, scanf_buf, scanf_buf, + scanf_buf, scanf_buf, scanf_buf, scanf_buf, + scanf_buf, scanf_buf, scanf_buf, scanf_buf, + scanf_buf, scanf_buf, scanf_buf, scanf_buf); ASSERT_EQ(n, scanf_sizes.size()) << "Unexpected number of format arguments: '" << format << "'"; va_list ap; @@ -86,11 +89,24 @@ TEST(SanitizerCommonInterceptors, Scanf) { testScanf("%*d", 0); testScanf("%4d%8f%c", 3, I, F, C); - testScanf("%s%d", 2, 1, I); - testScanf("%[abc]", 1, 1); + testScanf("%s%d", 2, scanf_buf_size, I); + testScanf("%[abc]", 1, scanf_buf_size); testScanf("%4[bcdef]", 1, 5); - testScanf("%[]]", 1, 1); + testScanf("%[]]", 1, scanf_buf_size); testScanf("%8[^]%d0-9-]%c", 2, 9, C); testScanf("%*[^:]%n:%d:%1[ ]%n", 4, I, I, 2, I); + + testScanf("%*d%u", 1, I); + + // Unsupported stuff. + testScanf("%a[", 0); + testScanf("%as", 0); + testScanf("%aS", 0); + testScanf("%a13S", 0); + testScanf("%alS", 0); + + testScanf("%5$d", 0); + testScanf("%md", 0); + testScanf("%m10s", 0); } -- GitLab