From bde0a1632b36a4f47a378459d9b333a91ac3948c Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Wed, 9 Nov 2016 00:15:54 +0000 Subject: [PATCH] Added the ability to dump hex bytes easily into a raw_ostream. Unit tests were added to verify this functionality keeps working correctly. Example output for raw hex bytes: llvm::ArrayRef Bytes = ...; llvm::outs() << format_hex_bytes(Bytes); 554889e5 4881ec70 04000048 8d051002 00004c8d 05fd0100 004c8b0d d0020000 Example output for raw hex bytes with offsets: llvm::outs() << format_hex_bytes(Bytes, 0x100000d10); 0x0000000100000d10: 554889e5 4881ec70 04000048 8d051002 0x0000000100000d20: 00004c8d 05fd0100 004c8b0d d0020000 Example output for raw hex bytes with ASCII with offsets: llvm::outs() << format_hex_bytes_with_ascii(Bytes, 0x100000d10); 0x0000000100000d10: 554889e5 4881ec70 04000048 8d051002 |UH.?H.?p...H....| 0x0000000100000d20: 00004c8d 05fd0100 004c8b0d d0020000 |..L..?...L..?...| The default groups bytes into 4 byte groups, but this can be changed to 1 byte: llvm::outs() << format_hex_bytes(Bytes, 0x100000d10, 16 /*NumPerLine*/, 1 /*ByteGroupSize*/); 0x0000000100000d10: 55 48 89 e5 48 81 ec 70 04 00 00 48 8d 05 10 02 0x0000000100000d20: 00 00 4c 8d 05 fd 01 00 00 4c 8b 0d d0 02 00 00 llvm::outs() << format_hex_bytes(Bytes, 0x100000d10, 16 /*NumPerLine*/, 2 /*ByteGroupSize*/); 0x0000000100000d10: 5548 89e5 4881 ec70 0400 0048 8d05 1002 0x0000000100000d20: 0000 4c8d 05fd 0100 004c 8b0d d002 0000 llvm::outs() << format_hex_bytes(Bytes, 0x100000d10, 8 /*NumPerLine*/, 1 /*ByteGroupSize*/); 0x0000000100000d10: 55 48 89 e5 48 81 ec 70 0x0000000100000d18: 04 00 00 48 8d 05 10 02 0x0000000100000d20: 00 00 4c 8d 05 fd 01 00 0x0000000100000d28: 00 4c 8b 0d d0 02 00 00 https://reviews.llvm.org/D26405 llvm-svn: 286316 --- llvm/include/llvm/Support/Format.h | 34 +++++ llvm/include/llvm/Support/raw_ostream.h | 4 + llvm/lib/Support/raw_ostream.cpp | 55 +++++++ llvm/unittests/Support/raw_ostream_test.cpp | 152 ++++++++++++++++++++ 4 files changed, 245 insertions(+) diff --git a/llvm/include/llvm/Support/Format.h b/llvm/include/llvm/Support/Format.h index 026488cc70f8..7730c434d635 100644 --- a/llvm/include/llvm/Support/Format.h +++ b/llvm/include/llvm/Support/Format.h @@ -23,6 +23,7 @@ #ifndef LLVM_SUPPORT_FORMAT_H #define LLVM_SUPPORT_FORMAT_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" @@ -202,6 +203,39 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) { return FormattedNumber(0, N, Width, false, false, false); } +class FormattedHexBytes { + llvm::ArrayRef Bytes; + // Display offsets for each line if FirstByteOffset has a value. + llvm::Optional FirstByteOffset; + uint32_t NumPerLine; // Number of bytes to show per line. + uint8_t ByteGroupSize; // How many hex bytes are grouped without spaces + bool Upper; // Show offset and hex bytes as upper case. + bool ASCII; // Show the ASCII bytes for the hex bytes to the right. + friend class raw_ostream; + +public: + FormattedHexBytes(llvm::ArrayRef B, llvm::Optional O, + uint32_t NPL, uint8_t BGS, bool U, bool A) + : Bytes(B), FirstByteOffset(O), NumPerLine(NPL), ByteGroupSize(BGS), + Upper(U), ASCII(A) {} +}; + +inline FormattedHexBytes format_hex_bytes( + llvm::ArrayRef Bytes, + llvm::Optional FirstByteOffset = llvm::Optional(), + uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4) { + return FormattedHexBytes(Bytes, FirstByteOffset, NumPerLine, ByteGroupSize, + false /*Upper*/, false /*ASCII*/); +} + +inline FormattedHexBytes format_hex_bytes_with_ascii( + llvm::ArrayRef Bytes, + llvm::Optional FirstByteOffset = llvm::Optional(), + uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4) { + return FormattedHexBytes(Bytes, FirstByteOffset, NumPerLine, ByteGroupSize, + false /*Upper*/, true /*ASCII*/); +} + } // end namespace llvm #endif diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index d1e96f892a4b..70c08f79b8d9 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -23,6 +23,7 @@ namespace llvm { class format_object_base; class FormattedString; class FormattedNumber; +class FormattedHexBytes; template class SmallVectorImpl; namespace sys { @@ -222,6 +223,9 @@ public: // Formatted output, see the formatHex() function in Support/Format.h. raw_ostream &operator<<(const FormattedNumber &); + // Formatted output, see the formatHex() function in Support/Format.h. + raw_ostream &operator<<(const FormattedHexBytes &); + /// indent - Insert 'NumSpaces' spaces. raw_ostream &indent(unsigned NumSpaces); diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index b6835e3ebfe7..42f121dbea1e 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -352,6 +352,61 @@ raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) { return *this; } +raw_ostream &raw_ostream::operator<<(const FormattedHexBytes &FB) { + size_t LineIndex = 0; + const size_t Size = FB.Bytes.size(); + HexPrintStyle OffsetStyle = + FB.Upper ? HexPrintStyle::PrefixUpper : HexPrintStyle::PrefixLower; + HexPrintStyle ByteStyle = + FB.Upper ? HexPrintStyle::Upper : HexPrintStyle::Lower; + while (LineIndex < Size) { + if (FB.FirstByteOffset.hasValue()) { + uint64_t Offset = FB.FirstByteOffset.getValue(); + llvm::write_hex(*this, Offset + LineIndex, OffsetStyle, + sizeof(Offset) * 2 + 2); + *this << ": "; + } + // Print the hex bytes for this line + uint32_t I = 0; + for (I = 0; I < FB.NumPerLine; ++I) { + size_t Index = LineIndex + I; + if (Index >= Size) + break; + if (I && (I % FB.ByteGroupSize) == 0) + *this << " "; + llvm::write_hex(*this, FB.Bytes[Index], ByteStyle, 2); + } + uint32_t BytesDisplayed = I; + if (FB.ASCII) { + // Print any spaces needed for any bytes that we didn't print on this + // line so that the ASCII bytes are correctly aligned. + for (; I < FB.NumPerLine; ++I) { + if (I && (I % FB.ByteGroupSize) == 0) + indent(3); + else + indent(2); + } + *this << " |"; + // Print the ASCII char values for each byte on this line + for (I = 0; I < FB.NumPerLine; ++I) { + size_t Index = LineIndex + I; + if (Index >= Size) + break; + char ch = (char)FB.Bytes[Index]; + if (isprint(ch)) + *this << ch; + else + *this << '.'; + } + *this << '|'; + } + LineIndex += BytesDisplayed; + if (LineIndex < Size) + *this << '\n'; + } + return *this; +} + /// indent - Insert 'NumSpaces' spaces. raw_ostream &raw_ostream::indent(unsigned NumSpaces) { static const char Spaces[] = " " diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index ed6ddabe4634..5a62ad58e33b 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -181,5 +181,157 @@ TEST(raw_ostreamTest, FormatDecimal) { printToString(format_decimal(INT64_MIN, 21), 21)); } +std::string +format_hex_bytes(const void *P, size_t N, + llvm::Optional Offset = llvm::Optional(), + uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4) { + std::string S; + if (P && N) { + llvm::raw_string_ostream Str(S); + Str << llvm::format_hex_bytes( + llvm::ArrayRef(static_cast(P), N), Offset, + NumPerLine, ByteGroupSize); + Str.flush(); + } + return S; +} + +std::string format_hex_bytes_with_ascii( + const void *P, size_t N, + llvm::Optional Offset = llvm::Optional(), + uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4) { + std::string S; + if (P && N) { + llvm::raw_string_ostream Str(S); + Str << llvm::format_hex_bytes_with_ascii( + llvm::ArrayRef(static_cast(P), N), Offset, + NumPerLine, ByteGroupSize); + Str.flush(); + } + return S; +} + +TEST(raw_ostreamTest, FormattedHexBytes) { + char b[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', + 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; + // Test raw bytes + const llvm::Optional InvalidOffset; + // Test invalid input. + EXPECT_EQ("", format_hex_bytes(nullptr, 0)); + EXPECT_EQ("", format_hex_bytes(b, 0)); + EXPECT_EQ("", format_hex_bytes(nullptr, 16)); + //---------------------------------------------------------------------- + // Test hex byte output with the default 4 byte groups + //---------------------------------------------------------------------- + EXPECT_EQ("61", format_hex_bytes(b, 1)); + EXPECT_EQ("61626364 65", format_hex_bytes(b, 5)); + // Test that 16 bytes get written to a line correctly. + EXPECT_EQ("61626364 65666768 696a6b6c 6d6e6f70", format_hex_bytes(b, 16)); + // Test raw bytes with default 16 bytes per line wrapping. + EXPECT_EQ("61626364 65666768 696a6b6c 6d6e6f70\n71", format_hex_bytes(b, 17)); + // Test raw bytes with 1 bytes per line wrapping. + EXPECT_EQ("61\n62\n63\n64\n65\n66", format_hex_bytes(b, 6, InvalidOffset, 1)); + // Test raw bytes with 7 bytes per line wrapping. + EXPECT_EQ("61626364 656667\n68696a6b 6c6d6e\n6f7071", + format_hex_bytes(b, 17, InvalidOffset, 7)); + // Test raw bytes with 8 bytes per line wrapping. + EXPECT_EQ("61626364 65666768\n696a6b6c 6d6e6f70\n71", + format_hex_bytes(b, 17, InvalidOffset, 8)); + //---------------------------------------------------------------------- + // Test hex byte output with the 1 byte groups + //---------------------------------------------------------------------- + EXPECT_EQ("61 62 63 64 65", format_hex_bytes(b, 5, InvalidOffset, 16, 1)); + // Test that 16 bytes get written to a line correctly. + EXPECT_EQ("61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70", + format_hex_bytes(b, 16, InvalidOffset, 16, 1)); + // Test raw bytes with default 16 bytes per line wrapping. + EXPECT_EQ("61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70\n71", + format_hex_bytes(b, 17, InvalidOffset, 16, 1)); + // Test raw bytes with 7 bytes per line wrapping. + EXPECT_EQ("61 62 63 64 65 66 67\n68 69 6a 6b 6c 6d 6e\n6f 70 71", + format_hex_bytes(b, 17, InvalidOffset, 7, 1)); + // Test raw bytes with 8 bytes per line wrapping. + EXPECT_EQ("61 62 63 64 65 66 67 68\n69 6a 6b 6c 6d 6e 6f 70\n71", + format_hex_bytes(b, 17, InvalidOffset, 8, 1)); + //---------------------------------------------------------------------- + // Test hex byte output with the 2 byte groups + //---------------------------------------------------------------------- + EXPECT_EQ("6162 6364 65", format_hex_bytes(b, 5, InvalidOffset, 16, 2)); + // Test that 16 bytes get written to a line correctly. + EXPECT_EQ("6162 6364 6566 6768 696a 6b6c 6d6e 6f70", + format_hex_bytes(b, 16, InvalidOffset, 16, 2)); + // Test raw bytes with default 16 bytes per line wrapping. + EXPECT_EQ("6162 6364 6566 6768 696a 6b6c 6d6e 6f70\n71", + format_hex_bytes(b, 17, InvalidOffset, 16, 2)); + // Test raw bytes with 7 bytes per line wrapping. + EXPECT_EQ("6162 6364 6566 67\n6869 6a6b 6c6d 6e\n6f70 71", + format_hex_bytes(b, 17, InvalidOffset, 7, 2)); + // Test raw bytes with 8 bytes per line wrapping. + EXPECT_EQ("6162 6364 6566 6768\n696a 6b6c 6d6e 6f70\n71", + format_hex_bytes(b, 17, InvalidOffset, 8, 2)); + + //---------------------------------------------------------------------- + // Test hex bytes with offset with the default 4 byte groups. + //---------------------------------------------------------------------- + EXPECT_EQ("0x0000000000000000: 61", format_hex_bytes(b, 1, 0x0)); + EXPECT_EQ("0x0000000000001000: 61", format_hex_bytes(b, 1, 0x1000)); + EXPECT_EQ("0x0000000000001000: 61\n0x0000000000001001: 62", + format_hex_bytes(b, 2, 0x1000, 1)); + //---------------------------------------------------------------------- + // Test hex bytes with ASCII with the default 4 byte groups. + //---------------------------------------------------------------------- + EXPECT_EQ("61626364 65666768 696a6b6c 6d6e6f70 |abcdefghijklmnop|", + format_hex_bytes_with_ascii(b, 16)); + EXPECT_EQ("61626364 65666768 |abcdefgh|\n" + "696a6b6c 6d6e6f70 |ijklmnop|", + format_hex_bytes_with_ascii(b, 16, InvalidOffset, 8)); + EXPECT_EQ("61626364 65666768 |abcdefgh|\n696a6b6c |ijkl|", + format_hex_bytes_with_ascii(b, 12, InvalidOffset, 8)); + char unprintable[] = {'a', '\x1e', 'b', '\x1f'}; + // Make sure the ASCII is still lined up correctly when fewer bytes than 16 + // bytes per line are available. The ASCII should still be aligned as if 16 + // bytes of hex might be displayed. + EXPECT_EQ("611e621f |a.b.|", + format_hex_bytes_with_ascii(unprintable, 4)); + //---------------------------------------------------------------------- + // Test hex bytes with ASCII with offsets with the default 4 byte groups. + //---------------------------------------------------------------------- + EXPECT_EQ("0x0000000000000000: 61626364 65666768 " + "696a6b6c 6d6e6f70 |abcdefghijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0)); + EXPECT_EQ("0x0000000000000000: 61626364 65666768 |abcdefgh|\n" + "0x0000000000000008: 696a6b6c 6d6e6f70 |ijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0, 8)); + EXPECT_EQ("0x0000000000000000: 61626364 656667 |abcdefg|\n" + "0x0000000000000007: 68696a6b 6c |hijkl|", + format_hex_bytes_with_ascii(b, 12, 0, 7)); + + //---------------------------------------------------------------------- + // Test hex bytes with ASCII with offsets with the default 2 byte groups. + //---------------------------------------------------------------------- + EXPECT_EQ("0x0000000000000000: 6162 6364 6566 6768 " + "696a 6b6c 6d6e 6f70 |abcdefghijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0, 16, 2)); + EXPECT_EQ("0x0000000000000000: 6162 6364 6566 6768 |abcdefgh|\n" + "0x0000000000000008: 696a 6b6c 6d6e 6f70 |ijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0, 8, 2)); + EXPECT_EQ("0x0000000000000000: 6162 6364 6566 67 |abcdefg|\n" + "0x0000000000000007: 6869 6a6b 6c |hijkl|", + format_hex_bytes_with_ascii(b, 12, 0, 7, 2)); + + //---------------------------------------------------------------------- + // Test hex bytes with ASCII with offsets with the default 1 byte groups. + //---------------------------------------------------------------------- + EXPECT_EQ("0x0000000000000000: 61 62 63 64 65 66 67 68 " + "69 6a 6b 6c 6d 6e 6f 70 |abcdefghijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0, 16, 1)); + EXPECT_EQ("0x0000000000000000: 61 62 63 64 65 66 67 68 |abcdefgh|\n" + "0x0000000000000008: 69 6a 6b 6c 6d 6e 6f 70 |ijklmnop|", + format_hex_bytes_with_ascii(b, 16, 0, 8, 1)); + EXPECT_EQ("0x0000000000000000: 61 62 63 64 65 66 67 |abcdefg|\n" + "0x0000000000000007: 68 69 6a 6b 6c |hijkl|", + format_hex_bytes_with_ascii(b, 12, 0, 7, 1)); +} } -- GitLab