Move PreambleBounds into the lexer and make preamble offsets BOM-aware.

Summary of the changes below:
  * PreambleBounds moves from Frontend/PrecompiledPreamble.h to Lex/Lexer.h,
    and Lexer::ComputePreamble returns it instead of std::pair<unsigned, bool>.
  * The preamble size is measured from FileLoc instead of StartLoc; per the
    new struct comment the size includes the BOM, if any.
  * Lexer::SkipBytes (advance BufferPtr by N bytes) becomes
    Lexer::SetByteOffset (set BufferPtr to BufferStart + Offset).
  * PreprocessorOptions initialises PrecompiledPreambleBytes.second to false.
  * Adds the PCHPreambleTest.ParseWithBom unit test.

NOTE(review): this patch was recovered from a copy whose line breaks and
template argument lists had been lost. Hunk line counts were checked against
the hunk headers, but leading indentation, trailing-whitespace-only
differences, and the ErrorOr<PrecompiledPreamble> / unique_ptr<ASTUnit>
arguments are reconstructed; confirm against the target tree before applying.

diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h
index 61cb31bd312ce2d626cc733e9a411e5c11f7ea6e..6b0b6261e4c7cad2aa480880a4418b1cb2a13d26 100644
--- a/clang/include/clang/Frontend/PrecompiledPreamble.h
+++ b/clang/include/clang/Frontend/PrecompiledPreamble.h
@@ -36,21 +36,6 @@ class CompilerInvocation;
 class DeclGroupRef;
 class PCHContainerOperations;
 
-/// A size of the preamble and a flag required by
-/// PreprocessorOptions::PrecompiledPreambleBytes.
-struct PreambleBounds {
-  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
-      : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-
-  /// \brief Size of the preamble in bytes.
-  unsigned Size;
-  /// \brief Whether the preamble ends at the start of a new line.
-  ///
-  /// Used to inform the lexer as to whether it's starting at the beginning of
-  /// a line after skipping the preamble.
-  bool PreambleEndsAtStartOfLine;
-};
-
 /// \brief Runs lexer to compute suggested preamble bounds.
 PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
                                      llvm::MemoryBuffer *Buffer,
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index aa8bf3891ed966a9dac2fc66be88f0c2f43f8180..603ce10f6450f1eea2ea507a1df3d9cf520ada5b 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -39,6 +39,23 @@ enum ConflictMarkerKind {
   CMK_Perforce
 };
 
+/// Describes the size of the preamble and carries the flag required by
+/// PreprocessorOptions::PrecompiledPreambleBytes.
+/// The preamble includes the BOM, if any.
+struct PreambleBounds {
+  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
+    : Size(Size),
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+  /// \brief Size of the preamble in bytes.
+  unsigned Size;
+  /// \brief Whether the preamble ends at the start of a new line.
+  ///
+  /// Used to inform the lexer as to whether it's starting at the beginning of
+  /// a line after skipping the preamble.
+  bool PreambleEndsAtStartOfLine;
+};
+
 /// Lexer - This provides a simple interface that turns a text buffer into a
 /// stream of tokens. This provides no support for file reading or buffering,
 /// or buffering/seeking of tokens, only forward lexing is supported. It relies
@@ -443,11 +460,11 @@ public:
   /// to fewer than this number of lines.
   ///
   /// \returns The offset into the file where the preamble ends and the rest
-  /// of the file begins along with a boolean value indicating whether 
+  /// of the file begins along with a boolean value indicating whether
   /// the preamble ends at the beginning of a new line.
-  static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
-                                                   const LangOptions &LangOpts,
-                                                   unsigned MaxLines = 0);
+  static PreambleBounds ComputePreamble(StringRef Buffer,
+                                        const LangOptions &LangOpts,
+                                        unsigned MaxLines = 0);
 
   /// \brief Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
@@ -618,7 +635,7 @@ private:
   //===--------------------------------------------------------------------===//
   // Other lexer functions.
 
-  void SkipBytes(unsigned Bytes, bool StartOfLine);
+  void SetByteOffset(unsigned Offset, bool StartOfLine);
 
   void PropagateLineStartLeadingSpaceInfo(Token &Result);
 
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index d91c665cf1dd590c850c9a9dea35540e3603b500..760b308f9240ef105a9475cd302d394712dfc316 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -160,7 +160,7 @@ public:
                           DisablePCHValidation(false),
                           AllowPCHWithCompilerErrors(false),
                           DumpDeserializedPCHDecls(false),
-                          PrecompiledPreambleBytes(0, true),
+                          PrecompiledPreambleBytes(0, false),
                           GeneratePreamble(false),
                           RemappedFilesKeepOriginalName(true),
                           RetainRemappedFileBuffers(false),
@@ -195,7 +195,7 @@ public:
     LexEditorPlaceholders = true;
     RetainRemappedFileBuffers = true;
     PrecompiledPreambleBytes.first = 0;
-    PrecompiledPreambleBytes.second = 0;
+    PrecompiledPreambleBytes.second = false;
   }
 };
 
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 3e3483d2c6be9ccf0771ff2b63e8afedd13ec144..86460f17d0f85ffe1ac14d6f8e36f8e9c238ecb3 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -591,7 +591,7 @@ void PrintPreambleAction::ExecuteAction() {
   auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
   if (Buffer) {
     unsigned Preamble =
-        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
+        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size;
     llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
   }
 }
diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp
index cd7446189cc273611cb33c50fcd2931a38aa2a31..bd6770acdff5bbe85138b8ee74d51c4a14163759 100644
--- a/clang/lib/Frontend/PrecompiledPreamble.cpp
+++ b/clang/lib/Frontend/PrecompiledPreamble.cpp
@@ -195,8 +195,7 @@ template <typename T> bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
 PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
                                             llvm::MemoryBuffer *Buffer,
                                             unsigned MaxLines) {
-  auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
-  return PreambleBounds(Pre.first, Pre.second);
+  return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
 }
 
 llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 928c24d94c3fe66999d7dc8c9088b5a6b12ebc0b..b7f97a583dbb01b5d5dec25ac55b8e08b2b7609c 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -552,9 +552,9 @@ namespace {
 
 } // end anonymous namespace
 
-std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
-                                                 const LangOptions &LangOpts,
-                                                 unsigned MaxLines) {
+PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
+                                      const LangOptions &LangOpts,
+                                      unsigned MaxLines) {
   // Create a lexer starting at the beginning of the file. Note that we use a
   // "fake" file source location at offset 1 so that the lexer will track our
   // position within the file.
@@ -688,7 +688,7 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
   else
     End = TheTok.getLocation();
 
-  return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
+  return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
                         TheTok.isAtStartOfLine());
 }
 
@@ -1394,9 +1394,9 @@ Slash:
 // Helper methods for lexing.
 //===----------------------------------------------------------------------===//
 
-/// \brief Routine that indiscriminately skips bytes in the source file.
-void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
-  BufferPtr += Bytes;
+/// \brief Routine that indiscriminately sets the offset into the source file.
+void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
+  BufferPtr = BufferStart + Offset;
   if (BufferPtr > BufferEnd)
     BufferPtr = BufferEnd;
   // FIXME: What exactly does the StartOfLine bit mean? There are two
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index e1294994df5cb09bd713537d49f92a8506132b7b..1f9a469bc5985ee07d0024a21393363392e2dddf 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -516,9 +516,9 @@ void Preprocessor::EnterMainSourceFile() {
     // If we've been asked to skip bytes in the main file (e.g., as part of a
     // precompiled preamble), do so now.
     if (SkipMainFilePreamble.first > 0)
-      CurLexer->SkipBytes(SkipMainFilePreamble.first, 
-                          SkipMainFilePreamble.second);
-    
+      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
+                              SkipMainFilePreamble.second);
+
     // Tell the header info that the main file was entered. If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
diff --git a/clang/unittests/Frontend/PCHPreambleTest.cpp b/clang/unittests/Frontend/PCHPreambleTest.cpp
index a771167cc76f16de59dc19df04dcf6da7ed6e4e6..162a281b04d49ece34976cf4fa0d84579e59ed07 100644
--- a/clang/unittests/Frontend/PCHPreambleTest.cpp
+++ b/clang/unittests/Frontend/PCHPreambleTest.cpp
@@ -153,4 +153,48 @@ TEST_F(PCHPreambleTest, ReparseWithOverriddenFileDoesNotInvalidatePreamble) {
   ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
 }
 
+TEST_F(PCHPreambleTest, ParseWithBom) {
+  std::string Header = "//./header.h";
+  std::string Main = "//./main.cpp";
+  AddFile(Header, "int random() { return 4; }");
+  AddFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  std::unique_ptr<ASTUnit> AST(ParseAST(Main));
+  ASSERT_TRUE(AST.get());
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  unsigned HeaderReadCount = GetFileReadCount(Header);
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  // Check preamble PCH was really reused
+  ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header));
+
+  // Remove BOM
+  RemapFile(Main,
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+  HeaderReadCount = GetFileReadCount(Header);
+
+  // Add BOM back
+  RemapFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() -2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+}
+
 } // anonymous namespace