diff --git a/CMakeLists.txt b/CMakeLists.txt index 92e85c1d7..596204c87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,10 +45,22 @@ FetchContent_Declare( GIT_REPOSITORY https://github.com/libuv/libuv.git GIT_TAG v1.x ) -FetchContent_MakeAvailable(tomlplusplus libuv) + +message(STATUS "Fetching lz4...") +# Disable building the lz4 command line executable +set(LZ4_BUILD_CLI OFF) +FetchContent_Declare( + lz4 + GIT_REPOSITORY https://github.com/lz4/lz4.git + GIT_TAG v1.10.0 + SOURCE_SUBDIR build/cmake +) + +FetchContent_MakeAvailable(tomlplusplus libuv lz4) message(STATUS "Found tomlplusplus: ${tomlplusplus_SOURCE_DIR}") message(STATUS "Found libuv: ${libuv_SOURCE_DIR}") +message(STATUS "Found lz4: ${lz4_SOURCE_DIR}") if(WIN32) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL") @@ -147,7 +159,7 @@ target_compile_options(clice-core PUBLIC ${CLICE_CXX_FLAGS}) target_link_options(clice-core PUBLIC ${CLICE_LINKER_FLAGS}) target_include_directories(clice-core PUBLIC "${CMAKE_SOURCE_DIR}/include") -target_link_libraries(clice-core PUBLIC uv_a tomlplusplus::tomlplusplus llvm-libs) +target_link_libraries(clice-core PUBLIC uv_a tomlplusplus::tomlplusplus llvm-libs lz4_static) # clice executable add_executable(clice "${CMAKE_SOURCE_DIR}/src/Driver/clice.cc") diff --git a/include/Support/Compression.h b/include/Support/Compression.h new file mode 100644 index 000000000..0d3bcebe5 --- /dev/null +++ b/include/Support/Compression.h @@ -0,0 +1,46 @@ +#pragma once + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/Frontend/CompilerInstance.h" +#include + +namespace llvm { +class MemoryBuffer; +} + +namespace clice { + +/** + * @brief Compresses a pre-compiled file (PCH/PCM) using LZ4 and removes the original file. + * + * The compressed file has a custom format: an 8-byte header for the + * original size (uint64_t), followed by the LZ4 compressed data. + * + * @param path The path to the pre-compiled file to compress. + */ +void compressPreCompiledFile(std::string path); +/** + * @brief Compresses a file using LZ4 and saves it to an output path. + * + * The compressed file has a custom format: an 8-byte header for the + * original size (uint64_t), followed by the LZ4 compressed data. + * + * @param inputPath The path to the file to compress. + * @param outputPath The path where the compressed file will be saved. + * @return true on success, false on failure (e.g., input file not found). + */ +bool compressToFile(std::string inputPath, std::string outputPath); +/** + * @brief Decompresses a file compressed with `compressToFile`. + * + * The function reads the custom format: an 8-byte header for the + * original size (uint64_t), followed by the LZ4 compressed data. + * + * @param path The path to the compressed file. + * @return A unique pointer to a MemoryBuffer containing the decompressed data, + * or nullptr on failure (e.g., file not found or decompression error). + */ +std::unique_ptr decompressFile(std::string path); + +} // namespace clice diff --git a/src/Compiler/Compilation.cpp b/src/Compiler/Compilation.cpp index bf6da8fda..0c0c70eca 100644 --- a/src/Compiler/Compilation.cpp +++ b/src/Compiler/Compilation.cpp @@ -5,6 +5,9 @@ #include "clang/Lex/PreprocessorOptions.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "Support/Compression.h" +#include "Support/FileSystem.h" +#include "Support/Logger.h" namespace clice { @@ -118,7 +121,30 @@ auto create_invocation(CompilationParams& params, params.buffers.clear(); auto [pch, bound] = params.pch; - pp_opts.ImplicitPCHInclude = std::move(pch); + if(pch.ends_with(".lz4")) { + // When a compressed PCH/PCM is found, we decompress it into a memory buffer. + // To allow clang to read this in-memory PCH/PCM as if it were a file on disk, + // we create a virtual file system (VFS). This VFS overlays the real file system. + if(auto buffer = decompressFile(pch)) { + std::string pch_path = pch; + pch_path.resize(pch_path.size() - 4); + + auto imfs = llvm::makeIntrusiveRefCnt(); + imfs->addFile(pch_path, 0, std::move(buffer)); + + auto overlay = llvm::makeIntrusiveRefCnt(params.vfs); + overlay->pushOverlay(imfs); + + params.vfs = std::move(overlay); + + pp_opts.ImplicitPCHInclude = std::move(pch_path); + } else { + log::warn("Failed to decompress PCH '{}', proceeding without it.", pch); + } + } else { + pp_opts.ImplicitPCHInclude = std::move(pch); + } + if(bound != 0) { pp_opts.PrecompiledPreambleBytes = {bound, false}; } diff --git a/src/Server/Document.cpp b/src/Server/Document.cpp index b0821e235..f73815b2d 100644 --- a/src/Server/Document.cpp +++ b/src/Server/Document.cpp @@ -1,4 +1,5 @@ #include "Support/Logger.h" +#include "Support/Compression.h" #include "Server/Server.h" #include "Compiler/Compilation.h" #include "Feature/Diagnostic.h" @@ -226,6 +227,8 @@ async::Task build_pch_task(CompilationDatabase::LookupInfo& info, log::warn("{}", diagnostic.message); } co_return false; + } else { + compressPreCompiledFile(pch.path); } log::info("Building PCH successfully for {}", path); @@ -311,6 +314,10 @@ async::Task<> Server::build_ast(std::string path, std::string content) { params.arguments = database.get_command(path, options).arguments; params.add_remapped_file(path, content); params.pch = {pch->path, pch->preamble.size()}; + // if compressed PCH exists, use it + if(fs::exists(params.pch.first + ".lz4")) { + params.pch.first = params.pch.first + ".lz4"; + } file->diagnostics->clear(); params.diagnostics = file->diagnostics; diff --git a/src/Support/Compression.cpp b/src/Support/Compression.cpp new file mode 100644 index 000000000..45a852046 --- /dev/null +++ b/src/Support/Compression.cpp @@ -0,0 +1,96 @@ +#include "Support/Compression.h" +#include "llvm/Support/MemoryBuffer.h" +#include "lz4.h" +#include +#include "Support/FileSystem.h" +#include "Support/Logger.h" + +namespace clice { + +void compressPreCompiledFile(std::string path) { + if(!fs::exists(path)) { + log::warn("PreCompiledFile does not exist: {}", path); + } else if(!compressToFile(path, path + ".lz4")) { + log::warn("Fail to compress PreCompiledFile: {}", path); + } else { + if(auto ec = fs::remove(path)) { + log::warn("Fail to remove original PreCompiledFile: {}. Reason: {}", + path, + ec.message()); + } + } +} + +bool compressToFile(std::string inputPath, std::string outputPath) { + auto content = fs::read(inputPath); + if(!content) { + return false; + } + uint64_t originalSize = content->size(); + int maxCompressedSize = LZ4_compressBound(originalSize); + std::vector compressed(maxCompressedSize + sizeof(uint64_t)); + + // Our custom format: Prepend the 8-byte original size as a header + // before the compressed data. + memcpy(compressed.data(), &originalSize, sizeof(uint64_t)); + + int compressedDataSize = LZ4_compress_default(content->data(), + compressed.data() + sizeof(uint64_t), + originalSize, + maxCompressedSize); + if(compressedDataSize <= 0) { + return false; + } + + size_t totalSize = compressedDataSize + sizeof(uint64_t); + + llvm::StringRef dataToWrite(compressed.data(), totalSize); + auto result = fs::write(outputPath, dataToWrite); + return result.has_value(); +} + +std::unique_ptr decompressFile(std::string path) { + auto content = fs::read(path); + if(!content) { + log::warn("Fail to read compressed file: {}", path); + return nullptr; + } + + if(content->size() < sizeof(uint64_t)) { + log::warn("Fail to read compressed file: {}", path); + return nullptr; // Not enough data for size header + } + + // Our custom format: Prepend the 8-byte original size as a header + // before the compressed data. + uint64_t originalSize; + memcpy(&originalSize, content->data(), sizeof(uint64_t)); + + const char* compressedData = content->data() + sizeof(uint64_t); + size_t compressedDataSize = content->size() - sizeof(uint64_t); + + auto buffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(originalSize); + if(!buffer) { + log::warn("Fail to allocate memory buffer for decompression: {}", path); + return nullptr; + } + + int decompressedSize = LZ4_decompress_safe(compressedData, + buffer->getBufferStart(), + compressedDataSize, + originalSize); + + if(decompressedSize < 0) { // LZ4 returns negative on error + log::warn("Fail to decompress file: {}", path); + return nullptr; + } + + if(static_cast(decompressedSize) != originalSize) { + // This case should ideally not happen if the stored size is correct + return nullptr; + } + + return buffer; +} + +} // namespace clice diff --git a/tests/unit/Compiler/Module.cpp b/tests/unit/Compiler/Module.cpp index d57e29556..3dcf98de4 100644 --- a/tests/unit/Compiler/Module.cpp +++ b/tests/unit/Compiler/Module.cpp @@ -1,5 +1,6 @@ #include "Test/Test.h" #include "Compiler/Compilation.h" +#include "Support/Compression.h" #include "llvm/Support/ToolOutputFile.h" namespace clice::testing { @@ -29,6 +30,8 @@ auto buildPCM = [](llvm::StringRef file, llvm::StringRef code) { if(!compile(params, pcm)) { llvm::errs() << "Failed to build PCM\n"; std::abort(); + } else { + compressPreCompiledFile(pcm.path); } return pcm; diff --git a/xmake.lua b/xmake.lua index 4be6171d3..1337ebb38 100644 --- a/xmake.lua +++ b/xmake.lua @@ -41,6 +41,7 @@ end add_requires(libuv_require, "toml++") add_requires("llvm", {system = false}) +add_requires("lz4") add_rules("mode.release", "mode.debug", "mode.releasedbg") set_languages("c++23") @@ -51,7 +52,7 @@ target("clice-core") add_files("src/**.cpp|Driver/*.cpp") add_includedirs("include", {public = true}) - add_packages("libuv", "toml++", {public = true}) + add_packages("libuv", "toml++", "lz4", {public = true}) if is_mode("debug") then add_packages("llvm", {