diff --git a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml index dbc01b8..621cf6f 100644 --- a/.github/workflows/generate-docs.yml +++ b/.github/workflows/generate-docs.yml @@ -8,7 +8,7 @@ on: jobs: generate: - runs-on: windows-2025 + runs-on: windows-latest permissions: contents: write @@ -32,8 +32,41 @@ jobs: restore-keys: | Windows-sccache- + - name: Restore configure artifacts cache + id: restore-configure-cache + uses: actions/cache/restore@v5 + with: + path: | + build/RelWithDebInfo/.llvm + build/RelWithDebInfo/include + build/RelWithDebInfo/_deps + build/RelWithDebInfo/*.tar.* + key: >- + Windows-configure-RelWithDebInfo-${{ + hashFiles('pixi.lock', 'pixi.toml', '**/CMakeLists.txt', 'cmake/**/*.cmake') + }}-${{ github.sha }} + restore-keys: | + Windows-configure-RelWithDebInfo-${{ hashFiles('pixi.lock', 'pixi.toml', '**/CMakeLists.txt', 'cmake/**/*.cmake') }}- + + - name: Start sccache + run: sccache --start-server + + - name: Configure + run: pixi run cmake-config + + - name: Save configure artifacts cache + if: success() + uses: actions/cache/save@v5 + with: + path: | + build/RelWithDebInfo/.llvm + build/RelWithDebInfo/include + build/RelWithDebInfo/_deps + build/RelWithDebInfo/*.tar.* + key: ${{ steps.restore-configure-cache.outputs.cache-primary-key }} + - name: Build - run: pixi run build + run: pixi run cmake-build - name: Generate documentation run: | @@ -53,7 +86,3 @@ jobs: name: clore-docs path: clore-docs.zip retention-days: 30 - - - name: Stop sccache - if: always() - run: pixi run -- sccache --stop-server 2>$null diff --git a/CMakeLists.txt b/CMakeLists.txt index e9e39ea..7489ca1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,9 +43,7 @@ endif() set(CLORE_ENABLE_IPO OFF) set(CLORE_USE_LTO_ARTIFACT OFF) if(CLORE_ENABLE_LTO) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "Interprocedural optimization disabled for Debug builds.") - else() + if(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") check_ipo_supported(RESULT CLORE_IPO_SUPPORTED OUTPUT CLORE_IPO_ERROR LANGUAGES C CXX) if(CLORE_IPO_SUPPORTED) set(CLORE_ENABLE_IPO ON) @@ -54,6 +52,8 @@ if(CLORE_ENABLE_LTO) else() message(WARNING "LTO requested but IPO is not supported by the active toolchain: ${CLORE_IPO_ERROR}") endif() + else() + message(STATUS "Interprocedural optimization disabled for non-RelWithDebInfo builds.") endif() endif() diff --git a/cmake/toolchain.cmake b/cmake/toolchain.cmake index f237c37..16bc455 100644 --- a/cmake/toolchain.cmake +++ b/cmake/toolchain.cmake @@ -3,6 +3,13 @@ cmake_minimum_required(VERSION 3.30) set(CMAKE_C_COMPILER clang CACHE STRING "") set(CMAKE_CXX_COMPILER clang++ CACHE STRING "") +find_program(CLANG_SCAN_DEPS_PATH NAMES "clang-scan-deps" NO_CACHE) +if(CLANG_SCAN_DEPS_PATH) + set(CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS "${CLANG_SCAN_DEPS_PATH}" CACHE FILEPATH "") +else() + unset(CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS CACHE) +endif() + find_program(LLVM_AR_PATH "llvm-ar") if(LLVM_AR_PATH) set(CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "") diff --git a/pixi.lock b/pixi.lock index 45e192c..b3d7243 100644 --- a/pixi.lock +++ b/pixi.lock @@ -15,6 +15,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/ccache-4.13.2-hedf47ba_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-20-20.1.8-default_h99862b1_14.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-20.1.8-default_cfg_hcbb2b3e_14.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-20-20.1.8-default_h99862b1_14.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-20.1.8-default_h99862b1_14.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-tools-20.1.8-default_h57a47db_14.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/clang_impl_linux-64-20.1.8-default_cfg_h027053c_14.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/clangxx-20.1.8-default_cfg_hcbb2b3e_14.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/clangxx_impl_linux-64-20.1.8-default_cfg_h027053c_14.conda @@ -31,6 +34,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_102.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp20.1-20.1.8-default_h99862b1_14.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang13-22.1.3-default_h746c552_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.19.0-hcf29cc6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda @@ -43,6 +47,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.3.0-h5888daf_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm20-20.1.8-hf7376ad_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm22-22.1.3-hf7376ad_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.68.1-h877daf1_0.conda @@ -81,6 +86,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cctools_impl_osx-arm64-1030.6.3-llvm20_1_hd60c58f_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-20-20.1.8-default_hf3020a7_14.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-20.1.8-default_cfg_hb78b91e_14.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-format-20-20.1.8-default_hf3020a7_14.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-format-20.1.8-default_hf3020a7_14.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-tools-20.1.8-default_h1589341_14.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang_impl_osx-arm64-20.1.8-default_cfg_h6e044b8_14.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clangxx-20.1.8-default_cfg_h170a469_14.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clangxx_impl_osx-arm64-20.1.8-default_cfg_h6e044b8_14.conda @@ -92,6 +100,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ld64-956.6-llvm20_1_hb625feb_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ld64_osx-arm64-956.6-llvm20_1_h4e43e91_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libclang-cpp20.1-20.1.8-default_hf3020a7_14.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libclang13-22.1.3-default_h13b06bd_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.19.0-hd5a2499_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-22.1.3-h55c6f16_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-devel-20.1.8-h6dc3340_3.conda @@ -103,6 +112,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libhiredis-1.3.0-h286801f_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libiconv-1.18-h23cfdf5_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libllvm20-20.1.8-h8e0c9ce_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libllvm22-22.1.3-h89af1be_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.2-h8088a28_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libmpdec-4.0.0-h84a0fba_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libnghttp2-1.68.1-h8f3e76b_0.conda @@ -135,11 +145,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-h4c7d964_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/clang-20-20.1.8-default_hac490eb_14.conda - conda: https://conda.anaconda.org/conda-forge/win-64/clang-20.1.8-default_nocfg_hbb9487a_14.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/clang-format-20.1.8-default_hac490eb_14.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/clang-tools-20.1.8-default_hac490eb_14.conda - conda: https://conda.anaconda.org/conda-forge/win-64/clangxx-20.1.8-default_nocfg_hbb9487a_14.conda - conda: https://conda.anaconda.org/conda-forge/win-64/cmake-4.3.1-hdcbee5b_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/compiler-rt-20.1.8-h49e36cd_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/compiler-rt_win-64-20.1.8-h49e36cd_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.22.2-h0ea6238_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libclang13-22.1.3-default_ha2db4b5_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.19.0-h8206538_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.5-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h3d046cb_0.conda @@ -498,6 +511,20 @@ packages: license_family: Apache size: 73446163 timestamp: 1773141809598 +- conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-20.1.8-default_h99862b1_14.conda + sha256: b58148bb200231f21a0a78e90456414be7de8dd4fc115ebdf6e9d808d151e9e6 + md5: de9baf19d75c9ca7b4e88f5609db0347 + depends: + - __glibc >=2.17,<3.0.a0 + - clang-format-20 20.1.8 default_h99862b1_14 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libgcc >=14 + - libllvm20 >=20.1.8,<20.2.0a0 + - libstdcxx >=14 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 71009 + timestamp: 1773111214926 - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-21.1.7-default_h99862b1_4.conda sha256: 6a0ae82f085f647a6661b4423297bbb0778e179abe3c8e5dfeb2b52b9b048900 md5: 79eb9cc7bbadb979dd5da6b75fde93d2 @@ -512,6 +539,19 @@ packages: license_family: Apache size: 28255 timestamp: 1767756297733 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-format-20.1.8-default_hf3020a7_14.conda + sha256: 0967d59e8a89c5f9c1cfa3579403ee3295f5770c01b308c9abb434df47acc5db + md5: 86639340249e4d8fa1b37ab92e581d26 + depends: + - __osx >=11.0 + - clang-format-20 20.1.8 default_hf3020a7_14 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libcxx >=20.1.8 + - libllvm20 >=20.1.8,<20.2.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 71355 + timestamp: 1773110589745 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-format-21.1.7-default_hf3020a7_4.conda sha256: 39f1610a494cc82c8e7646adce5b07a3b586a637acaa1dcf2a997735d4bfba63 md5: 1b3503d3f22b783f5cba16b45c530bd8 @@ -525,6 +565,17 @@ packages: license_family: Apache size: 28477 timestamp: 1767751449926 +- conda: https://conda.anaconda.org/conda-forge/win-64/clang-format-20.1.8-default_hac490eb_14.conda + sha256: 7c657222c2b310387e11a316bf835910181c04ed54f2e35e28885cc5423c8496 + md5: 188355e2c07643a0232b9b817c230a36 + depends: + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 1265666 + timestamp: 1773142030144 - conda: https://conda.anaconda.org/conda-forge/win-64/clang-format-21.1.7-default_hac490eb_4.conda sha256: 610a7a72f8c7488f1ae18ac42ad7b93571cb1761089b5bb9ad8056080ba6df0c md5: 598309abac91358ff9dea03061168aad @@ -536,6 +587,31 @@ packages: license_family: Apache size: 1234944 timestamp: 1767759665492 +- conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-20-20.1.8-default_h99862b1_14.conda + sha256: 17b84830ca43b9db271b50d8474a4231204321016f9ff5c2c690dddeb0d66f28 + md5: 55910d2902458004d9c4e443648c5841 + depends: + - __glibc >=2.17,<3.0.a0 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libgcc >=14 + - libllvm20 >=20.1.8,<20.2.0a0 + - libstdcxx >=14 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 114295 + timestamp: 1773111143555 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-format-20-20.1.8-default_hf3020a7_14.conda + sha256: 52f77009e2f69db348c10cb20371e0b1bf6f49e2e91e026d5fb93addc6526ef6 + md5: 2250c216f6da8f1f875f2b6a794d5e67 + depends: + - __osx >=11.0 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libcxx >=20.1.8 + - libllvm20 >=20.1.8,<20.2.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 108365 + timestamp: 1773110420001 - conda: https://conda.anaconda.org/conda-forge/linux-64/clang-format-21-21.1.7-default_h99862b1_4.conda sha256: 9b8ee49ea6a90bdf76e3f3ead9be8d0e2555df68b3e5464120ddaa5132869aad md5: e1af1ab949fdae74788be7ce8356f054 @@ -561,6 +637,62 @@ packages: license_family: Apache size: 65348 timestamp: 1767751325883 +- conda: https://conda.anaconda.org/conda-forge/linux-64/clang-tools-20.1.8-default_h57a47db_14.conda + sha256: 565bbd489160501c5374fe346dca83a6ec4dac23aecc5bc48c276fe47ca2c263 + md5: 897e3de8e6310eb23d01004907520513 + depends: + - __glibc >=2.17,<3.0.a0 + - clang-format 20.1.8 default_h99862b1_14 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libclang13 >=20.1.8 + - libgcc >=14 + - libllvm20 >=20.1.8,<20.2.0a0 + - libstdcxx >=14 + - libxml2 + - libxml2-16 >=2.14.6 + constrains: + - clangdev 20.1.8 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 21897132 + timestamp: 1773111341498 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/clang-tools-20.1.8-default_h1589341_14.conda + sha256: 372c9e722fd99e42838528d33148ca1fa7dd84a02cf06ac7d291657f657e7026 + md5: 8a25b0558245abf052a89a38eb8dbb58 + depends: + - __osx >=11.0 + - clang-format 20.1.8 default_hf3020a7_14 + - libclang-cpp20.1 >=20.1.8,<20.2.0a0 + - libclang13 >=20.1.8 + - libcxx >=20.1.8 + - libllvm20 >=20.1.8,<20.2.0a0 + - libxml2 + - libxml2-16 >=2.14.6 + constrains: + - clangdev 20.1.8 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 14941399 + timestamp: 1773110809290 +- conda: https://conda.anaconda.org/conda-forge/win-64/clang-tools-20.1.8-default_hac490eb_14.conda + sha256: 25b5962d5e46c01edd9e91b0092a1cbe840276cfce6e0ccd8752ff9d21deb0fb + md5: 7f081578424e00ff4724ded6228f1be8 + depends: + - clang-format 20.1.8 default_hac490eb_14 + - libclang13 >=20.1.8 + - libxml2 + - libxml2-16 >=2.14.6 + - libzlib >=1.3.1,<2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - zstd >=1.5.7,<1.6.0a0 + constrains: + - clangdev 20.1.8 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 319341053 + timestamp: 1773142869937 - conda: https://conda.anaconda.org/conda-forge/linux-64/clang_impl_linux-64-20.1.8-default_cfg_h027053c_14.conda sha256: 8df1db031e9d34f86040cf49595bd1debf3fbe2b9c6a5386148e269412faafbd md5: 47da9203dde46c589423f45fbf85e2a8 @@ -1054,6 +1186,42 @@ packages: license_family: Apache size: 13683674 timestamp: 1771032261657 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libclang13-22.1.3-default_h746c552_0.conda + sha256: 485de0c70865eb489d819defea714187c84502e3c50a511173d62135b8cef12f + md5: 9b47a4cd3aabb73201a2b8ed9f127189 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libllvm22 >=22.1.3,<22.2.0a0 + - libstdcxx >=14 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 12822776 + timestamp: 1775789745068 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libclang13-22.1.3-default_h13b06bd_0.conda + sha256: d71aceabf5d3cf23a70379da09dede74f8d68aab1baf34b172659b57cfea9523 + md5: f3f542c978ae2216e7b9ca11f8dfd4fc + depends: + - __osx >=11.0 + - libcxx >=22.1.3 + - libllvm22 >=22.1.3,<22.2.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 8936113 + timestamp: 1775793501872 +- conda: https://conda.anaconda.org/conda-forge/win-64/libclang13-22.1.3-default_ha2db4b5_0.conda + sha256: 78243c98e6cbf86f901012f78a305356fadd960c046c661229184d621b2ff7e7 + md5: deb5befa374fcbc9ec2534c8467b0a6b + depends: + - libzlib >=1.3.2,<2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - zstd >=1.5.7,<1.6.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 30490578 + timestamp: 1775788007988 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.19.0-hcf29cc6_0.conda sha256: a0390fd0536ebcd2244e243f5f00ab8e76ab62ed9aa214cd54470fe7496620f4 md5: d50608c443a30c341c24277d28290f76 @@ -1393,6 +1561,35 @@ packages: license_family: Apache size: 29398498 timestamp: 1765924904821 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm22-22.1.3-hf7376ad_0.conda + sha256: ad732019e8dd963efb5a54b5ff49168f191246bc418c3033762b6e8cb64b530c + md5: aeb186f7165bf287495a267fa8ff4129 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + - libxml2 + - libxml2-16 >=2.14.6 + - libzlib >=1.3.2,<2.0a0 + - zstd >=1.5.7,<1.6.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 44235531 + timestamp: 1775641389057 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libllvm22-22.1.3-h89af1be_0.conda + sha256: 009519933ac584e828c32adeb963f236f912ab66aa688ecf9b723921001ae691 + md5: 34579e09a78af20b408bd9dda75084bb + depends: + - __osx >=11.0 + - libcxx >=19 + - libxml2 + - libxml2-16 >=2.14.6 + - libzlib >=1.3.2,<2.0a0 + - zstd >=1.5.7,<1.6.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 30043021 + timestamp: 1775645036351 - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda sha256: 755c55ebab181d678c12e49cced893598f2bab22d582fbbf4d8b83c18be207eb md5: c7c83eecbb72d88b940c249af56c8b17 diff --git a/pixi.toml b/pixi.toml index 87cb332..428cf9c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -23,6 +23,7 @@ cmake = ">=3.30" ninja = "*" clang = "==20.1.8" clangxx = "==20.1.8" +clang-tools = "==20.1.8" lld = "==20.1.8" llvm-tools = "==20.1.8" compiler-rt = "==20.1.8" @@ -60,7 +61,7 @@ cmake -B build/{{ type }} -G Ninja \ [feature.build.tasks.cmake-build] args = [{ arg = "type", default = "RelWithDebInfo" }] -cmd = "cmake --build build/{{ type }}" +cmd = "cmake --build build/{{ type }} --parallel" [feature.build.tasks.build] args = [{ arg = "type", default = "RelWithDebInfo" }] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5fd2c44..918e967 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,26 @@ -file(GLOB_RECURSE CLORE_CORE_SOURCES CONFIGURE_DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/config/*.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/extract/*.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/generate/*.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/support/*.cpp" +file(GLOB_RECURSE CLORE_MODULE_SOURCES CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/config/*.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/extract/*.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/generate/*.cppm" + "${CMAKE_CURRENT_SOURCE_DIR}/support/*.cppm" ) -add_library(clore-core STATIC ${CLORE_CORE_SOURCES}) +# ── report clang-scan-deps configuration for C++20 module compilation ─── +if(CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS) + message(STATUS "Found clang-scan-deps: ${CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS}") +else() + message(STATUS "clang-scan-deps not found; will rely on CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS cache") +endif() + +add_library(clore-core STATIC) add_library(clore::core ALIAS clore-core) +set_property(TARGET clore-core PROPERTY CXX_SCAN_FOR_MODULES ON) + +target_sources(clore-core + PUBLIC + FILE_SET CXX_MODULES FILES + ${CLORE_MODULE_SOURCES} +) target_include_directories(clore-core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" @@ -22,6 +36,7 @@ target_link_libraries(clore-core PUBLIC ) add_executable(clore "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") +set_property(TARGET clore PROPERTY CXX_SCAN_FOR_MODULES ON) target_link_libraries(clore PRIVATE clore::core eventide::deco) install(TARGETS clore RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) @@ -50,6 +65,7 @@ if(CLORE_ENABLE_TEST) "${PROJECT_SOURCE_DIR}/tests/unit" ) target_link_libraries(unit_tests PRIVATE clore::core eventide::zest eventide::deco) + set_property(TARGET unit_tests PROPERTY CXX_SCAN_FOR_MODULES ON) endif() if(CLORE_ENABLE_IPO) diff --git a/src/config/config.cppm b/src/config/config.cppm new file mode 100644 index 0000000..daaccc4 --- /dev/null +++ b/src/config/config.cppm @@ -0,0 +1,6 @@ +export module config; + +export import :schema; +export import :load; +export import :normalize; +export import :validate; diff --git a/src/config/config.h b/src/config/config.h deleted file mode 100644 index ab2a5ce..0000000 --- a/src/config/config.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include "config/load.h" -#include "config/normalize.h" -#include "config/schema.h" -#include "config/validate.h" diff --git a/src/config/load.cpp b/src/config/load.cppm similarity index 87% rename from src/config/load.cpp rename to src/config/load.cppm index f599b0f..5ebb45e 100644 --- a/src/config/load.cpp +++ b/src/config/load.cppm @@ -1,12 +1,34 @@ -#include "config/load.h" +module; +#include #include +#include #include #include +#include +#include #include "eventide/serde/toml/toml.h" #include -#include "support/logging.h" + +export module config:load; + +import :schema; +import support; + +export namespace clore::config { + +struct ConfigError { + std::string message; +}; + +auto load_config(std::string_view path) -> std::expected; + +auto load_config_from_string(std::string_view toml_content) -> std::expected; + +} // namespace clore::config + +// ── implementation ────────────────────────────────────────────────── namespace clore::config { diff --git a/src/config/load.h b/src/config/load.h deleted file mode 100644 index 582b9ef..0000000 --- a/src/config/load.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "config/schema.h" - -namespace clore::config { - -struct ConfigError { - std::string message; -}; - -auto load_config(std::string_view path) -> std::expected; - -auto load_config_from_string(std::string_view toml_content) -> std::expected; - -} // namespace clore::config diff --git a/src/config/normalize.cpp b/src/config/normalize.cppm similarity index 84% rename from src/config/normalize.cpp rename to src/config/normalize.cppm index 7773cb5..60a47fa 100644 --- a/src/config/normalize.cpp +++ b/src/config/normalize.cppm @@ -1,15 +1,32 @@ -#include "config/normalize.h" +module; +#include #include #include +#include +#include + +export module config:normalize; + +import :schema; + +export namespace clore::config { + +struct NormalizeError { + std::string message; +}; + +auto normalize(TaskConfig& config) -> std::expected; + +} // namespace clore::config + +// ── implementation ────────────────────────────────────────────────── namespace clore::config { auto normalize(TaskConfig& config) -> std::expected { namespace fs = std::filesystem; - // Reject empty required path fields before any filesystem operations. - // fs::absolute("") silently resolves to cwd, which would bypass validation. auto make_absolute = [](std::string& path, std::string_view field, const std::optional& base = std::nullopt) @@ -62,7 +79,6 @@ auto normalize(TaskConfig& config) -> std::expected { return r; } - // Normalize path separators to forward slashes. auto normalize_separators = [](std::string& path) { for(auto& c : path) { if(c == '\\') { diff --git a/src/config/normalize.h b/src/config/normalize.h deleted file mode 100644 index 4bfb840..0000000 --- a/src/config/normalize.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include -#include - -#include "config/schema.h" - -namespace clore::config { - -struct NormalizeError { - std::string message; -}; - -auto normalize(TaskConfig& config) -> std::expected; - -} // namespace clore::config diff --git a/src/config/schema.h b/src/config/schema.cppm similarity index 94% rename from src/config/schema.h rename to src/config/schema.cppm index bbedac6..927ee8b 100644 --- a/src/config/schema.h +++ b/src/config/schema.cppm @@ -1,11 +1,13 @@ -#pragma once +module; #include #include #include #include -namespace clore::config { +export module config:schema; + +export namespace clore::config { struct FrontmatterField { std::string key; diff --git a/src/config/validate.cpp b/src/config/validate.cppm similarity index 84% rename from src/config/validate.cpp rename to src/config/validate.cppm index 7322857..8443077 100644 --- a/src/config/validate.cpp +++ b/src/config/validate.cppm @@ -1,14 +1,31 @@ -#include "config/validate.h" +module; +#include #include #include +#include + +export module config:validate; + +import :schema; + +export namespace clore::config { + +struct ValidationError { + std::string message; +}; + +auto validate(const TaskConfig& config) -> std::expected; + +} // namespace clore::config + +// ── implementation ────────────────────────────────────────────────── namespace clore::config { auto validate(const TaskConfig& config) -> std::expected { namespace fs = std::filesystem; - // compile_commands_path: required, must exist, must be a regular file if(config.compile_commands_path.empty()) { return std::unexpected(ValidationError{ .message = "compile_commands_path is required"}); @@ -24,7 +41,6 @@ auto validate(const TaskConfig& config) -> std::expected config.compile_commands_path)}); } - // project_root: required, must exist, must be a directory if(config.project_root.empty()) { return std::unexpected(ValidationError{ .message = "project_root is required"}); @@ -38,8 +54,6 @@ auto validate(const TaskConfig& config) -> std::expected .message = std::format("project_root is not a directory: {}", config.project_root)}); } - // output_root: required. If it already exists it must be a directory; it is not - // created here — the caller is responsible for creating it before extraction. if(config.output_root.empty()) { return std::unexpected(ValidationError{ .message = "output_root is required"}); diff --git a/src/config/validate.h b/src/config/validate.h deleted file mode 100644 index e8e24e4..0000000 --- a/src/config/validate.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include -#include - -#include "config/schema.h" - -namespace clore::config { - -struct ValidationError { - std::string message; -}; - -auto validate(const TaskConfig& config) -> std::expected; - -} // namespace clore::config diff --git a/src/extract/ast.cpp b/src/extract/ast.cppm similarity index 91% rename from src/extract/ast.cpp rename to src/extract/ast.cppm index 230cc1a..abf6c4f 100644 --- a/src/extract/ast.cpp +++ b/src/extract/ast.cppm @@ -1,8 +1,13 @@ -#include "extract/ast.h" +module; #include +#include +#include #include +#include +#include #include +#include #include "clang/AST/ASTConsumer.h" #include "clang/AST/Comment.h" @@ -16,8 +21,37 @@ #include "clang/Index/USRGeneration.h" #include "llvm/Support/Error.h" -#include "extract/tooling.h" -#include "support/logging.h" +export module extract:ast; + +import :compdb; +import :model; +import :symbol; +import :tooling; +import support; + +export namespace clore::extract { + +struct ASTError { + std::string message; +}; + +struct ExtractedRelation { + SymbolID from; + SymbolID to; + bool is_call; ///< true = call edge, false = reference edge +}; + +struct ASTResult { + std::vector symbols; + std::vector relations; +}; + +auto extract_symbols(const CompileEntry& entry, std::uint32_t max_snippet_bytes) + -> std::expected; + +} // namespace clore::extract + +// ── implementation ────────────────────────────────────────────────── namespace clore::extract { @@ -116,7 +150,6 @@ struct RelationEdge { RelationKind kind; }; -/// Produce a hash for de-duplicating (from, to, kind) triples. auto edge_hash(SymbolID from, SymbolID to, RelationKind kind) -> std::uint64_t { auto h = std::hash{}(from.hash); h ^= std::hash{}(to.hash) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); @@ -132,11 +165,7 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitor enclosing_stack; - - /// De-duplicate edges per (from, to, kind) triple within this TU. std::unordered_set seen_edges; SymbolExtractorVisitor(clang::ASTContext& ctx, std::vector& syms, @@ -155,13 +184,11 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitorgetLocation(); if(loc.isInvalid()) return true; - // Only process declarations from the main file or its includes if(sm.isInSystemHeader(loc)) return true; auto kind = classify_decl(decl); if(kind == SymbolKind::Unknown) return true; - // Skip anonymous declarations if(decl->getDeclName().isEmpty()) return true; auto id = compute_symbol_id(decl); @@ -174,12 +201,10 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitorgetQualifiedNameAsString(); info.declaration_location = make_source_location(sm, decl->getLocation()); - // Try to get definition location if(auto* func = llvm::dyn_cast(decl)) { if(func->isThisDeclarationADefinition()) { info.definition_location = make_source_location(sm, func->getLocation()); } - // Get signature std::string sig; llvm::raw_string_ostream os(sig); func->print(os, context.getPrintingPolicy()); @@ -187,7 +212,6 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitor(decl)) { if(record->isThisDeclarationADefinition()) { info.definition_location = make_source_location(sm, record->getLocation()); - // Collect base classes for(auto& base : record->bases()) { auto* base_type = base.getType()->getAsCXXRecordDecl(); if(base_type) { @@ -204,16 +228,10 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitorgetAccess()); - - // Doc comment info.doc_comment = get_doc_comment(context, decl); - - // Source snippet info.source_snippet = get_source_snippet(context, decl, max_snippet_bytes); - // Parent (for methods -> class, fields -> struct, etc.) auto* parent_ctx = decl->getDeclContext(); if(auto* parent_decl = llvm::dyn_cast_or_null( clang::Decl::castFromDeclContext(parent_ctx))) { @@ -223,7 +241,6 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitor(decl)) { info.is_template = true; auto* params = tmpl->getTemplateParameters(); @@ -246,8 +263,6 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitor(expr->getDecl()); if(!referenced) return true; - // Skip function calls — those are captured by VisitCallExpr. if(llvm::isa(referenced)) return true; auto ref_id = compute_symbol_id(referenced); @@ -326,7 +336,6 @@ class SymbolExtractorVisitor : public clang::RecursiveASTVisitorgetMemberDecl(); if(!member) return true; - // Skip method calls — captured by VisitCallExpr via CXXMemberCallExpr. if(llvm::isa(member)) return true; auto member_id = compute_symbol_id(member); @@ -414,7 +423,6 @@ auto extract_symbols(const CompileEntry& entry, std::uint32_t max_snippet_bytes) action.EndSourceFile(); - // Convert internal RelationEdge to public ExtractedRelation. result.relations.reserve(raw_relations.size()); for(auto& edge : raw_relations) { result.relations.push_back(ExtractedRelation{ diff --git a/src/extract/ast.h b/src/extract/ast.h deleted file mode 100644 index 687c8d4..0000000 --- a/src/extract/ast.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "extract/compdb.h" -#include "extract/model.h" - -namespace clore::extract { - -struct ASTError { - std::string message; -}; - -struct ExtractedRelation { - SymbolID from; - SymbolID to; - bool is_call; ///< true = call edge, false = reference edge -}; - -struct ASTResult { - std::vector symbols; - std::vector relations; -}; - -/// Extract all named symbols and their call/reference relations from `entry`, -/// capturing at most `max_snippet_bytes` of raw source text per symbol. -auto extract_symbols(const CompileEntry& entry, std::uint32_t max_snippet_bytes) - -> std::expected; - -} // namespace clore::extract diff --git a/src/extract/compdb.cpp b/src/extract/compdb.cpp deleted file mode 100644 index a6b2a7e..0000000 --- a/src/extract/compdb.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "extract/compdb.h" - -#include - -#include "clang/Tooling/CompilationDatabase.h" -#include "clang/Tooling/JSONCompilationDatabase.h" -#include "llvm/Support/MemoryBuffer.h" - -#include "support/logging.h" - -namespace clore::extract { - -auto load_compdb(std::string_view path) -> std::expected { - namespace fs = std::filesystem; - - auto compdb_path = fs::path(path); - if(!fs::exists(compdb_path)) { - return std::unexpected(CompDbError{ - .message = std::format("compile_commands.json not found: {}", path)}); - } - - std::string error_message; - auto json_db = clang::tooling::JSONCompilationDatabase::loadFromFile( - std::string(path), error_message, clang::tooling::JSONCommandLineSyntax::AutoDetect); - - if(!json_db) { - return std::unexpected(CompDbError{ - .message = std::format("failed to load compile_commands.json: {}", error_message)}); - } - - CompilationDatabase db; - for(auto& cmd : json_db->getAllCompileCommands()) { - CompileEntry entry; - entry.file = cmd.Filename; - entry.directory = cmd.Directory; - entry.arguments.reserve(cmd.CommandLine.size()); - for(auto& arg : cmd.CommandLine) { - entry.arguments.push_back(arg); - } - db.entries.push_back(std::move(entry)); - } - - logging::info("loaded {} compile commands from {}", db.entries.size(), path); - return db; -} - -auto lookup(const CompilationDatabase& db, std::string_view file) - -> std::vector { - std::vector results; - - namespace fs = std::filesystem; - auto target = fs::path(file).lexically_normal(); - - for(auto& entry : db.entries) { - auto entry_path = fs::path(entry.file).lexically_normal(); - if(entry_path == target) { - results.push_back(&entry); - } - } - return results; -} - -} // namespace clore::extract diff --git a/src/extract/compdb.cppm b/src/extract/compdb.cppm new file mode 100644 index 0000000..a9cfc50 --- /dev/null +++ b/src/extract/compdb.cppm @@ -0,0 +1,141 @@ +module; + +#include +#include +#include +#include +#include +#include +#include + +#include "clang/Tooling/CompilationDatabase.h" +#include "clang/Tooling/JSONCompilationDatabase.h" +#include "llvm/Support/MemoryBuffer.h" + +export module extract:compdb; + +import support; + +export namespace clore::extract { + +struct CompileEntry { + std::string file; + std::string directory; + std::vector arguments; +}; + +struct CompilationDatabase { + std::vector entries; +}; + +struct CompDbError { + std::string message; +}; + +auto load_compdb(std::string_view path) -> std::expected; + +auto lookup(const CompilationDatabase& db, std::string_view file) + -> std::vector; + +// ── argument utilities ────────────────────────────────────────────── + +auto strip_compiler_path(const std::vector& args) -> std::vector; + +auto normalize_argument_path(std::string_view path, std::string_view directory) + -> std::filesystem::path; + +auto sanitize_driver_arguments(const CompileEntry& entry) -> std::vector; + +auto sanitize_tool_arguments(const CompileEntry& entry) -> std::vector; + +} // namespace clore::extract + +// ── implementation ────────────────────────────────────────────────── + +namespace clore::extract { + +auto load_compdb(std::string_view path) -> std::expected { + namespace fs = std::filesystem; + + auto compdb_path = fs::path(path); + if(!fs::exists(compdb_path)) { + return std::unexpected(CompDbError{ + .message = std::format("compile_commands.json not found: {}", path)}); + } + + std::string error_message; + auto json_db = clang::tooling::JSONCompilationDatabase::loadFromFile( + std::string(path), error_message, clang::tooling::JSONCommandLineSyntax::AutoDetect); + + if(!json_db) { + return std::unexpected(CompDbError{ + .message = std::format("failed to load compile_commands.json: {}", error_message)}); + } + + CompilationDatabase db; + for(auto& cmd : json_db->getAllCompileCommands()) { + CompileEntry entry; + entry.file = cmd.Filename; + entry.directory = cmd.Directory; + entry.arguments.reserve(cmd.CommandLine.size()); + for(auto& arg : cmd.CommandLine) { + entry.arguments.push_back(arg); + } + db.entries.push_back(std::move(entry)); + } + + logging::info("loaded {} compile commands from {}", db.entries.size(), path); + return db; +} + +auto lookup(const CompilationDatabase& db, std::string_view file) + -> std::vector { + std::vector results; + + namespace fs = std::filesystem; + auto target = fs::path(file).lexically_normal(); + + for(auto& entry : db.entries) { + auto entry_path = (fs::path(entry.directory) / entry.file).lexically_normal(); + if(entry_path == target) { + results.push_back(&entry); + } + } + return results; +} + +auto strip_compiler_path(const std::vector& args) -> std::vector { + if(args.size() <= 1) { + return {}; + } + return std::vector(args.begin() + 1, args.end()); +} + +auto normalize_argument_path(std::string_view path, std::string_view directory) + -> std::filesystem::path { + auto normalized = std::filesystem::path(path); + if(normalized.is_relative()) { + normalized = std::filesystem::path(directory) / normalized; + } + return normalized.lexically_normal(); +} + +auto sanitize_driver_arguments(const CompileEntry& entry) -> std::vector { + auto adjusted = entry.arguments; + auto source_path = normalize_argument_path(entry.file, entry.directory); + + std::erase_if(adjusted, [&](const std::string& arg) { + if(arg.empty() || arg.starts_with('-')) { + return false; + } + return normalize_argument_path(arg, entry.directory) == source_path; + }); + + return adjusted; +} + +auto sanitize_tool_arguments(const CompileEntry& entry) -> std::vector { + return strip_compiler_path(sanitize_driver_arguments(entry)); +} + +} // namespace clore::extract diff --git a/src/extract/compdb.h b/src/extract/compdb.h deleted file mode 100644 index 0b76509..0000000 --- a/src/extract/compdb.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace clore::extract { - -struct CompileEntry { - std::string file; - std::string directory; - std::vector arguments; -}; - -struct CompilationDatabase { - std::vector entries; -}; - -struct CompDbError { - std::string message; -}; - -auto load_compdb(std::string_view path) -> std::expected; - -auto lookup(const CompilationDatabase& db, std::string_view file) - -> std::vector; - -} // namespace clore::extract diff --git a/src/extract/extract.cpp b/src/extract/extract.cppm similarity index 80% rename from src/extract/extract.cpp rename to src/extract/extract.cppm index 2971da5..e57c202 100644 --- a/src/extract/extract.cpp +++ b/src/extract/extract.cppm @@ -1,4 +1,4 @@ -#include "extract/extract.h" +module; #include #include @@ -6,27 +6,43 @@ #include #include #include +#include #include +#include +#include -#include "extract/ast.h" -#include "extract/compdb.h" -#include "extract/scan.h" -#include "support/logging.h" +export module extract; + +export import :symbol; +export import :model; +export import :compdb; +export import :scan; +export import :tooling; +export import :ast; + +import config; +import support; + +export namespace clore::extract { + +struct ExtractError { + std::string message; +}; + +auto extract_project(const config::TaskConfig& config) + -> std::expected; + +} // namespace clore::extract + +// ── implementation ────────────────────────────────────────────────── namespace clore::extract { namespace { -/// Returns true if `relative` matches the configured pattern. -/// -/// Patterns are interpreted as *workspace-relative path prefixes* (with forward -/// slashes), not as arbitrary substrings. This prevents accidental matches like -/// "src/" matching "build/_deps/.../src/...". bool path_prefix_matches(std::string_view relative, std::string_view pattern) { if(pattern.empty()) return false; - // If the pattern contains a path separator, treat it as an explicit prefix. - // Otherwise treat it as a top-level directory name. if(pattern.find('/') != std::string_view::npos) { return relative.starts_with(pattern); } @@ -71,11 +87,6 @@ auto resolve_path_under_directory(const std::string& path, return p.lexically_normal(); } -/// Returns true if `file` should be processed according to `filter`. -/// -/// Security: rejects any path whose fs::relative result contains a ".." -/// component, which would mean `file` escapes `filter_root` and could be -/// used to bypass the expected-path boundary. bool matches_filter(const std::string& file, const config::FilterRule& filter, const std::filesystem::path& filter_root) { namespace fs = std::filesystem; @@ -86,9 +97,8 @@ bool matches_filter(const std::string& file, const config::FilterRule& filter, auto rel_opt = project_relative_path(file_path, root_path); if(!rel_opt.has_value()) return false; - auto relative_str = rel_opt->generic_string(); // forward slashes + auto relative_str = rel_opt->generic_string(); - // File must match at least one include pattern (if any are specified). if(!filter.include.empty()) { bool matched = false; for(auto& pattern : filter.include) { @@ -100,7 +110,6 @@ bool matches_filter(const std::string& file, const config::FilterRule& filter, if(!matched) return false; } - // File must not match any exclude pattern. for(auto& pattern : filter.exclude) { if(path_prefix_matches(relative_str, pattern)) { return false; @@ -276,6 +285,30 @@ auto rebuild_model_indexes(const config::TaskConfig& config, ProjectModel& model } } +/// Populate module information from scan cache into the project model. +auto build_module_info(ProjectModel& model, const ScanCache& scan_cache) -> void { + namespace fs = std::filesystem; + + for(auto& [file_path, scan_result] : scan_cache) { + if(scan_result.module_name.empty()) continue; + + model.uses_modules = true; + + auto source_file = fs::path(file_path).lexically_normal().generic_string(); + auto& mod_unit = model.modules[source_file]; + mod_unit.name = scan_result.module_name; + mod_unit.is_interface = scan_result.is_interface_unit; + mod_unit.source_file = source_file; + mod_unit.imports = scan_result.module_imports; + + // Associate symbols from that file to this module unit + auto file_it = model.files.find(source_file); + if(file_it != model.files.end()) { + mod_unit.symbols = file_it->second.symbols; + } + } +} + } // namespace auto extract_project(const config::TaskConfig& config) @@ -294,9 +327,7 @@ auto extract_project(const config::TaskConfig& config) auto dt_load = std::chrono::duration_cast(Clock::now() - t0); logging::info("loaded {} compile entries ({}ms)", db.entries.size(), dt_load.count()); - // 1b. Pre-filter entries so the dependency graph and symbol extraction - // only process files that pass the user's include/exclude rules. - // This avoids expensive preprocessor and AST work on irrelevant files. + // 1b. Pre-filter entries CompilationDatabase filtered_db; auto filter_root = filter_root_path(config); for(auto& entry : db.entries) { @@ -305,7 +336,7 @@ auto extract_project(const config::TaskConfig& config) if(!abs_path.has_value()) { return std::unexpected(std::move(abs_path.error())); } - entry_copy.file = abs_path->string(); // keep OS-native separators for clang + entry_copy.file = abs_path->string(); if(matches_filter(entry_copy.file, config.filter, filter_root)) { filtered_db.entries.push_back(std::move(entry_copy)); @@ -315,9 +346,7 @@ auto extract_project(const config::TaskConfig& config) logging::info("filter: {} entries pass, {} skipped", filtered_db.entries.size(), skipped); - // 2. Build dependency graph and get topological order. - // build_dependency_graph now also populates a ScanCache so we can reuse - // the per-file scan results later without re-running the preprocessor. + // 2. Build dependency graph + ScanCache auto t1 = Clock::now(); auto dep_result = build_dependency_graph(filtered_db); if(!dep_result.has_value()) { @@ -338,7 +367,7 @@ auto extract_project(const config::TaskConfig& config) logging::info("dependency graph: {} files, {} edges ({}ms)", dep_graph.files.size(), dep_graph.edges.size(), dt_graph.count()); - // 3. Extract symbols for each file, reusing cached scan results. + // 3. Extract symbols for each file auto t2 = Clock::now(); ProjectModel model; model.file_order = std::move(*order_result); @@ -352,7 +381,6 @@ auto extract_project(const config::TaskConfig& config) logging::info("extracting {}/{}: {}", idx + 1, total_entries, normalized); auto t_file = Clock::now(); - // Extract symbols and relations -- fail fast on any error auto t_ast = Clock::now(); auto ast_result = extract_symbols(entry, *config.extract.max_snippet_bytes); if(!ast_result.has_value()) { @@ -364,9 +392,6 @@ auto extract_project(const config::TaskConfig& config) logging::info(" ast: {} symbols, {} relations ({}ms)", ast_result->symbols.size(), ast_result->relations.size(), dt_ast.count()); - // Look up includes from the scan cache instead of re-running the - // preprocessor. The key must use the OS-native normalized form that - // build_dependency_graph stored (lexically_normal().string()). auto cache_key = fs::path(entry.file).lexically_normal().string(); auto cache_it = scan_cache.find(cache_key); @@ -374,25 +399,7 @@ auto extract_project(const config::TaskConfig& config) current_file_info.path = normalized; std::size_t includes_kept = 0; - if(cache_it != scan_cache.end()) { - for(auto& inc : cache_it->second.includes) { - // Keep include edges only within the configured project filter; - // external headers (deps, system headers) should not affect the - // project page graph. - namespace fs = std::filesystem; - auto inc_path = fs::path(inc.path); - if(inc_path.is_relative()) { - inc_path = fs::path(entry.directory) / inc_path; - } - inc_path = inc_path.lexically_normal(); - if(matches_filter(inc_path.string(), config.filter, filter_root)) { - append_unique(current_file_info.includes, inc_path.generic_string()); - ++includes_kept; - } - } - } else { - // Fallback: scan if the file somehow wasn't in the cache (should - // not happen for entries from the compilation database). + if(cache_it == scan_cache.end()) { logging::warn("scan cache miss for {}, re-scanning", entry.file); auto scan_result = scan_file(entry); if(!scan_result.has_value()) { @@ -400,26 +407,26 @@ auto extract_project(const config::TaskConfig& config) .message = std::format("failed to scan includes for {}: {}", entry.file, scan_result.error().message)}); } - for(auto& inc : scan_result->includes) { - namespace fs = std::filesystem; - auto inc_path = fs::path(inc.path); - if(inc_path.is_relative()) { - inc_path = fs::path(entry.directory) / inc_path; - } - inc_path = inc_path.lexically_normal(); - if(matches_filter(inc_path.string(), config.filter, filter_root)) { - append_unique(current_file_info.includes, inc_path.generic_string()); - ++includes_kept; - } + + auto [rescanned_it, _] = scan_cache.insert_or_assign(cache_key, std::move(*scan_result)); + cache_it = rescanned_it; + } + + for(auto& inc : cache_it->second.includes) { + namespace fs = std::filesystem; + auto inc_path = fs::path(inc.path); + if(inc_path.is_relative()) { + inc_path = fs::path(entry.directory) / inc_path; + } + inc_path = inc_path.lexically_normal(); + if(matches_filter(inc_path.string(), config.filter, filter_root)) { + append_unique(current_file_info.includes, inc_path.generic_string()); + ++includes_kept; } } - // Process extracted symbols std::size_t symbols_kept = 0; for(auto& sym : ast_result->symbols) { - // Filter symbols by their declared source file so dependency code - // (e.g. FetchContent sources under build/_deps) does not end up in - // the project model or generated docs. namespace fs = std::filesystem; auto decl_file = fs::path(sym.declaration_location.file); if(decl_file.is_relative()) { @@ -454,7 +461,6 @@ auto extract_project(const config::TaskConfig& config) } } - // Wire forward call/reference edges onto SymbolInfo. for(auto& rel : ast_result->relations) { auto from_it = model.symbols.find(rel.from); if(from_it == model.symbols.end()) continue; @@ -472,9 +478,7 @@ auto extract_project(const config::TaskConfig& config) rebuild_model_indexes(config, model); - // 4. Build reverse edges (called_by / referenced_by). - // Forward edges (calls / references) were written above; now iterate - // all symbols and populate the reverse direction. + // 4. Build reverse edges auto t3 = Clock::now(); logging::info("building reverse edges for {} symbols...", model.symbols.size()); for(auto& [id, sym] : model.symbols) { @@ -494,7 +498,16 @@ auto extract_project(const config::TaskConfig& config) auto dt_reverse = std::chrono::duration_cast(Clock::now() - t3); logging::info("reverse edges done ({}ms)", dt_reverse.count()); - // Count total relation edges for logging. + // 5. Build module information from scan results + build_module_info(model, scan_cache); + if(model.uses_modules) { + logging::info("detected {} module units", model.modules.size()); + for(auto& [source_file, mod] : model.modules) { + logging::info(" module '{}' (interface={}) from {}", + mod.name, mod.is_interface, source_file); + } + } + std::size_t total_calls = 0, total_refs = 0; for(auto& [id, sym] : model.symbols) { total_calls += sym.calls.size(); diff --git a/src/extract/extract.h b/src/extract/extract.h deleted file mode 100644 index d5c7518..0000000 --- a/src/extract/extract.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include - -#include "config/schema.h" -#include "extract/model.h" - -namespace clore::extract { - -struct ExtractError { - std::string message; -}; - -auto extract_project(const config::TaskConfig& config) - -> std::expected; - -} // namespace clore::extract diff --git a/src/extract/model.h b/src/extract/model.cppm similarity index 62% rename from src/extract/model.h rename to src/extract/model.cppm index c8b4f50..2dd7ca7 100644 --- a/src/extract/model.h +++ b/src/extract/model.cppm @@ -1,4 +1,4 @@ -#pragma once +module; #include #include @@ -6,9 +6,11 @@ #include #include -#include "extract/symbol.h" +export module extract:model; -namespace clore::extract { +import :symbol; + +export namespace clore::extract { struct SourceLocation { std::string file; @@ -26,8 +28,6 @@ struct SourceRange { struct SymbolInfo { SymbolID id; - /// Unknown is the only kind that is never emitted for valid symbols; - /// it indicates the Decl passed classify_decl but could not be mapped. SymbolKind kind = SymbolKind::Unknown; std::string name; std::string qualified_name; @@ -45,22 +45,14 @@ struct SymbolInfo { std::vector bases; std::vector derived; - /// Symbols this symbol calls (direct callees). std::vector calls; - /// Symbols that call this symbol (direct callers). Populated as a reverse - /// edge during model assembly — not filled by the AST visitor. std::vector called_by; - /// Symbols this symbol references (type uses, member accesses, etc.) - /// excluding call targets which go into `calls`. std::vector references; - /// Symbols that reference this symbol. Reverse edge, populated during - /// model assembly. std::vector referenced_by; std::string access; - /// Explicitly false until the symbol is confirmed to be a template. bool is_template = false; std::string template_params; }; @@ -76,12 +68,29 @@ struct NamespaceInfo { std::vector symbols; }; +// ── module information ────────────────────────────────────────────── + +/// Represents a single C++20 module unit (interface or partition). +struct ModuleUnit { + std::string name; ///< Full module name, e.g. "foo" or "foo:bar" + bool is_interface = false; ///< true for `export module`, false for `module` + std::string source_file; ///< Normalized path to the source file + std::vector imports; ///< Module imports + std::vector symbols; ///< Symbols declared in this module unit +}; + struct ProjectModel { std::unordered_map symbols; std::unordered_map files; std::unordered_map namespaces; std::vector file_order; + + /// Module units indexed by normalized source file path. + std::unordered_map modules; + + /// True if the project uses C++20 modules (at least one module declaration found). + bool uses_modules = false; }; } // namespace clore::extract diff --git a/src/extract/scan.cpp b/src/extract/scan.cppm similarity index 50% rename from src/extract/scan.cpp rename to src/extract/scan.cppm index dba96d7..f375bc0 100644 --- a/src/extract/scan.cpp +++ b/src/extract/scan.cppm @@ -1,25 +1,113 @@ -#include "extract/scan.h" +module; #include +#include #include #include +#include +#include #include +#include +#include #include #include +#include #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/Error.h" -#include "support/logging.h" -#include "extract/tooling.h" +export module extract:scan; + +import :compdb; +import :tooling; +import support; + +export namespace clore::extract { + +struct ScanError { + std::string message; +}; + +struct IncludeInfo { + std::string path; + bool is_angled = false; +}; + +struct ScanResult { + std::string module_name; + bool is_interface_unit = false; + std::vector includes; + std::vector module_imports; +}; + +/// Cache mapping normalized file path -> ScanResult. +using ScanCache = std::unordered_map; + +auto scan_file(const CompileEntry& entry) -> std::expected; + +/// Fast module declaration scan using Clang's dependency directives scanner. +/// Populates module_name, is_interface_unit, and module_imports in ScanResult +/// without running the full preprocessor. +auto scan_module_decl(std::string_view file_content, ScanResult& result) -> void; + +struct DependencyEdge { + std::string from; + std::string to; +}; + +struct DependencyGraph { + std::vector files; + std::vector edges; +}; + +struct DependencyResult { + DependencyGraph graph; + ScanCache cache; +}; + +auto build_dependency_graph(const CompilationDatabase& db) + -> std::expected; + +auto topological_order(const DependencyGraph& graph) + -> std::expected, ScanError>; + +} // namespace clore::extract + +// ── implementation ────────────────────────────────────────────────── namespace clore::extract { namespace { +auto append_unique_import(ScanResult& result, std::string import_name) -> void { + if(std::ranges::find(result.module_imports, import_name) != result.module_imports.end()) { + return; + } + result.module_imports.push_back(std::move(import_name)); +} + +auto normalize_partition_import(std::string_view current_module_name, + std::string import_name) -> std::string { + if(import_name.starts_with(':') && !current_module_name.empty()) { + auto main_name = current_module_name; + if(auto colon_pos = current_module_name.find(':'); colon_pos != std::string::npos) { + main_name = current_module_name.substr(0, colon_pos); + } + + std::string normalized; + normalized.reserve(main_name.size() + import_name.size()); + normalized += main_name; + normalized += import_name; + return normalized; + } + + return import_name; +} + class ScanPPCallbacks : public clang::PPCallbacks { public: ScanResult& result; @@ -65,6 +153,74 @@ class ScanAction : public clang::PreprocessOnlyAction { } // namespace +auto scan_module_decl(std::string_view file_content, ScanResult& result) -> void { + // Use Clang's dependency directives scanner for fast module detection. + llvm::SmallVector tokens; + llvm::SmallVector directives; + + if(clang::scanSourceForDependencyDirectives(file_content, tokens, directives)) { + // Scanner failed; fall back to no module detection. + return; + } + + namespace dds = clang::dependency_directives_scan; + + for(auto& dir : directives) { + if(dir.Kind == dds::cxx_export_module_decl || dir.Kind == dds::cxx_module_decl) { + // Collect module name from tokens: identifiers + '.' + ':' + std::string module_name; + + // Skip 'export' and 'module' keywords + bool past_module_keyword = false; + for(auto& tok : dir.Tokens) { + auto tok_text = file_content.substr(tok.Offset, tok.Length); + + if(!past_module_keyword) { + if(tok_text == "module") { + past_module_keyword = true; + } + continue; + } + + // Stop at semicolon or end + if(tok_text == ";") break; + + module_name += tok_text; + } + + if(!module_name.empty()) { + result.module_name = std::move(module_name); + result.is_interface_unit = (dir.Kind == dds::cxx_export_module_decl); + } + } else if(dir.Kind == dds::cxx_import_decl) { + // Collect import name + std::string import_name; + + bool past_import_keyword = false; + for(auto& tok : dir.Tokens) { + auto tok_text = file_content.substr(tok.Offset, tok.Length); + + if(!past_import_keyword) { + if(tok_text == "import") { + past_import_keyword = true; + } + continue; + } + + if(tok_text == ";") break; + + import_name += tok_text; + } + + if(!import_name.empty()) { + append_unique_import(result, + normalize_partition_import(result.module_name, + std::move(import_name))); + } + } + } +} + auto scan_file(const CompileEntry& entry) -> std::expected { if(entry.arguments.empty()) { return std::unexpected(ScanError{ @@ -72,6 +228,22 @@ auto scan_file(const CompileEntry& entry) -> std::expected(ifs)), + std::istreambuf_iterator()); + scan_module_decl(content, result); + } + } + auto instance = create_compiler_instance(entry); if(!instance) { return std::unexpected(ScanError{ @@ -104,10 +276,6 @@ auto build_dependency_graph(const CompilationDatabase& db) std::unordered_set entry_files; std::unordered_set file_set; - // Only track compilation-database entries as nodes in the dependency graph. - // Includes are treated as edges but do not become standalone nodes, which - // avoids pulling in dependency/system headers (and potential cycles) that - // are outside the project boundary. for(auto& entry : db.entries) { namespace fs = std::filesystem; auto normalized = fs::path(entry.file).lexically_normal().string(); @@ -134,7 +302,6 @@ auto build_dependency_graph(const CompilationDatabase& db) } } - // Cache the scan result so callers don't need to re-scan. cache.emplace(normalized, std::move(*scan_result)); } @@ -179,8 +346,6 @@ auto topological_order(const DependencyGraph& graph) } } - // A cycle means some files were never enqueued. Fail fast instead of - // silently processing files in an arbitrary order. if(order.size() < graph.files.size()) { return std::unexpected(ScanError{ .message = "dependency cycle detected in project include graph"}); diff --git a/src/extract/scan.h b/src/extract/scan.h deleted file mode 100644 index a24261e..0000000 --- a/src/extract/scan.h +++ /dev/null @@ -1,100 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "extract/compdb.h" - -namespace clore::extract { - -struct ScanError { - std::string message; -}; - -struct IncludeInfo { - std::string path; - bool is_angled = false; -}; - -struct ScanResult { - std::string module_name; - bool is_interface_unit = false; - std::vector includes; - std::vector module_imports; -}; - -/// Cache mapping normalized file path -> ScanResult from the dependency graph -/// build phase, so that callers can reuse scan results without re-running the -/// preprocessor. -using ScanCache = std::unordered_map; - -/// Strip the leading compiler-executable element (args[0]) from a raw argument -/// list. Both scan_file and extract_symbols use this; defining it here avoids -/// duplication across translation units. -inline auto strip_compiler_path(const std::vector& args) -> std::vector { - if(args.size() <= 1) { - return {}; - } - return std::vector(args.begin() + 1, args.end()); -} - -inline auto normalize_argument_path(std::string_view path, std::string_view directory) - -> std::filesystem::path { - auto normalized = std::filesystem::path(path); - if(normalized.is_relative()) { - normalized = std::filesystem::path(directory) / normalized; - } - return normalized.lexically_normal(); -} - -inline auto sanitize_driver_arguments(const CompileEntry& entry) -> std::vector { - auto adjusted = entry.arguments; - auto source_path = normalize_argument_path(entry.file, entry.directory); - - std::erase_if(adjusted, [&](const std::string& arg) { - if(arg.empty() || arg.starts_with('-')) { - return false; - } - return normalize_argument_path(arg, entry.directory) == source_path; - }); - - return adjusted; -} - -inline auto sanitize_tool_arguments(const CompileEntry& entry) -> std::vector { - return strip_compiler_path(sanitize_driver_arguments(entry)); -} - -/// Returns an error if the entry's argument list is empty or the tool fails. -auto scan_file(const CompileEntry& entry) -> std::expected; - -struct DependencyEdge { - std::string from; - std::string to; -}; - -struct DependencyGraph { - std::vector files; - std::vector edges; -}; - -struct DependencyResult { - DependencyGraph graph; - ScanCache cache; -}; - -/// Build the include dependency graph from the compilation database. Also -/// populates a ScanCache so callers can look up per-file scan results without -/// re-running the preprocessor. -auto build_dependency_graph(const CompilationDatabase& db) - -> std::expected; - -/// Returns an error if a dependency cycle is detected. -auto topological_order(const DependencyGraph& graph) - -> std::expected, ScanError>; - -} // namespace clore::extract diff --git a/src/extract/symbol.cpp b/src/extract/symbol.cpp deleted file mode 100644 index cc536fb..0000000 --- a/src/extract/symbol.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "extract/symbol.h" - -namespace clore::extract { - -auto symbol_kind_name(SymbolKind kind) -> std::string_view { - switch(kind) { - case SymbolKind::Namespace: return "namespace"; - case SymbolKind::Class: return "class"; - case SymbolKind::Struct: return "struct"; - case SymbolKind::Union: return "union"; - case SymbolKind::Enum: return "enum"; - case SymbolKind::EnumMember: return "enum_member"; - case SymbolKind::Function: return "function"; - case SymbolKind::Method: return "method"; - case SymbolKind::Variable: return "variable"; - case SymbolKind::Field: return "field"; - case SymbolKind::TypeAlias: return "type_alias"; - case SymbolKind::Macro: return "macro"; - case SymbolKind::Template: return "template"; - case SymbolKind::Concept: return "concept"; - case SymbolKind::Unknown: return "unknown"; - } - return "unknown"; -} - -} // namespace clore::extract diff --git a/src/extract/symbol.cppm b/src/extract/symbol.cppm new file mode 100644 index 0000000..85335c8 --- /dev/null +++ b/src/extract/symbol.cppm @@ -0,0 +1,77 @@ +module; + +#include +#include +#include +#include +#include + +export module extract:symbol; + +export namespace clore::extract { + +enum class SymbolKind : std::uint8_t { + Namespace, + Class, + Struct, + Union, + Enum, + EnumMember, + Function, + Method, + Variable, + Field, + TypeAlias, + Macro, + Template, + Concept, + Unknown, +}; + +auto symbol_kind_name(SymbolKind kind) -> std::string_view; + +struct SymbolID { + /// A hash of 0 is the invalid/null sentinel. Valid IDs are always non-zero. + std::uint64_t hash = 0; + + [[nodiscard]] bool is_valid() const noexcept { return hash != 0; } + + bool operator==(const SymbolID&) const = default; + auto operator<=>(const SymbolID&) const = default; +}; + +} // namespace clore::extract + +export template <> +struct std::hash { + std::size_t operator()(const clore::extract::SymbolID& id) const noexcept { + return std::hash{}(id.hash); + } +}; + +// ── implementation ────────────────────────────────────────────────── + +namespace clore::extract { + +auto symbol_kind_name(SymbolKind kind) -> std::string_view { + switch(kind) { + case SymbolKind::Namespace: return "namespace"; + case SymbolKind::Class: return "class"; + case SymbolKind::Struct: return "struct"; + case SymbolKind::Union: return "union"; + case SymbolKind::Enum: return "enum"; + case SymbolKind::EnumMember: return "enum_member"; + case SymbolKind::Function: return "function"; + case SymbolKind::Method: return "method"; + case SymbolKind::Variable: return "variable"; + case SymbolKind::Field: return "field"; + case SymbolKind::TypeAlias: return "type_alias"; + case SymbolKind::Macro: return "macro"; + case SymbolKind::Template: return "template"; + case SymbolKind::Concept: return "concept"; + case SymbolKind::Unknown: return "unknown"; + } + return "unknown"; +} + +} // namespace clore::extract diff --git a/src/extract/symbol.h b/src/extract/symbol.h deleted file mode 100644 index 67b0ea0..0000000 --- a/src/extract/symbol.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace clore::extract { - -enum class SymbolKind : std::uint8_t { - Namespace, - Class, - Struct, - Union, - Enum, - EnumMember, - Function, - Method, - Variable, - Field, - TypeAlias, - Macro, - Template, - Concept, - Unknown, -}; - -auto symbol_kind_name(SymbolKind kind) -> std::string_view; - -struct SymbolID { - /// A hash of 0 is the invalid/null sentinel. Valid IDs are always non-zero - /// (guaranteed by compute_symbol_id, which rejects Decls with no USR). - std::uint64_t hash = 0; - - [[nodiscard]] bool is_valid() const noexcept { return hash != 0; } - - bool operator==(const SymbolID&) const = default; - auto operator<=>(const SymbolID&) const = default; -}; - -} // namespace clore::extract - -template <> -struct std::hash { - std::size_t operator()(const clore::extract::SymbolID& id) const noexcept { - return std::hash{}(id.hash); - } -}; diff --git a/src/extract/tooling.cpp b/src/extract/tooling.cppm similarity index 82% rename from src/extract/tooling.cpp rename to src/extract/tooling.cppm index 5c1d6f1..8a853b1 100644 --- a/src/extract/tooling.cpp +++ b/src/extract/tooling.cppm @@ -1,13 +1,26 @@ -#include "extract/tooling.h" +module; #include #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/Utils.h" #include "llvm/Support/VirtualFileSystem.h" -#include "extract/scan.h" +export module extract:tooling; + +import :compdb; + +export namespace clore::extract { + +auto create_compiler_instance(const CompileEntry& entry, + bool suppress_diagnostics = true) + -> std::unique_ptr; + +} // namespace clore::extract + +// ── implementation ────────────────────────────────────────────────── namespace clore::extract { diff --git a/src/extract/tooling.h b/src/extract/tooling.h deleted file mode 100644 index 0747e7d..0000000 --- a/src/extract/tooling.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -#include "clang/Frontend/CompilerInstance.h" - -#include "extract/compdb.h" - -namespace clore::extract { - -auto create_compiler_instance(const CompileEntry& entry, - bool suppress_diagnostics = true) - -> std::unique_ptr; - -} // namespace clore::extract diff --git a/src/generate/generate.cpp b/src/generate/generate.cppm similarity index 60% rename from src/generate/generate.cpp rename to src/generate/generate.cppm index ccfeee3..32f165f 100644 --- a/src/generate/generate.cpp +++ b/src/generate/generate.cppm @@ -1,17 +1,111 @@ -#include "generate/generate.h" +module; #include +#include +#include #include #include #include +#include #include +#include #include #include +#include #include #include +#include + +export module generate; + +export import :llm; + +import config; +import extract; +import support; + +export namespace clore::generate { + +// ── page hierarchy ────────────────────────────────────────────────── + +enum class PageLevel : std::uint8_t { + Symbol, + ClassStruct, + File, + Module, ///< A C++20 module unit. + Namespace, + Repository, +}; + +// ── structured page context ───────────────────────────────────────── + +struct SymbolContext { + extract::SymbolInfo self; + std::vector direct_callers; + std::vector direct_callees; + std::vector siblings; + std::optional parent_class; +}; + +struct StructuredPagePlan { + std::string relative_path; + std::string title; + PageLevel level = PageLevel::File; + std::vector contexts; + std::vector linked_pages; +}; + +// ── page graph ────────────────────────────────────────────────────── + +struct PageNode { + StructuredPagePlan plan; + std::vector depends_on; + std::vector depended_by; +}; + +struct PageGraph { + std::unordered_map nodes; + std::vector generation_order; +}; + +// ── output ────────────────────────────────────────────────────────── + +struct PromptPage { + std::string relative_path; + std::string title; + std::string prompt; +}; + +struct GeneratedPage { + std::string relative_path; + std::string content; +}; + +struct GenerateError { + std::string message; +}; + +// ── public API ────────────────────────────────────────────────────── + +auto build_page_graph(const config::TaskConfig& config, const extract::ProjectModel& model) + -> PageGraph; + +auto build_prompts(const config::TaskConfig& config, const extract::ProjectModel& model) + -> std::expected, GenerateError>; + +auto generate_pages(const config::TaskConfig& config, const extract::ProjectModel& model, + std::string_view llm_model) + -> std::expected, GenerateError>; -#include "generate/llm.h" -#include "support/logging.h" +auto write_prompts(const std::vector& prompts, std::string_view output_root) + -> std::expected; + +auto write_pages(const std::vector& pages, std::string_view output_root) + -> std::expected; + +} // namespace clore::generate + +// ── implementation ────────────────────────────────────────────────── namespace clore::generate { @@ -40,7 +134,6 @@ auto write_page_to_root(const GeneratedPage& page, std::string_view output_root) auto target = (root / rel).lexically_normal(); - // Create parent directories auto parent = target.parent_path(); if(!fs::exists(parent)) { std::error_code ec; @@ -52,7 +145,6 @@ auto write_page_to_root(const GeneratedPage& page, std::string_view output_root) } } - // Write file std::ofstream f(target); if(!f.is_open()) { return std::unexpected(GenerateError{ @@ -93,6 +185,48 @@ auto output_path_for_source_file(const std::filesystem::path& file_path, return *rel + ".md"; } +// ── module-aware output path ──────────────────────────────────────── + +/// Convert a module name to a documentation output path. +/// Main modules: "foo.bar" → "foo.bar/index.md" +/// Partitions: "foo.bar:baz" → "foo.bar/baz.md" +auto output_path_for_module(const std::string& module_name) -> std::string { + auto colon_pos = module_name.find(':'); + if(colon_pos != std::string::npos) { + // Partition: "foo.bar:baz" → "foo.bar/baz.md" + auto main_name = module_name.substr(0, colon_pos); + auto partition_name = module_name.substr(colon_pos + 1); + return main_name + "/" + partition_name + ".md"; + } + // Main module: "foo.bar" → "foo.bar/index.md" + return module_name + "/index.md"; +} + +auto append_unique_page_ref(std::vector& refs, const std::string& ref) -> bool { + if(std::ranges::find(refs, ref) != refs.end()) { + return false; + } + refs.push_back(ref); + return true; +} + +auto add_page_dependency(PageGraph& graph, const std::string& dependent_page, + const std::string& dependency_page) -> void { + if(dependent_page == dependency_page) { + return; + } + + auto dependent_it = graph.nodes.find(dependent_page); + auto dependency_it = graph.nodes.find(dependency_page); + if(dependent_it == graph.nodes.end() || dependency_it == graph.nodes.end()) { + return; + } + + append_unique_page_ref(dependent_it->second.depends_on, dependency_page); + append_unique_page_ref(dependent_it->second.plan.linked_pages, dependency_page); + append_unique_page_ref(dependency_it->second.depended_by, dependent_page); +} + // ── symbol context assembly ───────────────────────────────────────── auto lookup_symbol(const extract::ProjectModel& model, extract::SymbolID id) @@ -104,14 +238,13 @@ auto lookup_symbol(const extract::ProjectModel& model, extract::SymbolID id) auto build_symbol_context(const extract::SymbolInfo& sym, const extract::ProjectModel& model, - const extract::FileInfo& file_info) -> SymbolContext { + const std::vector& sibling_ids) -> SymbolContext { SymbolContext ctx; ctx.self = sym; std::unordered_set seen_callers; std::unordered_set seen_callees; std::unordered_set seen_siblings; - // Direct callers (called_by) — limit to keep context manageable. for(auto& caller_id : sym.called_by) { if(!seen_callers.insert(caller_id).second) continue; if(auto s = lookup_symbol(model, caller_id)) { @@ -120,7 +253,6 @@ auto build_symbol_context(const extract::SymbolInfo& sym, } } - // Direct callees (calls). for(auto& callee_id : sym.calls) { if(!seen_callees.insert(callee_id).second) continue; if(auto s = lookup_symbol(model, callee_id)) { @@ -129,8 +261,7 @@ auto build_symbol_context(const extract::SymbolInfo& sym, } } - // Siblings: other symbols in the same file. - for(auto& sib_id : file_info.symbols) { + for(auto& sib_id : sibling_ids) { if(sib_id == sym.id) continue; if(!seen_siblings.insert(sib_id).second) continue; if(auto s = lookup_symbol(model, sib_id)) { @@ -139,7 +270,6 @@ auto build_symbol_context(const extract::SymbolInfo& sym, } } - // Parent class / struct. if(sym.parent.has_value()) { if(auto p = lookup_symbol(model, *sym.parent)) { if(p->kind == extract::SymbolKind::Class || @@ -152,15 +282,20 @@ auto build_symbol_context(const extract::SymbolInfo& sym, return ctx; } -// ── planner: build PageGraph ──────────────────────────────────────── +auto build_symbol_context_from_file(const extract::SymbolInfo& sym, + const extract::ProjectModel& model, + const extract::FileInfo& file_info) -> SymbolContext { + return build_symbol_context(sym, model, file_info.symbols); +} + +// ── file-based page graph (traditional) ───────────────────────────── -auto build_page_graph_impl(const config::TaskConfig& config, +auto build_file_page_graph(const config::TaskConfig& config, const extract::ProjectModel& model) -> PageGraph { namespace fs = std::filesystem; PageGraph graph; auto source_root = fs::path(config.project_root).lexically_normal(); - // --- Create one File-level page per source file --- for(auto& [file_path, file_info] : model.files) { if(file_info.symbols.empty()) continue; @@ -176,7 +311,6 @@ auto build_page_graph_impl(const config::TaskConfig& config, node.plan.level = PageLevel::File; } - // Assemble SymbolContext for each symbol in the file. for(auto& sym_id : file_info.symbols) { auto sym_it = model.symbols.find(sym_id); if(sym_it == model.symbols.end()) continue; @@ -184,33 +318,27 @@ auto build_page_graph_impl(const config::TaskConfig& config, return ctx.self.id == sym_id; }); if(exists) continue; - node.plan.contexts.push_back(build_symbol_context(sym_it->second, model, file_info)); + node.plan.contexts.push_back(build_symbol_context_from_file(sym_it->second, model, file_info)); } } - // --- Build inter-page dependency edges from include graph --- - // If file A includes file B and both have pages, then page(A) depends on - // page(B) (B's docs should be generated first). + // Include graph edges for(auto& [file_path, file_info] : model.files) { auto page_a = output_path_for_source_file(fs::path(file_path), source_root); if(!page_a.has_value()) continue; - if(graph.nodes.find(*page_a) == graph.nodes.end()) continue; for(auto& inc_path : file_info.includes) { auto page_b = output_path_for_source_file(fs::path(inc_path), source_root); if(!page_b.has_value()) continue; - if(*page_b == *page_a) continue; if(graph.nodes.find(*page_b) == graph.nodes.end()) continue; - graph.nodes[*page_a].depends_on.push_back(*page_b); - graph.nodes[*page_b].depended_by.push_back(*page_a); - graph.nodes[*page_a].plan.linked_pages.push_back(*page_b); + add_page_dependency(graph, *page_a, *page_b); } } - // --- Also add call-graph driven edges between pages --- + // Call graph edges std::unordered_map sym_to_page; for(auto& [page_path, node] : graph.nodes) { for(auto& ctx : node.plan.contexts) { @@ -218,23 +346,100 @@ auto build_page_graph_impl(const config::TaskConfig& config, } } for(auto& [page_path, node] : graph.nodes) { - std::unordered_set already; - for(auto& dep : node.depends_on) already.insert(dep); + for(auto& ctx : node.plan.contexts) { + for(auto& callee_id : ctx.self.calls) { + auto it = sym_to_page.find(callee_id); + if(it == sym_to_page.end()) continue; + if(it->second == page_path) continue; + add_page_dependency(graph, page_path, it->second); + } + } + } + + return graph; +} + +// ── module-based page graph ───────────────────────────────────────── +auto build_module_page_graph(const config::TaskConfig& config, + const extract::ProjectModel& model) -> PageGraph { + PageGraph graph; + + std::unordered_map> units_by_name; + for(auto& [source_file, mod_unit] : model.modules) { + (void)source_file; + units_by_name[mod_unit.name].push_back(&mod_unit); + } + + std::unordered_map mod_to_page; + for(auto& [mod_name, mod_units] : units_by_name) { + auto has_interface = std::ranges::any_of(mod_units, [](const extract::ModuleUnit* unit) { + return unit->is_interface; + }); + if(!has_interface) continue; + + auto out_path = output_path_for_module(mod_name); + auto& node = graph.nodes[out_path]; + node.plan.relative_path = out_path; + node.plan.level = PageLevel::Module; + node.plan.title = mod_name; + mod_to_page.emplace(mod_name, out_path); + + std::vector page_symbol_ids; + for(auto* mod_unit : mod_units) { + for(auto& sym_id : mod_unit->symbols) { + if(std::ranges::find(page_symbol_ids, sym_id) == page_symbol_ids.end()) { + page_symbol_ids.push_back(sym_id); + } + } + } + + for(auto& sym_id : page_symbol_ids) { + auto sym_it = model.symbols.find(sym_id); + if(sym_it == model.symbols.end()) continue; + node.plan.contexts.push_back(build_symbol_context(sym_it->second, model, page_symbol_ids)); + } + } + + for(auto& [mod_name, mod_units] : units_by_name) { + auto page_it = mod_to_page.find(mod_name); + if(page_it == mod_to_page.end()) continue; + + auto& page_a = page_it->second; + for(auto* mod_unit : mod_units) { + for(auto& import_name : mod_unit->imports) { + auto it = mod_to_page.find(import_name); + if(it == mod_to_page.end()) continue; + add_page_dependency(graph, page_a, it->second); + } + } + + } + + // Call graph edges between module pages + std::unordered_map sym_to_page; + for(auto& [page_path, node] : graph.nodes) { + for(auto& ctx : node.plan.contexts) { + sym_to_page[ctx.self.id] = page_path; + } + } + for(auto& [page_path, node] : graph.nodes) { for(auto& ctx : node.plan.contexts) { for(auto& callee_id : ctx.self.calls) { auto it = sym_to_page.find(callee_id); if(it == sym_to_page.end()) continue; if(it->second == page_path) continue; - if(!already.insert(it->second).second) continue; - node.depends_on.push_back(it->second); - graph.nodes[it->second].depended_by.push_back(page_path); - node.plan.linked_pages.push_back(it->second); + add_page_dependency(graph, page_path, it->second); } } } - // --- Topological sort (Kahn's) for generation order --- + return graph; +} + +// ── topological sort for page graph ───────────────────────────────── + +auto sort_page_graph(PageGraph& graph) -> void { std::unordered_map in_degree; for(auto& [path, _] : graph.nodes) in_degree[path] = 0; for(auto& [path, node] : graph.nodes) { @@ -260,7 +465,6 @@ auto build_page_graph_impl(const config::TaskConfig& config, } } - // If cycles exist, append remaining pages in arbitrary order. if(graph.generation_order.size() < graph.nodes.size()) { for(auto& [path, _] : graph.nodes) { if(in_degree[path] > 0) { @@ -269,17 +473,14 @@ auto build_page_graph_impl(const config::TaskConfig& config, } } - // De-duplicate linked_pages lists. for(auto& [_, node] : graph.nodes) { auto& lp = node.plan.linked_pages; std::sort(lp.begin(), lp.end()); lp.erase(std::unique(lp.begin(), lp.end()), lp.end()); } - - return graph; } -// ── prompt builder (consumes StructuredPagePlan) ──────────────────── +// ── prompt builder ────────────────────────────────────────────────── auto build_prompt_for_page(const StructuredPagePlan& plan, const config::TaskConfig& config) -> std::string { @@ -287,13 +488,17 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, auto language = config.language.has_value() ? *config.language : std::string("English"); - ss << "Generate Markdown documentation for the following C++ source file.\n\n"; - ss << "## File: `" << plan.relative_path << "`\n\n"; + if(plan.level == PageLevel::Module) { + ss << "Generate Markdown documentation for the following C++20 module.\n\n"; + ss << "## Module: `" << plan.title << "`\n\n"; + } else { + ss << "Generate Markdown documentation for the following C++ source file.\n\n"; + ss << "## File: `" << plan.relative_path << "`\n\n"; + } ss << "### Output language\n"; ss << "- " << language << "\n\n"; - // Linked pages for cross references if(!plan.linked_pages.empty()) { ss << "### Related pages\n"; for(auto& lp : plan.linked_pages) { @@ -321,7 +526,6 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, ss << "**Source:**\n```cpp\n" << sym.source_snippet << "\n```\n\n"; } - // Callers — "why does this exist?" if(!ctx.direct_callers.empty()) { ss << "**Called by:**\n"; for(auto& caller : ctx.direct_callers) { @@ -332,7 +536,6 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, ss << "\n"; } - // Callees — "what does this use?" if(!ctx.direct_callees.empty()) { ss << "**Calls:**\n"; for(auto& callee : ctx.direct_callees) { @@ -341,7 +544,6 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, ss << "\n"; } - // Inheritance if(!sym.bases.empty()) { ss << "**Bases:** "; bool first = true; @@ -360,7 +562,6 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, ss << "\n\n"; } - // Members if(!sym.children.empty()) { ss << "**Members:**\n"; for(auto& child_id : sym.children) { @@ -379,11 +580,18 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, } ss << "\n### Instructions\n\n"; - ss << "1. Write a brief overview of the file's purpose.\n"; - ss << "2. Document each class/struct with its purpose and members.\n"; - ss << "3. Document each function: explain WHY it exists (using caller context), " - "parameters, and return value.\n"; - ss << "4. Note any inheritance relationships.\n"; + if(plan.level == PageLevel::Module) { + ss << "1. Write a brief overview of this module's purpose and public interface.\n"; + ss << "2. Document each exported class/struct with its purpose and members.\n"; + ss << "3. Document each exported function: explain WHY it exists, parameters, and return value.\n"; + ss << "4. Note module dependencies and partition relationships.\n"; + } else { + ss << "1. Write a brief overview of the file's purpose.\n"; + ss << "2. Document each class/struct with its purpose and members.\n"; + ss << "3. Document each function: explain WHY it exists (using caller context), " + "parameters, and return value.\n"; + ss << "4. Note any inheritance relationships.\n"; + } ss << "5. Include cross-references to related pages where appropriate.\n"; ss << "6. Use proper Markdown headings and code blocks.\n"; ss << "7. Do NOT wrap the output in a code fence.\n"; @@ -391,8 +599,6 @@ auto build_prompt_for_page(const StructuredPagePlan& plan, return ss.str(); } -// ── frontmatter ───────────────────────────────────────────────────── - auto render_frontmatter(std::string_view title, const config::FrontmatterConfig& fm_config) -> std::string { std::ostringstream ss; @@ -407,8 +613,6 @@ auto render_frontmatter(std::string_view title, return ss.str(); } -// ── markdown assembly ─────────────────────────────────────────────── - auto assemble_page(std::string_view frontmatter, std::string_view llm_body) -> std::string { std::string content; @@ -426,12 +630,23 @@ auto assemble_page(std::string_view frontmatter, auto build_page_graph(const config::TaskConfig& config, const extract::ProjectModel& model) -> PageGraph { - return build_page_graph_impl(config, model); + PageGraph graph; + + if(model.uses_modules) { + logging::info("building module-based page graph ({} module units)", + model.modules.size()); + graph = build_module_page_graph(config, model); + } else { + graph = build_file_page_graph(config, model); + } + + sort_page_graph(graph); + return graph; } auto build_prompts(const config::TaskConfig& config, const extract::ProjectModel& model) -> std::expected, GenerateError> { - auto graph = build_page_graph_impl(config, model); + auto graph = build_page_graph(config, model); if(graph.nodes.empty()) { return std::unexpected(GenerateError{ @@ -482,7 +697,6 @@ auto generate_pages(const config::TaskConfig& config, const extract::ProjectMode logging::info("generating page {}/{}: {}", i + 1, prompts.size(), prompt.relative_path); - // Call LLM auto llm_result = call_llm(llm_model, prompt.prompt); if(!llm_result.has_value()) { logging::warn("LLM call failed for {}: {}", prompt.relative_path, @@ -490,10 +704,7 @@ auto generate_pages(const config::TaskConfig& config, const extract::ProjectMode continue; } - // Render frontmatter auto frontmatter = render_frontmatter(prompt.title, config.frontmatter); - - // Assemble page auto content = assemble_page(frontmatter, *llm_result); auto page = GeneratedPage{ @@ -548,4 +759,3 @@ auto write_pages(const std::vector& pages, std::string_view outpu } } // namespace clore::generate - diff --git a/src/generate/generate.h b/src/generate/generate.h deleted file mode 100644 index 2f516d7..0000000 --- a/src/generate/generate.h +++ /dev/null @@ -1,94 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "config/schema.h" -#include "extract/model.h" -#include "extract/symbol.h" - -namespace clore::generate { - -// ── page hierarchy ────────────────────────────────────────────────── - -enum class PageLevel : std::uint8_t { - Symbol, ///< A single function or variable (used only for very large files). - ClassStruct, ///< A class/struct and all its members. - File, ///< One source/header file. - Namespace, ///< A namespace aggregating multiple files. - Repository, ///< Top-level project overview. -}; - -// ── structured page context ───────────────────────────────────────── - -/// Pre-assembled context slice for one symbol — ready for prompt -/// serialization without touching ProjectModel again. -struct SymbolContext { - extract::SymbolInfo self; - std::vector direct_callers; - std::vector direct_callees; - std::vector siblings; - std::optional parent_class; -}; - -struct StructuredPagePlan { - std::string relative_path; - std::string title; - PageLevel level = PageLevel::File; - std::vector contexts; - std::vector linked_pages; -}; - -// ── page graph ────────────────────────────────────────────────────── - -struct PageNode { - StructuredPagePlan plan; - std::vector depends_on; - std::vector depended_by; -}; - -struct PageGraph { - std::unordered_map nodes; - std::vector generation_order; -}; - -// ── output ────────────────────────────────────────────────────────── - -struct PromptPage { - std::string relative_path; - std::string title; - std::string prompt; -}; - -struct GeneratedPage { - std::string relative_path; - std::string content; -}; - -struct GenerateError { - std::string message; -}; - -// ── public API ────────────────────────────────────────────────────── - -auto build_page_graph(const config::TaskConfig& config, const extract::ProjectModel& model) - -> PageGraph; - -auto build_prompts(const config::TaskConfig& config, const extract::ProjectModel& model) - -> std::expected, GenerateError>; - -auto generate_pages(const config::TaskConfig& config, const extract::ProjectModel& model, - std::string_view llm_model) - -> std::expected, GenerateError>; - -auto write_prompts(const std::vector& prompts, std::string_view output_root) - -> std::expected; - -auto write_pages(const std::vector& pages, std::string_view output_root) - -> std::expected; - -} // namespace clore::generate diff --git a/src/generate/llm.cpp b/src/generate/llm.cpp deleted file mode 100644 index b621f26..0000000 --- a/src/generate/llm.cpp +++ /dev/null @@ -1,208 +0,0 @@ -#include "generate/llm.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "llvm/Support/JSON.h" - -#include "support/logging.h" - -#ifdef _WIN32 -#define popen _popen -#define pclose _pclose -#endif - -namespace clore::generate { - -namespace { - -constexpr std::string_view kOpenAIBaseUrlEnv = "OPENAI_BASE_URL"; -constexpr std::string_view kOpenAIApiKeyEnv = "OPENAI_API_KEY"; - -auto read_required_env(std::string_view name) -> std::expected { - auto* value = std::getenv(std::string{name}.c_str()); - if(value == nullptr || value[0] == '\0') { - return std::unexpected(LLMError{ - .message = std::format("required environment variable {} is not set", name)}); - } - return std::string(value); -} - -auto build_chat_completions_url(std::string_view api_base) -> std::string { - std::string url(api_base); - if(!url.empty() && url.back() != '/') { - url += '/'; - } - url += "chat/completions"; - return url; -} - -auto escape_json_string(std::string_view s) -> std::string { - std::string out; - out.reserve(s.size() + 16); - for(char c : s) { - switch(c) { - case '"': out += "\\\""; break; - case '\\': out += "\\\\"; break; - case '\n': out += "\\n"; break; - case '\r': out += "\\r"; break; - case '\t': out += "\\t"; break; - default: - if(static_cast(c) < 0x20) { - char buf[8]; - std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast(c)); - out += buf; - } else { - out += c; - } - break; - } - } - return out; -} - -} // namespace - -namespace detail { - -auto build_request_json(std::string_view model, std::string_view prompt) -> std::string { - std::string system_msg = - "You are a C++ documentation writer. Generate clear, well-structured " - "Markdown documentation for C++ code elements. Focus on explaining purpose, " - "parameters, return values, and relationships. Use proper Markdown formatting."; - - return std::format( - R"({{"model":"{}","messages":[{{"role":"system","content":"{}"}},{{"role":"user","content":"{}"}}]}})", - escape_json_string(model), - escape_json_string(system_msg), - escape_json_string(prompt)); -} - -auto parse_response(std::string_view json) -> std::expected { - auto parsed = llvm::json::parse(json); - if(!parsed) { - return std::unexpected(LLMError{ - .message = std::format("failed to parse LLM response JSON: {}", - llvm::toString(parsed.takeError()))}); - } - - auto* root = parsed->getAsObject(); - if(!root) { - return std::unexpected(LLMError{.message = "LLM response is not a JSON object"}); - } - - // Check for API error - if(auto* error = root->getObject("error")) { - auto msg = error->getString("message").value_or("unknown error"); - return std::unexpected(LLMError{ - .message = std::format("LLM API error: {}", std::string_view(msg))}); - } - - auto* choices = root->getArray("choices"); - if(!choices || choices->empty()) { - return std::unexpected(LLMError{.message = "LLM response has no choices"}); - } - - auto* first = (*choices)[0].getAsObject(); - if(!first) { - return std::unexpected(LLMError{.message = "LLM response choice is not an object"}); - } - - auto* message = first->getObject("message"); - if(!message) { - return std::unexpected(LLMError{.message = "LLM response choice has no message"}); - } - - auto content = message->getString("content"); - if(!content) { - return std::unexpected(LLMError{.message = "LLM response message has no content"}); - } - - return std::string(*content); -} - -} // namespace detail - -namespace { - -auto read_pipe(FILE* pipe) -> std::string { - std::string result; - char buf[4096]; - while(auto n = std::fread(buf, 1, sizeof(buf), pipe)) { - result.append(buf, n); - } - return result; -} - -} // namespace - -auto call_llm(std::string_view model, std::string_view prompt) - -> std::expected { - namespace fs = std::filesystem; - - if(model.empty()) { - return std::unexpected(LLMError{.message = "llm model must not be empty"}); - } - - auto api_base_result = read_required_env(kOpenAIBaseUrlEnv); - if(!api_base_result.has_value()) { - return std::unexpected(std::move(api_base_result.error())); - } - - auto api_key_result = read_required_env(kOpenAIApiKeyEnv); - if(!api_key_result.has_value()) { - return std::unexpected(std::move(api_key_result.error())); - } - - auto url = build_chat_completions_url(*api_base_result); - - // Write request body to temp file (avoids shell escaping issues) - auto body_path = fs::temp_directory_path() / "clore_llm_request.json"; - { - std::ofstream f(body_path); - if(!f.is_open()) { - return std::unexpected(LLMError{ - .message = "failed to create temp file for LLM request"}); - } - auto body = detail::build_request_json(model, prompt); - f << body; - } - - // Build curl command - auto cmd = std::format( - "curl -s -X POST \"{}\" " - "-H \"Content-Type: application/json\" " - "-H \"Authorization: Bearer {}\" " - "-d \"@{}\"", - url, *api_key_result, body_path.generic_string()); - - logging::info("calling LLM: {} model={}", url, model); - - auto* pipe = popen(cmd.c_str(), "r"); - if(!pipe) { - fs::remove(body_path); - return std::unexpected(LLMError{.message = "failed to execute curl"}); - } - - auto response = read_pipe(pipe); - auto exit_code = pclose(pipe); - fs::remove(body_path); - - if(exit_code != 0) { - return std::unexpected(LLMError{ - .message = std::format("curl exited with code {}", exit_code)}); - } - - if(response.empty()) { - return std::unexpected(LLMError{.message = "empty response from LLM API"}); - } - - return detail::parse_response(response); -} - -} // namespace clore::generate diff --git a/src/generate/llm.cppm b/src/generate/llm.cppm new file mode 100644 index 0000000..ff22a84 --- /dev/null +++ b/src/generate/llm.cppm @@ -0,0 +1,358 @@ +module; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Program.h" + +export module generate:llm; + +import support; + +export namespace clore::generate { + +struct LLMError { + std::string message; +}; + +namespace detail { + +auto build_request_json(std::string_view model, std::string_view prompt) -> std::string; + +auto parse_response(std::string_view json) -> std::expected; + +} // namespace detail + +auto call_llm(std::string_view model, std::string_view prompt) + -> std::expected; + +} // namespace clore::generate + +// ── implementation ────────────────────────────────────────────────── + +namespace clore::generate { + +namespace { + +constexpr std::string_view kOpenAIBaseUrlEnv = "OPENAI_BASE_URL"; +constexpr std::string_view kOpenAIApiKeyEnv = "OPENAI_API_KEY"; + +auto read_required_env(std::string_view name) -> std::expected { + auto* value = std::getenv(std::string{name}.c_str()); + if(value == nullptr || value[0] == '\0') { + return std::unexpected(LLMError{ + .message = std::format("required environment variable {} is not set", name)}); + } + return std::string(value); +} + +auto build_chat_completions_url(std::string_view api_base) -> std::string { + std::string url(api_base); + if(!url.empty() && url.back() != '/') { + url += '/'; + } + url += "chat/completions"; + return url; +} + +auto escape_json_string(std::string_view s) -> std::string { + std::string out; + out.reserve(s.size() + 16); + for(char c : s) { + switch(c) { + case '"': out += "\\\""; break; + case '\\': out += "\\\\"; break; + case '\n': out += "\\n"; break; + case '\r': out += "\\r"; break; + case '\t': out += "\\t"; break; + default: + if(static_cast(c) < 0x20) { + char buf[8]; + std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast(c)); + out += buf; + } else { + out += c; + } + break; + } + } + return out; +} + +struct TempFileCleanup { + std::vector& paths; + + ~TempFileCleanup() { + for(auto& path : paths) { + std::error_code ec; + std::filesystem::remove(path, ec); + } + } +}; + +auto create_temp_file(std::string_view prefix, std::string_view suffix, + std::vector& temp_paths) + -> std::expected { + llvm::SmallString<128> temp_path; + if(auto ec = llvm::sys::fs::createTemporaryFile(prefix, suffix, temp_path); ec) { + return std::unexpected(LLMError{ + .message = std::format("failed to create temp file: {}", ec.message())}); + } + + auto path = std::filesystem::path(std::string(temp_path)); + temp_paths.push_back(path); + return path; +} + +auto write_text_file(const std::filesystem::path& path, std::string_view content) + -> std::expected { + std::ofstream f(path, std::ios::binary); + if(!f.is_open()) { + return std::unexpected(LLMError{ + .message = std::format("failed to open temp file for writing: {}", + path.generic_string())}); + } + + f.write(content.data(), static_cast(content.size())); + if(!f) { + return std::unexpected(LLMError{ + .message = std::format("failed to write temp file: {}", path.generic_string())}); + } + + return {}; +} + +auto read_text_file(const std::filesystem::path& path) -> std::expected { + std::ifstream f(path, std::ios::binary); + if(!f.is_open()) { + return std::unexpected(LLMError{ + .message = std::format("failed to open temp file for reading: {}", + path.generic_string())}); + } + + std::string content((std::istreambuf_iterator(f)), + std::istreambuf_iterator()); + if(!f.good() && !f.eof()) { + return std::unexpected(LLMError{ + .message = std::format("failed to read temp file: {}", path.generic_string())}); + } + + return content; +} + +auto find_curl_executable() -> std::expected { + auto curl_path = llvm::sys::findProgramByName("curl"); + if(!curl_path) { + return std::unexpected(LLMError{ + .message = std::format("failed to locate curl executable: {}", + curl_path.getError().message())}); + } + + return *curl_path; +} + +} // namespace + +namespace detail { + +auto build_request_json(std::string_view model, std::string_view prompt) -> std::string { + std::string system_msg = + "You are a C++ documentation writer. Generate clear, well-structured " + "Markdown documentation for C++ code elements. Focus on explaining purpose, " + "parameters, return values, and relationships. Use proper Markdown formatting."; + + return std::format( + R"({{"model":"{}","messages":[{{"role":"system","content":"{}"}},{{"role":"user","content":"{}"}}]}})", + escape_json_string(model), + escape_json_string(system_msg), + escape_json_string(prompt)); +} + +auto parse_response(std::string_view json) -> std::expected { + auto parsed = llvm::json::parse(json); + if(!parsed) { + return std::unexpected(LLMError{ + .message = std::format("failed to parse LLM response JSON: {}", + llvm::toString(parsed.takeError()))}); + } + + auto* root = parsed->getAsObject(); + if(!root) { + return std::unexpected(LLMError{.message = "LLM response is not a JSON object"}); + } + + if(auto* error = root->getObject("error")) { + auto msg = error->getString("message").value_or("unknown error"); + return std::unexpected(LLMError{ + .message = std::format("LLM API error: {}", std::string_view(msg))}); + } + + auto* choices = root->getArray("choices"); + if(!choices || choices->empty()) { + return std::unexpected(LLMError{.message = "LLM response has no choices"}); + } + + auto* first = (*choices)[0].getAsObject(); + if(!first) { + return std::unexpected(LLMError{.message = "LLM response choice is not an object"}); + } + + auto* message = first->getObject("message"); + if(!message) { + return std::unexpected(LLMError{.message = "LLM response choice has no message"}); + } + + auto content = message->getString("content"); + if(!content) { + return std::unexpected(LLMError{.message = "LLM response message has no content"}); + } + + return std::string(*content); +} + +} // namespace detail + +auto call_llm(std::string_view model, std::string_view prompt) + -> std::expected { + namespace fs = std::filesystem; + + if(model.empty()) { + return std::unexpected(LLMError{.message = "llm model must not be empty"}); + } + + auto api_base_result = read_required_env(kOpenAIBaseUrlEnv); + if(!api_base_result.has_value()) { + return std::unexpected(std::move(api_base_result.error())); + } + + auto api_key_result = read_required_env(kOpenAIApiKeyEnv); + if(!api_key_result.has_value()) { + return std::unexpected(std::move(api_key_result.error())); + } + + auto url = build_chat_completions_url(*api_base_result); + + std::vector temp_paths; + TempFileCleanup cleanup{temp_paths}; + + auto body_path_result = create_temp_file("clore_llm_request", "json", temp_paths); + if(!body_path_result.has_value()) { + return std::unexpected(std::move(body_path_result.error())); + } + auto curl_config_result = create_temp_file("clore_llm_headers", "conf", temp_paths); + if(!curl_config_result.has_value()) { + return std::unexpected(std::move(curl_config_result.error())); + } + auto response_path_result = create_temp_file("clore_llm_response", "json", temp_paths); + if(!response_path_result.has_value()) { + return std::unexpected(std::move(response_path_result.error())); + } + auto stderr_path_result = create_temp_file("clore_llm_stderr", "log", temp_paths); + if(!stderr_path_result.has_value()) { + return std::unexpected(std::move(stderr_path_result.error())); + } + + auto body_path = *body_path_result; + auto curl_config_path = *curl_config_result; + auto response_path = *response_path_result; + auto stderr_path = *stderr_path_result; + + if(auto write_result = write_text_file(body_path, detail::build_request_json(model, prompt)); + !write_result.has_value()) { + return std::unexpected(std::move(write_result.error())); + } + + auto curl_config = std::format( + "header = \"Content-Type: application/json\"\n" + "header = \"Authorization: Bearer {}\"\n", + *api_key_result); + if(auto write_result = write_text_file(curl_config_path, curl_config); + !write_result.has_value()) { + return std::unexpected(std::move(write_result.error())); + } + + auto curl_path_result = find_curl_executable(); + if(!curl_path_result.has_value()) { + return std::unexpected(std::move(curl_path_result.error())); + } + auto curl_path = *curl_path_result; + + auto body_arg = std::string("@") + body_path.string(); + auto response_path_string = response_path.string(); + auto stderr_path_string = stderr_path.string(); + std::vector arg_storage{ + curl_path, + "--silent", + "--show-error", + "--request", + "POST", + url, + "--config", + curl_config_path.string(), + "--data-binary", + body_arg, + }; + llvm::SmallVector args; + args.reserve(arg_storage.size()); + for(auto& arg : arg_storage) { + args.push_back(arg); + } + + std::array, 3> redirects{ + std::nullopt, + llvm::StringRef(response_path_string), + llvm::StringRef(stderr_path_string), + }; + + logging::info("calling LLM: {} model={}", url, model); + + std::string err_msg; + bool execution_failed = false; + auto exit_code = llvm::sys::ExecuteAndWait(curl_path, args, std::nullopt, redirects, + 0, 0, &err_msg, &execution_failed); + + auto stderr_result = read_text_file(stderr_path); + auto stderr_text = stderr_result.has_value() ? *stderr_result : std::string{}; + + if(execution_failed) { + auto message = err_msg.empty() ? stderr_text : err_msg; + if(message.empty()) { + message = "failed to execute curl"; + } + return std::unexpected(LLMError{.message = std::move(message)}); + } + + if(exit_code != 0) { + return std::unexpected(LLMError{ + .message = stderr_text.empty() + ? std::format("curl exited with code {}", exit_code) + : std::format("curl exited with code {}: {}", + exit_code, stderr_text)}); + } + + auto response_result = read_text_file(response_path); + if(!response_result.has_value()) { + return std::unexpected(std::move(response_result.error())); + } + + if(response_result->empty()) { + return std::unexpected(LLMError{.message = "empty response from LLM"}); + } + + return detail::parse_response(*response_result); +} + +} // namespace clore::generate diff --git a/src/generate/llm.h b/src/generate/llm.h deleted file mode 100644 index 0e0b49a..0000000 --- a/src/generate/llm.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include -#include - -namespace clore::generate { - -struct LLMError { - std::string message; -}; - -namespace detail { - -auto build_request_json(std::string_view model, std::string_view prompt) -> std::string; - -auto parse_response(std::string_view json) -> std::expected; - -} // namespace detail - -auto call_llm(std::string_view model, std::string_view prompt) - -> std::expected; - -} // namespace clore::generate diff --git a/src/main.cpp b/src/main.cpp index 884ce2c..7454db1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,11 +6,12 @@ #include #include "eventide/deco/deco.h" +#include "spdlog/spdlog.h" -#include "config/config.h" -#include "extract/extract.h" -#include "generate/generate.h" -#include "support/logging.h" +import config; +import extract; +import generate; +import support; namespace clore { diff --git a/src/support/logging.cpp b/src/support/logging.cpp deleted file mode 100644 index f3e9a5e..0000000 --- a/src/support/logging.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "support/logging.h" diff --git a/src/support/logging.h b/src/support/logging.cppm similarity index 60% rename from src/support/logging.h rename to src/support/logging.cppm index ce45348..8d0f861 100644 --- a/src/support/logging.h +++ b/src/support/logging.cppm @@ -1,14 +1,18 @@ -#pragma once +module; #include #include #include +#include #include +#include #include "spdlog/spdlog.h" #include "spdlog/sinks/stdout_color_sinks.h" -namespace clore::logging { +export module support; + +export namespace clore::logging { struct Options { /// No built-in default — caller must set this explicitly before logging. @@ -18,8 +22,6 @@ struct Options { inline Options options; inline void log(spdlog::level::level_enum lvl, std::string_view msg) { - // When no level filter is set, pass every message through to the underlying - // spdlog logger which applies its own level filter. if(options.level.has_value() && lvl < *options.level) { return; } @@ -70,22 +72,3 @@ inline void stderr_logger(std::string_view name) { } } // namespace clore::logging - -#define LOG_MESSAGE(name, fmt, ...) \ - do { \ - clore::logging::name(fmt __VA_OPT__(, ) __VA_ARGS__); \ - } while(0) - -#define LOG_TRACE(fmt, ...) LOG_MESSAGE(trace, fmt, __VA_ARGS__) -#define LOG_DEBUG(fmt, ...) LOG_MESSAGE(debug, fmt, __VA_ARGS__) -#define LOG_INFO(fmt, ...) LOG_MESSAGE(info, fmt, __VA_ARGS__) -#define LOG_WARN(fmt, ...) LOG_MESSAGE(warn, fmt, __VA_ARGS__) -#define LOG_ERROR(fmt, ...) LOG_MESSAGE(err, fmt, __VA_ARGS__) - -#define LOG_MESSAGE_RET(ret, name, fmt, ...) \ - do { \ - LOG_MESSAGE(name, fmt, __VA_ARGS__); \ - return ret; \ - } while(0); - -#define LOG_ERROR_RET(ret, fmt, ...) LOG_MESSAGE_RET(ret, err, fmt, __VA_ARGS__) diff --git a/tests/unit/config/config.cpp b/tests/unit/config/config.cpp index 8172e08..b41904e 100644 --- a/tests/unit/config/config.cpp +++ b/tests/unit/config/config.cpp @@ -3,7 +3,7 @@ #include #include -#include "config/config.h" +import config; using namespace clore::config; diff --git a/tests/unit/config/validate.cpp b/tests/unit/config/validate.cpp index f4cca66..2274692 100644 --- a/tests/unit/config/validate.cpp +++ b/tests/unit/config/validate.cpp @@ -3,7 +3,7 @@ #include #include -#include "config/config.h" +import config; using namespace clore::config; diff --git a/tests/unit/extract/ast.cpp b/tests/unit/extract/ast.cpp index 50a5c35..ea4c206 100644 --- a/tests/unit/extract/ast.cpp +++ b/tests/unit/extract/ast.cpp @@ -3,8 +3,7 @@ #include #include -#include "extract/ast.h" -#include "extract/compdb.h" +import extract; using namespace clore::extract; diff --git a/tests/unit/extract/compdb.cpp b/tests/unit/extract/compdb.cpp index 3c9aa5a..ba46628 100644 --- a/tests/unit/extract/compdb.cpp +++ b/tests/unit/extract/compdb.cpp @@ -3,9 +3,10 @@ #include #include -#include "extract/compdb.h" #include "extract/compdb_test_utils.h" +import extract; + using namespace clore::extract; TEST_SUITE(compdb) { @@ -86,4 +87,23 @@ TEST_SUITE(compdb) { auto no_results = lookup(db, "/project/src/nonexistent.cpp"); EXPECT_EQ(no_results.size(), 0u); } + + TEST_CASE(lookup_resolves_relative_file_against_entry_directory) { + namespace fs = std::filesystem; + + auto project_dir = (fs::temp_directory_path() / "clore_test_compdb_relative").lexically_normal(); + auto source_path = (project_dir / "src" / "main.cpp").lexically_normal(); + + CompilationDatabase db; + db.entries.push_back(CompileEntry{ + .file = "src/main.cpp", + .directory = project_dir.generic_string(), + .arguments = {"clang++", "-c", "src/main.cpp"}, + }); + + auto results = lookup(db, source_path.generic_string()); + ASSERT_EQ(results.size(), 1u); + EXPECT_EQ(results.front()->file, "src/main.cpp"); + EXPECT_EQ(results.front()->directory, project_dir.generic_string()); + } }; diff --git a/tests/unit/extract/extract.cpp b/tests/unit/extract/extract.cpp index f145299..bf0e5a6 100644 --- a/tests/unit/extract/extract.cpp +++ b/tests/unit/extract/extract.cpp @@ -1,16 +1,16 @@ #include "eventide/zest/zest.h" #include +#include #include #include #include -#include "config/config.h" -#include "extract/compdb.h" -#include "extract/extract.h" -#include "extract/scan.h" #include "extract/compdb_test_utils.h" -#include "generate/generate.h" + +import config; +import extract; +import generate; using namespace clore; @@ -156,6 +156,88 @@ void Widget::set_value(int v) { value_ = v; } fs::remove_all(temp_dir); } + + TEST_CASE(detects_distinct_module_units_in_project_model) { + namespace fs = std::filesystem; + + auto ticks = std::chrono::steady_clock::now().time_since_epoch().count(); + auto root = fs::temp_directory_path() / + std::format("clore_module_extract_test_{}", ticks); + fs::create_directories(root / "src"); + + { + std::ofstream f(root / "src" / "math.cppm"); + f << R"( +export module demo.math; + +export int add(int lhs, int rhs) { + return lhs + rhs; +} +)"; + } + + { + std::ofstream f(root / "src" / "math.detail.cppm"); + f << R"( +export module demo.math:detail; + +export int detail() { + return 7; +} +)"; + } + + clore::testing::write_compile_commands( + root / "compile_commands.json", + {{ + .directory = root / "src", + .file = root / "src" / "math.cppm", + .arguments = { + "clang++", + "-std=c++23", + "-c", + "math.cppm", + "-o", + "math.pcm", + }, + }, + { + .directory = root / "src", + .file = root / "src" / "math.detail.cppm", + .arguments = { + "clang++", + "-std=c++23", + "-c", + "math.detail.cppm", + "-o", + "math.detail.pcm", + }, + }}); + + config::TaskConfig cfg; + cfg.compile_commands_path = (root / "compile_commands.json").string(); + cfg.project_root = root.string(); + cfg.output_root = (root / "out").string(); + cfg.workspace_root = root.string(); + cfg.extract.max_snippet_bytes = 1024; + + auto result = extract::extract_project(cfg); + ASSERT_TRUE(result.has_value()); + + EXPECT_TRUE(result->uses_modules); + ASSERT_EQ(result->modules.size(), 2u); + + auto math_path = (root / "src" / "math.cppm").lexically_normal().generic_string(); + auto detail_path = (root / "src" / "math.detail.cppm").lexically_normal().generic_string(); + ASSERT_TRUE(result->modules.contains(math_path)); + ASSERT_TRUE(result->modules.contains(detail_path)); + EXPECT_EQ(result->modules.at(math_path).name, "demo.math"); + EXPECT_EQ(result->modules.at(detail_path).name, "demo.math:detail"); + EXPECT_TRUE(result->modules.at(math_path).is_interface); + EXPECT_TRUE(result->modules.at(detail_path).is_interface); + + fs::remove_all(root); + } }; TEST_SUITE(extract_filter_security) { diff --git a/tests/unit/extract/scan.cpp b/tests/unit/extract/scan.cpp index 4a8bd0d..1bd408e 100644 --- a/tests/unit/extract/scan.cpp +++ b/tests/unit/extract/scan.cpp @@ -1,6 +1,8 @@ #include "eventide/zest/zest.h" -#include "extract/scan.h" +#include + +import extract; using namespace clore::extract; @@ -81,4 +83,22 @@ TEST_SUITE(scan) { EXPECT_LT(pos_left, pos_top); EXPECT_LT(pos_right, pos_top); } + + TEST_CASE(scan_module_decl_normalizes_partition_imports_and_deduplicates) { + ScanResult result; + + scan_module_decl(R"( +export module extract:scan; +import :tooling; +import :tooling; +import support; +)", + result); + + EXPECT_EQ(result.module_name, "extract:scan"); + EXPECT_TRUE(result.is_interface_unit); + ASSERT_EQ(result.module_imports.size(), 2u); + EXPECT_EQ(result.module_imports[0], "extract:tooling"); + EXPECT_EQ(result.module_imports[1], "support"); + } }; diff --git a/tests/unit/extract/symbol.cpp b/tests/unit/extract/symbol.cpp index 6589ec8..a91b685 100644 --- a/tests/unit/extract/symbol.cpp +++ b/tests/unit/extract/symbol.cpp @@ -1,6 +1,6 @@ #include "eventide/zest/zest.h" -#include "extract/symbol.h" +import extract; using namespace clore::extract; diff --git a/tests/unit/generate/generate.cpp b/tests/unit/generate/generate.cpp index 7fd2f64..0493eb3 100644 --- a/tests/unit/generate/generate.cpp +++ b/tests/unit/generate/generate.cpp @@ -1,13 +1,17 @@ #include "eventide/zest/zest.h" +#include #include +#include #include #include #include #include #include -#include "generate/generate.h" +import config; +import extract; +import generate; using namespace clore; using namespace clore::generate; @@ -77,6 +81,24 @@ auto make_model(const fs::path& project_root) -> extract::ProjectModel { return model; } +auto make_symbol(std::uint64_t id, std::string_view name, std::string_view qualified_name, + std::string_view signature, const std::string& file) + -> extract::SymbolInfo { + extract::SymbolInfo symbol; + symbol.id = extract::SymbolID{.hash = id}; + symbol.kind = extract::SymbolKind::Function; + symbol.name = std::string(name); + symbol.qualified_name = std::string(qualified_name); + symbol.signature = std::string(signature); + symbol.source_snippet = std::format("{} {{}}", signature); + symbol.declaration_location = extract::SourceLocation{ + .file = file, + .line = 1, + .column = 1, + }; + return symbol; +} + } // namespace TEST_SUITE(generate) { @@ -228,4 +250,179 @@ TEST_SUITE(generate) { ASSERT_TRUE(fs::exists(target)); EXPECT_EQ(read_text_file(target), "# Math\n"); } -}; \ No newline at end of file + + TEST_CASE(build_module_graph_and_prompts_preserve_distinct_module_units) { + ScopedTempDir temp("build_module_prompts"); + fs::create_directories(temp.path / "src"); + + auto config = make_config(temp.path); + + extract::ProjectModel model; + model.uses_modules = true; + + auto util_file = (temp.path / "src" / "util.cppm").generic_string(); + auto main_file = (temp.path / "src" / "math.cppm").generic_string(); + auto partition_file = (temp.path / "src" / "math.detail.cppm").generic_string(); + auto impl_file = (temp.path / "src" / "math_impl.cpp").generic_string(); + + auto util_symbol = make_symbol(10, "helper", "demo::util::helper", "int helper()", util_file); + auto api_symbol = make_symbol(11, "add", "demo::math::add", "int add(int lhs, int rhs)", main_file); + auto partition_symbol = make_symbol(12, "detail", "demo::math::detail", "int detail()", partition_file); + auto impl_symbol = make_symbol(13, "internal", "demo::math::internal", "int internal()", impl_file); + + model.symbols.emplace(util_symbol.id, util_symbol); + model.symbols.emplace(api_symbol.id, api_symbol); + model.symbols.emplace(partition_symbol.id, partition_symbol); + model.symbols.emplace(impl_symbol.id, impl_symbol); + + model.modules.emplace( + util_file, + extract::ModuleUnit{ + .name = "demo.util", + .is_interface = true, + .source_file = util_file, + .imports = {}, + .symbols = {util_symbol.id}, + }); + model.modules.emplace( + main_file, + extract::ModuleUnit{ + .name = "demo.math", + .is_interface = true, + .source_file = main_file, + .imports = {"demo.util", "demo.util"}, + .symbols = {api_symbol.id}, + }); + model.modules.emplace( + partition_file, + extract::ModuleUnit{ + .name = "demo.math:detail", + .is_interface = true, + .source_file = partition_file, + .imports = {"demo.util"}, + .symbols = {partition_symbol.id}, + }); + model.modules.emplace( + impl_file, + extract::ModuleUnit{ + .name = "demo.math", + .is_interface = false, + .source_file = impl_file, + .imports = {"demo.util", "demo.util"}, + .symbols = {impl_symbol.id}, + }); + + auto graph = build_page_graph(config, model); + + ASSERT_EQ(graph.nodes.size(), 3u); + ASSERT_TRUE(graph.nodes.contains("demo.util/index.md")); + ASSERT_TRUE(graph.nodes.contains("demo.math/index.md")); + ASSERT_TRUE(graph.nodes.contains("demo.math/detail.md")); + + auto& main_node = graph.nodes.at("demo.math/index.md"); + auto& partition_node = graph.nodes.at("demo.math/detail.md"); + + EXPECT_EQ(std::count(main_node.depends_on.begin(), main_node.depends_on.end(), "demo.util/index.md"), + 1); + EXPECT_EQ(std::count(partition_node.depends_on.begin(), partition_node.depends_on.end(), + "demo.util/index.md"), + 1); + EXPECT_EQ(std::count(partition_node.depends_on.begin(), partition_node.depends_on.end(), + "demo.math/index.md"), + 1); + + auto pos_util = std::find(graph.generation_order.begin(), graph.generation_order.end(), + "demo.util/index.md"); + auto pos_main = std::find(graph.generation_order.begin(), graph.generation_order.end(), + "demo.math/index.md"); + auto pos_partition = std::find(graph.generation_order.begin(), graph.generation_order.end(), + "demo.math/detail.md"); + ASSERT_TRUE(pos_util != graph.generation_order.end()); + ASSERT_TRUE(pos_main != graph.generation_order.end()); + ASSERT_TRUE(pos_partition != graph.generation_order.end()); + EXPECT_LT(pos_util, pos_main); + EXPECT_LT(pos_main, pos_partition); + + auto prompts_result = build_prompts(config, model); + ASSERT_TRUE(prompts_result.has_value()); + ASSERT_EQ(prompts_result->size(), 3u); + + auto main_prompt_it = std::find_if(prompts_result->begin(), prompts_result->end(), + [](const PromptPage& page) { + return page.relative_path == "demo.math/index.md"; + }); + ASSERT_TRUE(main_prompt_it != prompts_result->end()); + EXPECT_NE(main_prompt_it->prompt.find("## Module: `demo.math`"), std::string::npos); + EXPECT_NE(main_prompt_it->prompt.find("#### function: `demo::math::internal`"), + std::string::npos); + + auto partition_prompt_it = std::find_if(prompts_result->begin(), prompts_result->end(), + [](const PromptPage& page) { + return page.relative_path == "demo.math/detail.md"; + }); + ASSERT_TRUE(partition_prompt_it != prompts_result->end()); + EXPECT_NE(partition_prompt_it->prompt.find("## Module: `demo.math:detail`"), + std::string::npos); + } + + TEST_CASE(build_module_graph_does_not_create_partition_main_cycle) { + ScopedTempDir temp("build_module_cycle"); + fs::create_directories(temp.path / "src"); + + auto config = make_config(temp.path); + + extract::ProjectModel model; + model.uses_modules = true; + + auto main_file = (temp.path / "src" / "math.cppm").generic_string(); + auto partition_file = (temp.path / "src" / "math.detail.cppm").generic_string(); + + auto api_symbol = make_symbol(21, "add", "demo::math::add", "int add(int lhs, int rhs)", main_file); + auto partition_symbol = make_symbol(22, "detail", "demo::math::detail", "int detail()", partition_file); + + model.symbols.emplace(api_symbol.id, api_symbol); + model.symbols.emplace(partition_symbol.id, partition_symbol); + + model.modules.emplace( + main_file, + extract::ModuleUnit{ + .name = "demo.math", + .is_interface = true, + .source_file = main_file, + .imports = {"demo.math:detail"}, + .symbols = {api_symbol.id}, + }); + model.modules.emplace( + partition_file, + extract::ModuleUnit{ + .name = "demo.math:detail", + .is_interface = true, + .source_file = partition_file, + .imports = {}, + .symbols = {partition_symbol.id}, + }); + + auto graph = build_page_graph(config, model); + + ASSERT_TRUE(graph.nodes.contains("demo.math/index.md")); + ASSERT_TRUE(graph.nodes.contains("demo.math/detail.md")); + + auto& main_node = graph.nodes.at("demo.math/index.md"); + auto& partition_node = graph.nodes.at("demo.math/detail.md"); + + EXPECT_EQ(std::count(main_node.depends_on.begin(), main_node.depends_on.end(), + "demo.math/detail.md"), + 1); + EXPECT_EQ(std::count(partition_node.depends_on.begin(), partition_node.depends_on.end(), + "demo.math/index.md"), + 0); + + auto pos_main = std::find(graph.generation_order.begin(), graph.generation_order.end(), + "demo.math/index.md"); + auto pos_partition = std::find(graph.generation_order.begin(), graph.generation_order.end(), + "demo.math/detail.md"); + ASSERT_TRUE(pos_main != graph.generation_order.end()); + ASSERT_TRUE(pos_partition != graph.generation_order.end()); + EXPECT_LT(pos_partition, pos_main); + } +}; diff --git a/tests/unit/generate/llm.cpp b/tests/unit/generate/llm.cpp index 4af9b08..045e228 100644 --- a/tests/unit/generate/llm.cpp +++ b/tests/unit/generate/llm.cpp @@ -4,7 +4,7 @@ #include #include -#include "generate/llm.h" +import generate; using namespace clore::generate;