Skip to content

Commit 2bbdf6c

Browse files
16bit-ykikoclaude
andauthored
refactor(command): split CompilationContext into ResolvedFlags → CompileCommand → to_argv() (#408)
## Summary - Replace flat `CompilationContext { directory, arguments }` with a three-layer abstraction: `ResolvedFlags` (file-independent flags) → `CompileCommand` (+ source file) → `to_argv()` (full argv on demand) - `ResolvedFlags.flags` never contains source file path or `-main-file-name`, making it directly usable as a clean cache key input (e.g. PCH sharing across files with identical preambles) - `to_argv()` handles `-main-file-name` insertion for cc1 mode automatically — consumers no longer need to search/replace in the argument list - Eliminates the pollute-then-clean anti-pattern in `lookup()` and the manual source-file replacement in `fill_header_context_args()` ## Test plan - [x] `pixi run format` — no changes - [x] `pixi run unit-test` — 481 passed - [x] `pixi run integration-test` — 113 passed 🤖 Generated with [Claude Code](https://claude.com/claude-code) <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Refactor** * Unified compile-command handling across the server and tools for more consistent argument and flag behavior (driver vs frontend modes). * **New Features** * Added an LRU-backed in-memory cache to improve performance and eviction control. * **Chores** * Added an option to control injection of resource-directory flags (enabled by default). * **Tests** * Updated unit and integration tests to adopt the new command representation and verify cache behavior. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9c9e6b0 commit 2bbdf6c

File tree

9 files changed

+240
-179
lines changed

9 files changed

+240
-179
lines changed

src/command/command.cpp

Lines changed: 102 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,44 @@ namespace ranges = std::ranges;
2323

2424
} // namespace
2525

26+
std::vector<const char*> CompileCommand::to_argv() const {
27+
std::vector<const char*> argv;
28+
argv.reserve(resolved.flags.size() + 4);
29+
30+
if(resolved.is_cc1 && source_file) {
31+
// cc1 mode requires TWO file-related arguments (both are needed):
32+
// 1. -main-file-name <basename> — used by clang for diagnostics/debug info
33+
// 2. <source_file> at the end — the actual input file path
34+
// These are NOT duplicates: (1) is just the basename, (2) is the full path.
35+
for(std::size_t i = 0; i < resolved.flags.size(); ++i) {
36+
argv.push_back(resolved.flags[i]);
37+
if(resolved.flags[i] == llvm::StringRef("-cc1")) {
38+
argv.push_back("-main-file-name");
39+
// path::filename returns a suffix of source_file (a pointer into
40+
// the same buffer), so .data() is null-terminated because source_file is.
41+
argv.push_back(path::filename(source_file).data());
42+
}
43+
}
44+
} else {
45+
argv.insert(argv.end(), resolved.flags.begin(), resolved.flags.end());
46+
}
47+
48+
if(source_file) {
49+
argv.push_back(source_file);
50+
}
51+
return argv;
52+
}
53+
54+
std::vector<std::string> CompileCommand::to_string_argv() const {
55+
auto argv = to_argv();
56+
std::vector<std::string> result;
57+
result.reserve(argv.size());
58+
for(auto* arg: argv) {
59+
result.emplace_back(arg);
60+
}
61+
return result;
62+
}
63+
2664
CompilationDatabase::CompilationDatabase() = default;
2765

2866
CompilationDatabase::~CompilationDatabase() = default;
@@ -329,26 +367,27 @@ std::size_t CompilationDatabase::load(llvm::StringRef path) {
329367
return entries.size();
330368
}
331369

332-
llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRef file,
333-
const CommandOptions& options) {
370+
llvm::SmallVector<CompileCommand> CompilationDatabase::lookup(llvm::StringRef file,
371+
const CommandOptions& options) {
334372
auto path_id = paths.intern(file);
335373
auto matched = find_entries(path_id);
336374

337375
auto render_arg = [&](auto& out, llvm::opt::Arg& arg) {
338376
render_arg_to([&](llvm::StringRef s) { out.push_back(strings.save(s).data()); }, arg);
339377
};
340378

341-
/// Build one CompilationContext from a single CompilationInfo.
342-
auto build_context = [&](object_ptr<CompilationInfo> info) -> CompilationContext {
379+
/// Build one CompileCommand from a single CompilationInfo.
380+
auto build_command = [&](object_ptr<CompilationInfo> info) -> CompileCommand {
343381
llvm::StringRef directory = info->directory;
344-
std::vector<const char*> arguments;
382+
std::vector<const char*> flags;
383+
bool is_cc1 = false;
345384

346385
auto append_arg = [&](llvm::StringRef s) {
347-
arguments.emplace_back(strings.save(s).data());
386+
flags.emplace_back(strings.save(s).data());
348387
};
349388

350389
auto append_args = [&](llvm::ArrayRef<const char*> args) {
351-
arguments.insert(arguments.end(), args.begin(), args.end());
390+
flags.insert(flags.end(), args.begin(), args.end());
352391
};
353392

354393
if(options.query_toolchain) {
@@ -361,23 +400,20 @@ llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRe
361400
append_args(info->canonical->arguments);
362401
append_args(info->patch);
363402
} else {
364-
arguments.assign(cached.begin(), cached.end());
365-
// TODO: add an assertion that the last arg is the temp source
366-
// file (e.g., contains "query-toolchain") to guard against
367-
// future changes in clang cc1 argument ordering.
368-
arguments.pop_back(); // remove temp source file
403+
flags.assign(cached.begin(), cached.end());
404+
flags.pop_back(); // remove temp source file
369405

370406
// Replace resource dir if needed.
371407
if(!resource_dir().empty()) {
372408
llvm::StringRef old_resource_dir;
373-
for(std::size_t i = 0; i + 1 < arguments.size(); ++i) {
374-
if(arguments[i] == llvm::StringRef("-resource-dir")) {
375-
old_resource_dir = arguments[i + 1];
409+
for(std::size_t i = 0; i + 1 < flags.size(); ++i) {
410+
if(flags[i] == llvm::StringRef("-resource-dir")) {
411+
old_resource_dir = flags[i + 1];
376412
break;
377413
}
378414
}
379415
if(!old_resource_dir.empty() && old_resource_dir != resource_dir()) {
380-
for(auto& arg: arguments) {
416+
for(auto& arg: flags) {
381417
llvm::StringRef s(arg);
382418
if(s.starts_with(old_resource_dir)) {
383419
auto replaced =
@@ -390,39 +426,42 @@ llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRe
390426

391427
append_args(info->patch);
392428

393-
// Fix -main-file-name to match the actual file.
394-
bool next_main_file = false;
395-
for(auto& arg: arguments) {
396-
if(arg == llvm::StringRef("-main-file-name")) {
397-
next_main_file = true;
429+
// Strip -main-file-name and its value from flags (to_argv() will
430+
// re-inject it with the correct basename when is_cc1 is set).
431+
std::vector<const char*> cleaned;
432+
cleaned.reserve(flags.size());
433+
for(std::size_t i = 0; i < flags.size(); ++i) {
434+
if(flags[i] == llvm::StringRef("-main-file-name") && i + 1 < flags.size()) {
435+
++i; // skip the value
398436
continue;
399437
}
400-
if(next_main_file) {
401-
arg = strings.save(path::filename(file)).data();
402-
next_main_file = false;
403-
}
438+
cleaned.push_back(flags[i]);
404439
}
405-
}
440+
flags = std::move(cleaned);
406441

407-
// Inject our resource dir if not already present.
408-
if(!resource_dir().empty()) {
409-
bool has_resource_dir = false;
410-
for(auto& arg: arguments) {
411-
if(arg == llvm::StringRef("-resource-dir")) {
412-
has_resource_dir = true;
413-
break;
414-
}
415-
}
416-
if(!has_resource_dir) {
417-
append_arg("-resource-dir");
418-
append_arg(resource_dir());
419-
}
442+
// Detect cc1 mode (search rather than assuming index).
443+
is_cc1 = ranges::contains(flags, llvm::StringRef("-cc1"));
420444
}
421445
} else {
422446
append_args(info->canonical->arguments);
423447
append_args(info->patch);
424448
}
425449

450+
// Inject our resource dir if not already present.
451+
if(options.inject_resource_dir && !resource_dir().empty()) {
452+
bool has_resource_dir = false;
453+
for(auto& arg: flags) {
454+
if(arg == llvm::StringRef("-resource-dir")) {
455+
has_resource_dir = true;
456+
break;
457+
}
458+
}
459+
if(!has_resource_dir) {
460+
append_arg("-resource-dir");
461+
append_arg(resource_dir());
462+
}
463+
}
464+
426465
// Apply remove filter.
427466
if(!options.remove.empty()) {
428467
using Arg = std::unique_ptr<llvm::opt::Arg>;
@@ -440,12 +479,12 @@ llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRe
440479
};
441480
std::ranges::sort(remove_args, {}, get_id);
442481

443-
auto saved_args = std::move(arguments);
444-
arguments.clear();
445-
arguments.push_back(saved_args.front());
482+
auto saved_flags = std::move(flags);
483+
flags.clear();
484+
flags.push_back(saved_flags.front());
446485

447486
parser->parse(
448-
llvm::ArrayRef(saved_args).drop_front(),
487+
llvm::ArrayRef(saved_flags).drop_front(),
449488
[&](Arg arg) {
450489
auto id = arg->getOption().getID();
451490
auto range = std::ranges::equal_range(remove_args, id, {}, get_id);
@@ -461,7 +500,7 @@ llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRe
461500
return;
462501
}
463502
}
464-
render_arg(arguments, *arg);
503+
render_arg(flags, *arg);
465504
},
466505
[](int, int) {});
467506
}
@@ -470,26 +509,34 @@ llvm::SmallVector<CompilationContext> CompilationDatabase::lookup(llvm::StringRe
470509
append_arg(arg);
471510
}
472511

473-
arguments.emplace_back(paths.resolve(path_id).data());
474-
return CompilationContext(directory, std::move(arguments));
512+
return CompileCommand{
513+
ResolvedFlags{directory, std::move(flags), is_cc1},
514+
paths.resolve(path_id).data()
515+
};
475516
};
476517

477-
llvm::SmallVector<CompilationContext> results;
518+
llvm::SmallVector<CompileCommand> results;
478519

479520
if(!matched.empty()) {
480521
for(auto& entry: matched) {
481-
results.push_back(build_context(entry.info));
522+
results.push_back(build_command(entry.info));
482523
}
483524
} else {
484525
// No matching entry — synthesize a default command.
485-
std::vector<const char*> arguments;
526+
std::vector<const char*> flags;
486527
if(file.ends_with(".cpp") || file.ends_with(".hpp") || file.ends_with(".cc")) {
487-
arguments = {"clang++", "-std=c++20"};
528+
flags = {"clang++", "-std=c++20"};
488529
} else {
489-
arguments = {"clang"};
530+
flags = {"clang"};
531+
}
532+
if(options.inject_resource_dir && !resource_dir().empty()) {
533+
flags.push_back(strings.save("-resource-dir").data());
534+
flags.push_back(strings.save(resource_dir()).data());
490535
}
491-
arguments.emplace_back(paths.resolve(path_id).data());
492-
results.push_back(CompilationContext({}, std::move(arguments)));
536+
results.push_back(CompileCommand{
537+
ResolvedFlags{{}, std::move(flags), false},
538+
paths.resolve(path_id).data()
539+
});
493540
}
494541

495542
return results;
@@ -513,8 +560,8 @@ SearchConfig CompilationDatabase::lookup_search_config(llvm::StringRef file,
513560
}
514561

515562
auto results = lookup(file, options);
516-
auto& ctx = results.front();
517-
auto config = extract_search_config(ctx.arguments, ctx.directory);
563+
auto& cmd = results.front();
564+
auto config = extract_search_config(cmd.to_argv(), cmd.resolved.directory);
518565

519566
if(cacheable) {
520567
auto key = ConfigCacheKey{matched.front().info.ptr, options_bits(options)};

src/command/command.h

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,47 @@ struct CommandOptions {
2929
/// Set true in unittests to avoid cluttering test output.
3030
bool suppress_logging = false;
3131

32+
/// Inject our resource dir into the flags if not already present.
33+
/// Enabled by default so clang tools always use matching builtin headers.
34+
/// Disable in unit tests that assert exact argument counts.
35+
bool inject_resource_dir = true;
36+
3237
/// Extra arguments to remove from the original command line.
3338
llvm::ArrayRef<std::string> remove;
3439

3540
/// Extra arguments to append to the original command line.
3641
llvm::ArrayRef<std::string> append;
3742
};
3843

39-
struct CompilationContext {
44+
/// File-independent compilation flags (shareable, suitable as cache key input).
45+
/// Does NOT contain source file path or -main-file-name.
46+
struct ResolvedFlags {
4047
/// The working directory of compilation.
4148
llvm::StringRef directory;
4249

43-
/// The compilation arguments.
44-
std::vector<const char*> arguments;
50+
/// All flags excluding source file path and -main-file-name.
51+
std::vector<const char*> flags;
52+
53+
/// Whether flags come from toolchain query (cc1 mode).
54+
/// When true, flags are cc1 frontend args (resolved clang binary + "-cc1" + ...),
55+
/// NOT the original driver command. to_argv() scans for "-cc1" in flags and
56+
/// inserts -main-file-name immediately after it.
57+
bool is_cc1 = false;
58+
};
59+
60+
/// Compilation command = resolved flags + source file identity.
61+
struct CompileCommand {
62+
ResolvedFlags resolved;
63+
64+
/// Interned, pointer-stable. Must be null-terminated (required by to_argv()
65+
/// and path::filename().data() which relies on the suffix being null-terminated).
66+
const char* source_file = nullptr;
67+
68+
/// Produce full argv: flags + [-main-file-name <basename> if cc1] + source_file.
69+
std::vector<const char*> to_argv() const;
70+
71+
/// Convenience: to_argv() converted to vector<string>.
72+
std::vector<std::string> to_string_argv() const;
4573
};
4674

4775
/// Shared compiler identity — driver + all semantics-affecting flags.
@@ -174,10 +202,10 @@ class CompilationDatabase {
174202
/// but toolchain cache survives. Returns the number of entries loaded.
175203
std::size_t load(llvm::StringRef path);
176204

177-
/// Lookup the compilation contexts for a file. A file may have multiple
205+
/// Lookup the compile commands for a file. A file may have multiple
178206
/// compilation commands (e.g. different build configurations); all are returned.
179-
llvm::SmallVector<CompilationContext> lookup(llvm::StringRef file,
180-
const CommandOptions& options = {});
207+
llvm::SmallVector<CompileCommand> lookup(llvm::StringRef file,
208+
const CommandOptions& options = {});
181209

182210
/// Combined lookup + extract_search_config with internal caching.
183211
SearchConfig lookup_search_config(llvm::StringRef file, const CommandOptions& options = {});

src/server/compiler.cpp

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ void Compiler::init_compile_graph() {
5252
if(results.empty())
5353
return {};
5454

55-
auto& ctx = results[0];
56-
auto scan_result = scan_precise(ctx.arguments, ctx.directory);
55+
auto& cmd = results[0];
56+
auto scan_result = scan_precise(cmd.to_argv(), cmd.resolved.directory);
5757

5858
llvm::SmallVector<std::uint32_t> deps;
5959
for(auto& mod_name: scan_result.modules) {
@@ -158,12 +158,9 @@ bool Compiler::fill_compile_args(llvm::StringRef path,
158158
// 2. Normal CDB lookup for the file itself.
159159
auto results = workspace.cdb.lookup(path, {.query_toolchain = true});
160160
if(!results.empty()) {
161-
auto& ctx = results.front();
162-
directory = ctx.directory.str();
163-
arguments.clear();
164-
for(auto* arg: ctx.arguments) {
165-
arguments.emplace_back(arg);
166-
}
161+
auto& cmd = results.front();
162+
directory = cmd.resolved.directory.str();
163+
arguments = cmd.to_string_argv();
167164
return true;
168165
}
169166

@@ -214,33 +211,20 @@ bool Compiler::fill_header_context_args(llvm::StringRef path,
214211
return false;
215212
}
216213

217-
auto& host_ctx = host_results.front();
218-
directory = host_ctx.directory.str();
219-
arguments.clear();
214+
auto& host_cmd = host_results.front();
215+
directory = host_cmd.resolved.directory.str();
220216

221-
// Copy host arguments, replacing the host source file path with the header.
222-
bool replaced = false;
223-
for(auto& arg: host_ctx.arguments) {
224-
if(llvm::StringRef(arg) == host_path) {
225-
arguments.emplace_back(path);
226-
replaced = true;
227-
} else {
228-
arguments.emplace_back(arg);
229-
}
230-
}
231-
if(!replaced) {
232-
LOG_WARN("fill_header_context_args: host path {} not found in arguments, appending header",
233-
host_path);
234-
arguments.emplace_back(path);
235-
}
217+
// Replace source_file and inject -include preamble into flags directly.
218+
CompileCommand header_cmd = host_cmd;
219+
header_cmd.source_file = workspace.path_pool.resolve(path_id).data();
236220

237-
// Inject preamble: for cc1 args insert after "-cc1", otherwise after driver.
238-
std::size_t inject_pos = 1;
239-
if(arguments.size() >= 2 && arguments[1] == "-cc1") {
240-
inject_pos = 2;
241-
}
242-
arguments.insert(arguments.begin() + inject_pos, ctx_ptr->preamble_path);
243-
arguments.insert(arguments.begin() + inject_pos, "-include");
221+
// Inject -include <preamble> into flags: after "-cc1" for cc1, after driver otherwise.
222+
std::size_t inject_pos = header_cmd.resolved.is_cc1 ? 2 : 1;
223+
header_cmd.resolved.flags.insert(header_cmd.resolved.flags.begin() + inject_pos,
224+
ctx_ptr->preamble_path.c_str());
225+
header_cmd.resolved.flags.insert(header_cmd.resolved.flags.begin() + inject_pos, "-include");
226+
227+
arguments = header_cmd.to_string_argv();
244228

245229
LOG_INFO("fill_compile_args: header context for {} (host={}, preamble={})",
246230
path,

0 commit comments

Comments
 (0)