Skip to content

Commit d42d9d5

Browse files
16bit-ykikoclaude
andauthored
refactor(document links): use Lexer for unified directive argument scanning (#421)
## Summary - Replace hand-written character scanning in `document_links.cpp` with the project's `Lexer` class for finding filename arguments in preprocessor directives - Extend `Lexer` to activate `header_name` mode for `#embed`/`#include_next`, and expose `set_header_name_mode()` for `__has_include`/`__has_embed` contexts - Remove unused `Include::filename_range` field (had a latent assert crash on macro-expanded includes) - Add `MacroInclude` unit test covering `#include MACRO` scenario ## Test plan - [x] 498 unit tests pass (including new `MacroInclude` test) - [x] 119 integration tests pass - [x] 2/2 smoke tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Document links now resolve includes written via macros; directive parsing recognizes include, include_next, embed and __has_* patterns more reliably using lexer-driven argument detection. * **Refactor** * Removed an internal filename-range field previously stored for include directives. * **Tests** * Added unit tests covering directive argument extraction and macro-based include linking. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9c89d20 commit d42d9d5

File tree

7 files changed

+183
-37
lines changed

7 files changed

+183
-37
lines changed

src/compile/directive.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class DirectiveCollector : public clang::PPCallbacks {
9494
const clang::Token& include_tok,
9595
llvm::StringRef,
9696
bool,
97-
clang::CharSourceRange filename_range,
97+
clang::CharSourceRange,
9898
clang::OptionalFileEntryRef,
9999
llvm::StringRef,
100100
llvm::StringRef,
@@ -108,7 +108,6 @@ class DirectiveCollector : public clang::PPCallbacks {
108108
unit->directives[prev_fid].includes.emplace_back(Include{
109109
.fid = {},
110110
.location = include_tok.getLocation(),
111-
.filename_range = filename_range.getAsRange(),
112111
});
113112
}
114113

src/compile/directive.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,8 @@ struct Include {
2020
/// The file id of included file.
2121
clang::FileID fid;
2222

23-
/// Location of the `include`.
23+
/// Location of the `include` keyword.
2424
clang::SourceLocation location;
25-
26-
/// The range of filename(includes `""` or `<>`).
27-
clang::SourceRange filename_range;
2825
};
2926

3027
/// Information about `__has_include` directive.

src/feature/document_links.cpp

Lines changed: 10 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
#include <algorithm>
21
#include <cstdint>
32
#include <string>
43
#include <vector>
54

65
#include "feature/feature.h"
6+
#include "syntax/lexer.h"
77

88
namespace clice::feature {
99

10-
namespace {} // namespace
11-
1210
auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
1311
-> std::vector<protocol::DocumentLink> {
1412
std::vector<protocol::DocumentLink> links;
@@ -22,59 +20,41 @@ auto document_links(CompilationUnitRef unit, PositionEncoding encoding)
2220
auto content = unit.interested_content();
2321
PositionMapper converter(content, encoding);
2422
auto& directives = directives_it->second;
23+
auto* lang_opts = &unit.lang_options();
2524

26-
// Scan forward from offset to find a quoted/angled filename range.
27-
auto find_filename_range = [&](std::uint32_t offset) -> std::optional<LocalSourceRange> {
28-
auto tail = content.substr(offset);
29-
auto quote_pos = tail.find_first_of("<\"");
30-
if(quote_pos == llvm::StringRef::npos) {
31-
return std::nullopt;
32-
}
33-
char open = tail[quote_pos];
34-
char close = open == '<' ? '>' : '"';
35-
auto close_pos = tail.find(close, quote_pos + 1);
36-
if(close_pos == llvm::StringRef::npos) {
37-
return std::nullopt;
38-
}
39-
return LocalSourceRange(offset + static_cast<std::uint32_t>(quote_pos),
40-
offset + static_cast<std::uint32_t>(close_pos + 1));
41-
};
42-
43-
auto add_link_by_location = [&](clang::SourceLocation loc, llvm::StringRef target) {
25+
auto add_link = [&](clang::SourceLocation loc, llvm::StringRef target) {
4426
auto [fid, offset] = unit.decompose_location(loc);
45-
if(fid != interested || offset >= content.size()) {
27+
if(fid != interested || offset >= content.size())
4628
return;
47-
}
48-
auto range = find_filename_range(offset);
49-
if(!range) {
29+
auto range = find_directive_argument(content, offset, lang_opts);
30+
if(!range)
5031
return;
51-
}
5232
protocol::DocumentLink link{.range = to_range(converter, *range)};
5333
link.target = target.str();
5434
links.push_back(std::move(link));
5535
};
5636

5737
for(const auto& include: directives.includes) {
5838
if(include.fid.isValid()) {
59-
add_link_by_location(include.location, unit.file_path(include.fid));
39+
add_link(include.location, unit.file_path(include.fid));
6040
}
6141
}
6242

6343
for(const auto& has_include: directives.has_includes) {
6444
if(has_include.fid.isValid()) {
65-
add_link_by_location(has_include.location, unit.file_path(has_include.fid));
45+
add_link(has_include.location, unit.file_path(has_include.fid));
6646
}
6747
}
6848

6949
for(const auto& embed: directives.embeds) {
7050
if(embed.file) {
71-
add_link_by_location(embed.loc, embed.file->getName());
51+
add_link(embed.loc, embed.file->getName());
7252
}
7353
}
7454

7555
for(const auto& has_embed: directives.has_embeds) {
7656
if(has_embed.file) {
77-
add_link_by_location(has_embed.loc, has_embed.file->getName());
57+
add_link(has_embed.loc, has_embed.file->getName());
7858
}
7959
}
8060

src/syntax/lexer.cpp

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ void Lexer::lex(Token& token) {
5353
}
5454
} else if(parse_pp_keyword) {
5555
parse_pp_keyword = false;
56-
parse_header_name = token.text(content) == "include";
56+
auto kw = token.text(content);
57+
parse_header_name = kw == "include" || kw == "include_next" || kw == "embed";
5758
}
5859
}
5960

@@ -105,4 +106,60 @@ Token Lexer::advance_until(TokenKind kind) {
105106
}
106107
}
107108

109+
static bool is_directive_keyword(llvm::StringRef word) {
110+
return word == "include" || word == "include_next" || word == "import" || word == "embed" ||
111+
word == "__has_include" || word == "__has_include_next" || word == "__has_embed";
112+
}
113+
114+
std::optional<LocalSourceRange> find_directive_argument(llvm::StringRef content,
115+
std::uint32_t offset,
116+
const clang::LangOptions* lang_opts) {
117+
std::uint32_t line_start = 0;
118+
if(auto nl = content.rfind('\n', offset); nl != llvm::StringRef::npos)
119+
line_start = static_cast<std::uint32_t>(nl + 1);
120+
121+
auto line = content.substr(line_start);
122+
Lexer lexer(line, true, lang_opts);
123+
bool after_has_keyword = false;
124+
bool ready = false;
125+
126+
while(true) {
127+
auto tok = lexer.advance();
128+
if(tok.is_eof() || tok.is_eod())
129+
break;
130+
131+
auto abs_begin = line_start + tok.range.begin;
132+
auto abs_end = line_start + tok.range.end;
133+
134+
if(tok.is_identifier()) {
135+
auto text = tok.text(line);
136+
if(text == "__has_include" || text == "__has_include_next" || text == "__has_embed") {
137+
after_has_keyword = true;
138+
continue;
139+
}
140+
if(text == "include" || text == "include_next" || text == "embed") {
141+
ready = true;
142+
continue;
143+
}
144+
}
145+
146+
if(tok.kind == clang::tok::l_paren && after_has_keyword) {
147+
after_has_keyword = false;
148+
ready = true;
149+
lexer.set_header_name_mode();
150+
continue;
151+
}
152+
153+
if(abs_begin < offset || !ready)
154+
continue;
155+
156+
if(tok.is_header_name() || tok.kind == clang::tok::string_literal)
157+
return LocalSourceRange(abs_begin, abs_end);
158+
159+
if(tok.is_identifier())
160+
return LocalSourceRange(abs_begin, abs_end);
161+
}
162+
return std::nullopt;
163+
}
164+
108165
} // namespace clice

src/syntax/lexer.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ class Lexer {
5151

5252
Token advance_until(TokenKind kind);
5353

54+
/// Force the lexer into header-name mode so the next token is lexed
55+
/// via LexIncludeFilename (correctly handling both "..." and <...>).
56+
/// Use this before lexing filename arguments in contexts like
57+
/// __has_include() or __has_embed() where the lexer cannot detect
58+
/// the mode automatically.
59+
void set_header_name_mode() {
60+
parse_header_name = true;
61+
}
62+
5463
private:
5564
bool ignore_end_of_directive = true;
5665
bool parse_pp_keyword = false;
@@ -64,4 +73,13 @@ class Lexer {
6473
std::unique_ptr<clang::Lexer> lexer;
6574
};
6675

76+
/// Find the range of the filename argument in a preprocessor directive line.
77+
/// `content` is the full source text, `offset` points at or before the directive keyword.
78+
/// Returns the range of the first filename-like token (header name, string literal,
79+
/// or macro identifier) found on the same line, or nullopt if none.
80+
std::optional<LocalSourceRange>
81+
find_directive_argument(llvm::StringRef content,
82+
std::uint32_t offset,
83+
const clang::LangOptions* lang_opts = nullptr);
84+
6785
} // namespace clice

tests/unit/feature/document_link_tests.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,19 @@ TEST_CASE(HasInclude) {
8989
EXPECT_LINK(1, "1", TestVFS::path("test.h"));
9090
}
9191

92+
TEST_CASE(MacroInclude) {
93+
run(R"cpp(
94+
#[test.h]
95+
96+
#[main.cpp]
97+
#define HEADER "test.h"
98+
#include @0[HEADER$]
99+
)cpp");
100+
101+
ASSERT_EQ(links.size(), 1U);
102+
EXPECT_LINK(0, "0", TestVFS::path("test.h"));
103+
}
104+
92105
TEST_CASE(Embed) {
93106
run(R"cpp(
94107
#[bytes.bin]

tests/unit/syntax/lexer_tests.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,5 +83,87 @@ int x = 1;
8383
}
8484

8585
}; // TEST_SUITE(SourceText)
86+
87+
TEST_SUITE(DirectiveArgument) {
88+
89+
void EXPECT_RANGE(llvm::StringRef content, std::uint32_t offset, llvm::StringRef expected) {
90+
auto result = find_directive_argument(content, offset);
91+
ASSERT_TRUE(result.has_value());
92+
ASSERT_EQ(content.substr(result->begin, result->length()), expected);
93+
}
94+
95+
void EXPECT_NONE(llvm::StringRef content, std::uint32_t offset) {
96+
auto result = find_directive_argument(content, offset);
97+
ASSERT_FALSE(result.has_value());
98+
}
99+
100+
TEST_CASE(IncludeQuoted) {
101+
llvm::StringRef src = R"(#include "foo.h")";
102+
EXPECT_RANGE(src, 0, R"("foo.h")");
103+
}
104+
105+
TEST_CASE(IncludeAngled) {
106+
llvm::StringRef src = "#include <iostream>";
107+
EXPECT_RANGE(src, 0, "<iostream>");
108+
}
109+
110+
TEST_CASE(IncludeMacro) {
111+
llvm::StringRef src = "#include HEADER";
112+
EXPECT_RANGE(src, 0, "HEADER");
113+
}
114+
115+
TEST_CASE(HasIncludeQuoted) {
116+
llvm::StringRef src = R"(#if __has_include("foo.h"))";
117+
// offset at __has_include
118+
auto pos = src.find("__has_include");
119+
EXPECT_RANGE(src, static_cast<std::uint32_t>(pos), R"("foo.h")");
120+
}
121+
122+
TEST_CASE(HasIncludeAngled) {
123+
llvm::StringRef src = "#if __has_include(<vector>)";
124+
auto pos = src.find("__has_include");
125+
EXPECT_RANGE(src, static_cast<std::uint32_t>(pos), "<vector>");
126+
}
127+
128+
TEST_CASE(EmbedQuoted) {
129+
llvm::StringRef src = R"(#embed "data.bin")";
130+
EXPECT_RANGE(src, 0, R"("data.bin")");
131+
}
132+
133+
TEST_CASE(HasEmbedQuoted) {
134+
llvm::StringRef src = R"(#if __has_embed("data.bin"))";
135+
auto pos = src.find("__has_embed");
136+
EXPECT_RANGE(src, static_cast<std::uint32_t>(pos), R"("data.bin")");
137+
}
138+
139+
TEST_CASE(MultilineOffset) {
140+
llvm::StringRef src = "#include \"a.h\"\n#include \"b.h\"";
141+
// offset pointing into the second line
142+
auto pos = src.find("#include \"b.h\"");
143+
EXPECT_RANGE(src, static_cast<std::uint32_t>(pos), R"("b.h")");
144+
}
145+
146+
TEST_CASE(EmptyDirective) {
147+
llvm::StringRef src = "#include \n";
148+
EXPECT_NONE(src, 0);
149+
}
150+
151+
TEST_CASE(HasIncludeFromLineStart) {
152+
llvm::StringRef src = "#if __has_include(<vector>)";
153+
EXPECT_RANGE(src, 0, "<vector>");
154+
}
155+
156+
TEST_CASE(HasEmbedFromLineStart) {
157+
llvm::StringRef src = R"(#if __has_embed("data.bin"))";
158+
EXPECT_RANGE(src, 0, R"("data.bin")");
159+
}
160+
161+
TEST_CASE(IncludeNext) {
162+
llvm::StringRef src = "#include_next <stdlib.h>";
163+
EXPECT_RANGE(src, 0, "<stdlib.h>");
164+
}
165+
166+
}; // TEST_SUITE(DirectiveArgument)
167+
86168
} // namespace
87169
} // namespace clice::testing

0 commit comments

Comments
 (0)