Skip to content

Commit f290be1

Browse files
committed
Separate trivia into whitespace and comments
Split trivia into smaller tokens. Also, use `std::string_view` instead of string copies wherever possible. Still TODO: - Split whitespace into smaller parts (whitespace should be broken at '\n' characters: a block of whitespace runs up to and including the first '\n' character) - Split trivia into leading trivia and trailing trivia
1 parent 5955ff0 commit f290be1

File tree

3 files changed

+111
-34
lines changed

3 files changed

+111
-34
lines changed

batteries/codemod/ast_types.luau

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,20 @@ export type Whitespace = {
1414
text: string,
1515
}
1616

17-
-- type SingleLineComment = {
18-
-- tag: "single_line_comment",
19-
-- location: Location,
20-
-- text: string,
21-
-- }
22-
23-
-- type MultiLineComment = {
24-
-- tag: "multi_line_comment",
25-
-- location: Location,
26-
-- text: string,
27-
-- depth: number,
28-
-- }
29-
30-
export type Trivia = Whitespace
17+
export type SingleLineComment = {
18+
tag: "comment",
19+
location: Location,
20+
text: string,
21+
}
22+
23+
export type MultiLineComment = {
24+
tag: "blockcomment",
25+
location: Location,
26+
text: string,
27+
-- TODO: depth: number,
28+
}
29+
30+
export type Trivia = Whitespace | SingleLineComment | MultiLineComment
3131

3232
export type Token<Kind = string> = {
3333
read leadingTrivia: { Trivia },

batteries/codemod/printer.luau

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ local function exhaustiveMatch(value: never): never
77
end
88

99
local function printTrivia(trivia: T.Trivia): string
10-
if trivia.tag == "whitespace" then
10+
if trivia.tag == "whitespace" or trivia.tag == "comment" or trivia.tag == "blockcomment" then
1111
return trivia.text
1212
else
1313
return exhaustiveMatch(trivia.tag)

luau/src/luau.cpp

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ static ExprResult parseExpr(std::string& source)
8282
}
8383
}
8484

85-
static std::vector<size_t> computeLineOffsets(const std::string& content)
85+
static std::vector<size_t> computeLineOffsets(std::string_view content)
8686
{
8787
std::vector<size_t> result{};
8888
result.emplace_back(0);
@@ -102,22 +102,48 @@ static std::vector<size_t> computeLineOffsets(const std::string& content)
102102
return result;
103103
}
104104

105+
// Returns, in their original order, copies of every comment in `comments` for which
// `span.encloses(comment.location)` holds.
//
// `comments` is taken by const reference: this helper is invoked each time trivia is
// extracted, and taking the vector by value would copy the entire comment list per call.
static std::vector<Luau::Comment> commentsWithinSpan(const std::vector<Luau::Comment>& comments, Luau::Location span)
{
    // TODO: O(n), we could probably binary search if there are a lot of comments
    std::vector<Luau::Comment> result;

    for (const auto& comment : comments)
    {
        if (span.encloses(comment.location))
            result.emplace_back(comment);
    }

    return result;
}
116+
117+
struct Trivia {
118+
enum TriviaKind {
119+
Whitespace,
120+
SingleLineComment,
121+
MultiLineComment,
122+
};
123+
124+
TriviaKind kind;
125+
Luau::Location location;
126+
std::string_view text;
127+
};
128+
105129
struct AstSerialize : public Luau::AstVisitor
106130
{
107131
lua_State* L;
108132
Luau::CstNodeMap cstNodeMap;
109-
const std::string& source;
133+
std::string_view source;
110134
Luau::Position currentPosition{0,0};
111135
std::vector<size_t> lineOffsets;
136+
std::vector<Luau::Comment> commentLocations;
112137

113138
// absolute index for the table where we're storing locals
114139
int localTableIndex;
115140

116-
AstSerialize(lua_State* L, const std::string& source, Luau::CstNodeMap cstNodeMap)
141+
AstSerialize(lua_State* L, std::string_view source, Luau::CstNodeMap cstNodeMap, std::vector<Luau::Comment> commentLocations)
117142
: L(L)
118143
, cstNodeMap(std::move(cstNodeMap))
119144
, source(source)
120145
, lineOffsets(computeLineOffsets(source))
146+
, commentLocations(std::move(commentLocations))
121147
{
122148
lua_createtable(L, 0, 0);
123149
localTableIndex = lua_absindex(L, -1);
@@ -155,23 +181,63 @@ struct AstSerialize : public Luau::AstVisitor
155181
currentPosition.column += unsigned(contents.size());
156182
}
157183

158-
std::string extractTrivia(const Luau::Position& newPos)
184+
Trivia extractWhitespace(const Luau::Position& newPos)
159185
{
160-
LUAU_ASSERT(currentPosition <= newPos);
161-
if (currentPosition == newPos)
162-
return "";
186+
const auto beginPosition = currentPosition;
163187

188+
LUAU_ASSERT(currentPosition < newPos);
164189
LUAU_ASSERT(currentPosition.line < lineOffsets.size());
165190
LUAU_ASSERT(newPos.line < lineOffsets.size());
166191
size_t startOffset = lineOffsets[currentPosition.line] + currentPosition.column;
167192
size_t endOffset = lineOffsets[newPos.line] + newPos.column;
168193

169-
std::string trivia = source.substr(startOffset, endOffset - startOffset);
194+
std::string_view trivia = source.substr(startOffset, endOffset - startOffset);
170195

196+
// TODO: advancePosition is more of a debug check - we can probably just rely on newPos here
171197
advancePosition(trivia);
172198
LUAU_ASSERT(currentPosition == newPos);
173199

174-
return trivia;
200+
return Trivia{Trivia::Whitespace, Luau::Location{beginPosition, newPos}, trivia};
201+
}
202+
203+
// Consumes the source text between `currentPosition` and `newPos` and returns it as an
// ordered list of trivia: each comment reported inside that span becomes a comment entry,
// and the gaps before, between and after the comments become whitespace entries.
// On return `currentPosition` equals `newPos`. Returns an empty list when there is nothing
// to consume.
std::vector<Trivia> extractTrivia(const Luau::Position& newPos)
{
    LUAU_ASSERT(currentPosition <= newPos);

    std::vector<Trivia> pieces;
    if (currentPosition == newPos)
        return pieces;

    // The span is captured up front, before the cursor starts moving.
    for (const auto& comment : commentsWithinSpan(commentLocations, Luau::Location{currentPosition, newPos}))
    {
        const auto& commentBegin = comment.location.begin;
        const auto& commentEnd = comment.location.end;

        // Whatever lies between the cursor and the start of the comment is emitted first.
        if (currentPosition < commentBegin)
            pieces.emplace_back(extractWhitespace(commentBegin));

        LUAU_ASSERT(commentBegin.line < lineOffsets.size());
        LUAU_ASSERT(commentEnd.line < lineOffsets.size());

        // Translate the (line, column) endpoints into absolute offsets into `source`.
        size_t firstOffset = lineOffsets[commentBegin.line] + commentBegin.column;
        size_t lastOffset = lineOffsets[commentEnd.line] + commentEnd.column;
        std::string_view commentText = source.substr(firstOffset, lastOffset - firstOffset);

        // TODO: advancePosition is more of a debug check - we can probably just set currentPosition directly here
        advancePosition(commentText);
        LUAU_ASSERT(currentPosition == commentEnd);

        // TODO: currently the text includes the `--` / `--[[` characters, should it?
        LUAU_ASSERT(comment.type != Luau::Lexeme::BrokenComment);

        Trivia::TriviaKind kind;
        if (comment.type == Luau::Lexeme::Comment)
            kind = Trivia::SingleLineComment;
        else
            kind = Trivia::MultiLineComment;

        pieces.emplace_back(Trivia{kind, comment.location, commentText});
    }

    // Trailing gap after the last comment (or the whole span when there were no comments).
    if (currentPosition < newPos)
        pieces.emplace_back(extractWhitespace(newPos));

    LUAU_ASSERT(currentPosition == newPos);

    return pieces;
}
176242

177243
void serialize(Luau::Position position)
@@ -329,26 +395,37 @@ struct AstSerialize : public Luau::AstVisitor
329395
withLocation(node->location);
330396
}
331397

332-
void serializeTrivia(const std::string& trivia)
398+
void serializeTrivia(const std::vector<Trivia>& trivia)
333399
{
334400
lua_rawcheckstack(L, 2);
335-
lua_createtable(L, 1, 0);
401+
lua_createtable(L, trivia.size(), 0);
336402

337-
if (!trivia.empty())
403+
for (size_t i = 0; i < trivia.size(); i++)
338404
{
339-
// TODO: tokenize the trivia into smaller parts
340405
lua_rawcheckstack(L, 2);
341406
lua_createtable(L, 0, 3);
342407

343-
lua_pushstring(L, "whitespace");
408+
switch (trivia[i].kind)
409+
{
410+
case Trivia::Whitespace:
411+
lua_pushstring(L, "whitespace");
412+
break;
413+
case Trivia::SingleLineComment:
414+
lua_pushstring(L, "comment");
415+
break;
416+
case Trivia::MultiLineComment:
417+
lua_pushstring(L, "blockcomment");
418+
break;
419+
}
344420
lua_setfield(L, -2, "tag");
345421

346-
// TODO: push location
422+
serialize(trivia[i].location);
423+
lua_setfield(L, -2, "location");
347424

348-
lua_pushlstring(L, trivia.data(), trivia.size());
425+
lua_pushlstring(L, trivia[i].text.data(), trivia[i].text.size());
349426
lua_setfield(L, -2, "text");
350427

351-
lua_rawseti(L, -2, 1);
428+
lua_rawseti(L, -2, i + 1);
352429
}
353430
}
354431

@@ -1517,7 +1594,7 @@ int luau_parse(lua_State* L)
15171594

15181595
lua_createtable(L, 0, 2);
15191596

1520-
AstSerialize serializer{L, source, result.parseResult.cstNodeMap};
1597+
AstSerialize serializer{L, source, result.parseResult.cstNodeMap, result.parseResult.commentLocations};
15211598
serializer.visit(result.parseResult.root);
15221599
lua_setfield(L, -2, "root");
15231600

@@ -1559,7 +1636,7 @@ int luau_parseexpr(lua_State* L)
15591636
luaL_error(L, "parsing failed:\n%s", fullError.c_str());
15601637
}
15611638

1562-
AstSerialize serializer{L, source, Luau::CstNodeMap{nullptr}};
1639+
AstSerialize serializer{L, source, Luau::CstNodeMap{nullptr}, result.commentLocations};
15631640
serializer.visit(result.root);
15641641

15651642
return 1;

0 commit comments

Comments
 (0)