Skip to content

Commit 25c00ef

Browse files
Start making the CST usable in Lute (luau-lang#71)
Now that CST nodes exist in Luau, this PR extends the existing `@lute/luau` C++ module to return full-fidelity CST nodes to Luau, rather than just AST nodes. This is intended to support future usages of the CST for tooling such as code modding, linting or formatting implemented fully in Luau. We extend the serialization of some AST nodes with extra tokens, and create tests to check for validity of tokenization and the roundtrippability of the serialization state in Luau (i.e., `print(parse(source)) == source`, where `print` is fully defined in Luau). Future AST nodes will be added in follow-up commits. Alongside the CST data, a key concept that this PR introduces is the idea of trivia. Right now, the Luau CST does not preserve trivia itself, but retains enough information to compute spans of trivia, allowing later lookup in the original source code. To extract trivia, we keep the original input buffer around, and then take the span between the last token and the next token, and call this "trivia". Trivia is then individually tokenized and attached to "Token" nodes. There are some key rules for trivia: - [x] Trivia consists of whitespace, single-line comments and multi-line comments. Each kind is a separate trivia token. - [x] Whitespace trivia is split up into separate tokens based on the '\n' character. Other whitespace characters are joined together up to and including the first newline character. For example, `<space><space>\n\t\t\n\n` is split into `[<space><space>\n, \t\t\n, \n]`. - [ ] Trivia on a token is split up into leading and trailing trivia. The leading / trailing split is based on the Roslyn Compiler trivia rules: trailing trivia consists of everything up to and including the first `\n` newline character, and all further trivia is leading trivia of the next token. The idea is that `local x = ...\n` can then be represented as a single line in trivia, to allow easier replacement (rather than the `\n` being part of the next statement). 
The last rule is not yet implemented but will be introduced in a follow-up commit. The API remains experimental and is subject to change. In particular, I'm not fully sold on the naming scheme for nodes and fields (particularly the fields that match keyword names) and it may require some standardisation. Also, we are somewhat mixing AST nodes with CST nodes, and we may want to separate the two.
1 parent f00f952 commit 25c00ef

File tree

6 files changed

+908
-85
lines changed

6 files changed

+908
-85
lines changed

batteries/syntax/ast_types.luau

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
--- A point in source text, identified by a line number and a column number.
export type Position = {
2+
line: number,
3+
column: number,
4+
}
5+
6+
--- A span of source text delimited by a `begin` and an `end` Position.
-- `end` is a reserved word, so the field is declared with bracket syntax.
export type Location = {
7+
begin: Position,
8+
["end"]: Position, -- TODO: do we really want to have to use brackets everywhere?
9+
}
10+
11+
--- Trivia token holding whitespace, tagged "whitespace".
-- `text` is emitted verbatim by the printer. Per the commit description,
-- whitespace is split into separate tokens at each '\n' character.
export type Whitespace = {
12+
tag: "whitespace",
13+
location: Location,
14+
text: string,
15+
}
16+
17+
--- Trivia token for a single-line comment, tagged "comment".
-- `text` is emitted verbatim by the printer.
export type SingleLineComment = {
18+
tag: "comment",
19+
location: Location,
20+
text: string,
21+
}
22+
23+
--- Trivia token for a multi-line (block) comment, tagged "blockcomment".
-- `text` is emitted verbatim by the printer.
export type MultiLineComment = {
24+
tag: "blockcomment",
25+
location: Location,
26+
text: string,
27+
-- TODO: depth: number,
28+
}
29+
30+
--- Any trivia token; discriminate on the `tag` field.
export type Trivia = Whitespace | SingleLineComment | MultiLineComment
31+
32+
--- A token together with the trivia attached before and after it.
-- `Kind` narrows the type of `text` (for example Token<"=">) and defaults to
-- `string`. Every field is declared `read`, i.e. read-only.
-- Per the commit description, the rule for splitting trivia into leading and
-- trailing parts is not implemented yet.
export type Token<Kind = string> = {
33+
read leadingTrivia: { Trivia },
34+
read position: Position,
35+
read text: Kind,
36+
read trailingTrivia: { Trivia },
37+
}
38+
39+
--- One element of a separated list: a node plus an optional separator token.
-- NOTE(review): presumably the separator is the one following `node` and is nil
-- when the source has none (e.g. after the last element) — confirm against the serializer.
export type Pair<T, Separator> = { node: T, separator: Token<Separator>? }
40+
--- A list of Pair entries; the separator text type defaults to ",".
export type Punctuated<T, Separator = ","> = { Pair<T, Separator> }
41+
42+
--- A local variable, represented by its name token.
export type AstLocal = {
43+
name: Token<string>,
44+
}
45+
46+
--- String literal expression: a Token intersected with quoting information.
-- The printer wraps `text` in the delimiters selected by `quoteStyle`; for
-- "block" strings it uses `blockDepth` as the number of `=` characters placed
-- between the brackets.
export type AstExprConstantString = Token<string> & {
47+
tag: "string",
48+
quoteStyle: "single" | "double" | "block" | "interp",
49+
blockDepth: number,
50+
}
51+
52+
--- Expression node tagged "local": `token` plus the AstLocal it is associated with.
-- `local` is a reserved word, so the field is declared with bracket syntax.
-- NOTE(review): `upvalue` presumably marks a local captured from an enclosing
-- function — confirm against the serializer.
export type AstExprLocal = {
53+
tag: "local",
54+
token: Token<string>,
55+
["local"]: AstLocal,
56+
upvalue: boolean,
57+
}
58+
59+
--- Expression node tagged "global", carrying the name token.
export type AstExprGlobal = {
60+
tag: "global",
61+
name: Token,
62+
}
63+
64+
--- Call expression: the called expression, its argument list, and the optional
-- parenthesis tokens around the arguments.
-- NOTE(review): the parentheses are presumably nil for calls written without
-- them (a single string or table literal argument) — confirm.
export type AstExprCall = {
65+
tag: "call",
66+
func: AstExpr, -- TODO: stricter?
67+
openParens: Token<"(">?,
68+
arguments: Punctuated<AstExpr>,
69+
closeParens: Token<")">?,
70+
}
71+
72+
--- Index-by-name expression: `expr`, an accessor token ("." or ":"), and the
-- `index` name token together with its source location.
export type AstExprIndexName = {
73+
tag: "indexname",
74+
expr: AstExpr,
75+
accessor: Token<"." | ":">,
76+
index: Token<string>,
77+
indexLocation: Location,
78+
}
79+
80+
--- Index-by-expression node: the indexed expression and the index expression.
-- No punctuation tokens are stored on this node.
export type AstExprIndexExpr = {
81+
tag: "index",
82+
expr: AstExpr,
83+
index: AstExpr,
84+
}
85+
86+
--- Binary expression: left operand, operator token, right operand.
-- The operator is an unconstrained Token (any text) for now.
export type AstExprBinary = {
87+
tag: "binary",
88+
lhsoperand: AstExpr,
89+
operator: Token, -- TODO: enforce token type
90+
rhsoperand: AstExpr,
91+
}
92+
93+
--- One item of a table constructor; discriminate on `kind`:
--   "list"    - a value only
--   "record"  - a string key, the `=` token, and a value
--   "general" - same field shape as "record"
-- NOTE(review): "general" types `key` as `string`, like "record". If it stands
-- for `[expr] = value` items, an AstExpr key may be intended — confirm against
-- the serializer.
export type AstExprTableItem =
94+
| { kind: "list", value: AstExpr }
95+
| { kind: "record", key: string, equals: Token<"=">, value: AstExpr }
96+
| { kind: "general", key: string, equals: Token<"=">, value: AstExpr }
97+
98+
--- Table constructor expression: the brace tokens and the items between them.
-- `entries` is a list of AstExprTableItem. It was previously typed `{ never }`,
-- which made every item uninhabitable for the type checker and left
-- AstExprTableItem unused.
export type AstExprTable = {
    tag: "table",
    openBrace: Token<"{">,
    entries: { AstExprTableItem },
    closeBrace: Token<"}">,
}
104+
105+
--- Any expression node; discriminate on the `tag` field.
export type AstExpr =
106+
| AstExprConstantString
107+
| AstExprLocal
108+
| AstExprGlobal
109+
| AstExprCall
110+
| AstExprIndexName
111+
| AstExprIndexExpr
112+
| AstExprBinary
113+
| AstExprTable
114+
115+
--- A block: a list of statements. This is the type returned by the parser
-- module's `parse` and accepted by the printer module's `print`.
export type AstStatBlock = {
116+
tag: "block",
117+
statements: { AstStat },
118+
}
119+
120+
--- Return statement: the `return` keyword token and the returned expressions.
-- `return` is a reserved word, so the field is declared with bracket syntax.
export type AstStatReturn = {
121+
tag: "return",
122+
["return"]: Token<"return">,
123+
expressions: Punctuated<AstExpr>,
124+
}
125+
126+
--- Statement that consists of a single expression.
export type AstStatExpr = {
127+
tag: "expression",
128+
expression: AstExpr,
129+
}
130+
131+
--- Local declaration statement: the `local` keyword token, the declared
-- variables, an optional `=` token, and the assigned values.
-- `local` is a reserved word, so the field is declared with bracket syntax.
export type AstStatLocal = {
132+
tag: "local",
133+
["local"]: Token<"local">,
134+
variables: Punctuated<AstLocal>,
135+
equals: Token<"=">?,
136+
values: Punctuated<AstExpr>,
137+
}
138+
139+
--- Any statement node; discriminate on the `tag` field.
export type AstStat = AstStatBlock | AstStatReturn | AstStatExpr | AstStatLocal
140+
141+
-- This module only exports types, so it returns an empty table at runtime.
return {}

batteries/syntax/parser.luau

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
--!strict
2+
3+
local luau = require("@lute/luau")
4+
local T = require("./ast_types")
5+
6+
--- Parses Luau source code and returns the root block of the parse result.
-- @param source the Luau source text to parse
-- @return the `root` field of the value produced by `luau.parse`
local function parse(source: string): T.AstStatBlock
    local parseResult = luau.parse(source)
    return parseResult.root
end
10+
11+
-- Public API of the parser module.
return {
12+
parse = parse,
13+
}

batteries/syntax/printer.luau

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
--!strict
2+
local T = require("./ast_types")
3+
local visitor = require("./visitor")
4+
5+
--- Always raises an error. Call it from the final `else` of a match over a
-- union: the `never` parameter only type-checks when every case was handled.
-- @param value the value that fell through every handled case
local function exhaustiveMatch(value: never): never
    local message = `Unknown value in exhaustive match: {value}`
    error(message)
end
8+
9+
--- Renders a single trivia token back to source text.
-- Every known trivia kind prints as its stored `text`; an unknown tag raises
-- through exhaustiveMatch.
-- @param trivia the trivia token to render
-- @return the trivia's source text
local function printTrivia(trivia: T.Trivia): string
    local kind = trivia.tag
    if kind ~= "whitespace" and kind ~= "comment" and kind ~= "blockcomment" then
        return exhaustiveMatch(kind)
    end
    return trivia.text
end
16+
17+
--- Renders a list of trivia tokens back to source text, in list order.
-- @param trivia the trivia tokens to render (may be empty)
-- @return the concatenated text of every token; "" for an empty list
local function printTriviaList(trivia: { T.Trivia }): string
    -- Collect the pieces and join once instead of growing a string with `..=`
    -- on every iteration. The loop variable has its own name so it no longer
    -- shadows the `trivia` parameter.
    local pieces: { string } = {}
    for _, item in trivia do
        table.insert(pieces, printTrivia(item))
    end
    return table.concat(pieces)
end
24+
25+
--- Renders a token as its leading trivia, its own text, then its trailing trivia.
-- @param token the token to render
-- @return the token's source text including surrounding trivia
local function printToken(token: T.Token): string
    local leading = printTriviaList(token.leadingTrivia)
    local trailing = printTriviaList(token.trailingTrivia)
    return leading .. token.text .. trailing
end
28+
29+
--- Renders a string literal expression, re-adding the delimiters selected by
-- its `quoteStyle`, surrounded by the node's leading and trailing trivia.
-- @param expr the string literal node
-- @return the literal's source text including trivia
local function printString(expr: T.AstExprConstantString): string
    local leading = printTriviaList(expr.leadingTrivia)
    local style = expr.quoteStyle
    local literal: string

    if style == "single" then
        literal = `'{expr.text}'`
    elseif style == "double" then
        literal = `"{expr.text}"`
    elseif style == "block" then
        -- blockDepth is the number of `=` characters between the brackets.
        local padding = string.rep("=", expr.blockDepth)
        literal = `[{padding}[{expr.text}]{padding}]`
    elseif style == "interp" then
        literal = "`" .. expr.text .. "`"
    else
        return exhaustiveMatch(style)
    end

    return leading .. literal .. printTriviaList(expr.trailingTrivia)
end
48+
49+
--- Serializes an AstStatBlock to a string.
-- Walks the block with a visitor whose token and string handlers contribute
-- their printed text; both handlers return false.
-- @param block the block to print
-- @return the text produced by the visited tokens and string literals, in visit order
local function printBlock(block: T.AstStatBlock): string
    local chunks: { string } = {}
    local printer = visitor.createVisitor()

    printer.visitString = function(node: T.AstExprConstantString)
        table.insert(chunks, printString(node))
        return false
    end

    printer.visitToken = function(node: T.Token)
        table.insert(chunks, printToken(node))
        return false
    end

    visitor.visitBlock(block, printer)

    return table.concat(chunks)
end
68+
69+
-- Public API of the printer module: `print` is printBlock.
return {
70+
print = printBlock,
71+
}

0 commit comments

Comments
 (0)