Skip to content

Commit cc9b8b5

Browse files
authored
feat(compiler): Allow custom infix operators (#1419)
1 parent 57776f2 commit cc9b8b5

File tree

11 files changed

+2168
-2029
lines changed

11 files changed

+2168
-2029
lines changed

compiler/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"parser:interpret": "esy b menhir src/parsing/parser.mly --unused-tokens --interpret",
3030
"parser:interpret-error": "esy b menhir src/parsing/parser.mly --unused-tokens --interpret-error",
3131
"parser:list-errors": "esy b menhir src/parsing/parser.mly --unused-tokens --list-errors > src/parsing/parser.messages.generated",
32+
"parser:update-errors": "esy b menhir src/parsing/parser.mly --unused-tokens --update-errors src/parsing/parser.messages > src/parsing/parser.messages.generated && cp src/parsing/parser.messages.generated src/parsing/parser.messages",
3233
"parser:check-errors": "npm run parser:list-errors && esy b menhir src/parsing/parser.mly --unused-tokens --compare-errors src/parsing/parser.messages.generated --compare-errors src/parsing/parser.messages",
3334
"import-dependencies": "esy import-dependencies _export",
3435
"export-dependencies": "esy export-dependencies",

compiler/src/formatting/format.re

Lines changed: 52 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -8,33 +8,40 @@ module Doc = Res_doc;
88

99
let exception_primitives = [|"throw", "fail", "assert"|];
1010

11-
let op_precedence = fn =>
12-
switch (fn) {
13-
| "*"
14-
| "/"
15-
| "%" => 120
16-
| "+"
17-
| "-"
18-
| "++" => 110
19-
| "<<"
20-
| ">>"
21-
| ">>>" => 100
22-
| "<"
23-
| "<="
24-
| ">"
25-
| ">=" => 90
26-
| "=="
27-
| "!="
28-
| "is"
29-
| "isnt" => 80
30-
| "&" => 70
31-
| "^" => 60
32-
| "|" => 50
33-
| "&&" => 40
34-
| "||" => 30
35-
| "_" => 10
36-
| _ => 9999
11+
let op_precedence = fn => {
12+
let op_precedence = fn =>
13+
switch (fn) {
14+
| '*'
15+
| '/'
16+
| '%' => 120
17+
| '+'
18+
| '-' => 110
19+
| '<'
20+
| '>' => 90
21+
| '&' => 70
22+
| '^' => 60
23+
| '|' => 50
24+
| '_' => 10
25+
| _ => 9999
26+
};
27+
if (String.length(fn) > 1) {
28+
switch (String.sub(fn, 0, 2)) {
29+
| "++" => 110
30+
| "<<"
31+
| ">>" => 100
32+
| "=="
33+
| "!="
34+
| "is" => 80
35+
| "&&" => 40
36+
| "||" => 30
37+
| _ => op_precedence(fn.[0])
38+
};
39+
} else if (String.length(fn) > 0) {
40+
op_precedence(fn.[0]);
41+
} else {
42+
9999;
3743
};
44+
};
3845
let list_cons = "[...]";
3946

4047
exception IllegalParse(string);
@@ -184,37 +191,31 @@ let add_parens = (doc: Doc.t) =>
184191
]);
185192

186193
let infixop = (op: string) => {
187-
switch (op) {
188-
| "+"
189-
| "-"
190-
| "*"
191-
| "/"
192-
| "%"
193-
| "is"
194-
| "isnt"
195-
| "=="
196-
| "++"
197-
| "!="
198-
| "^"
199-
| "<"
200-
| "<<"
201-
| ">"
202-
| ">>"
203-
| ">>>"
204-
| "<="
205-
| ">="
206-
| "&"
207-
| "&&"
208-
| "|"
209-
| "||" => true
194+
switch (op.[0]) {
195+
| '+'
196+
| '-'
197+
| '*'
198+
| '/'
199+
| '%'
200+
| '='
201+
| '^'
202+
| '<'
203+
| '>'
204+
| '&'
205+
| '|' => true
206+
| _ when op == "is" => true
207+
| _ when op == "isnt" => true
208+
| _ when op == "!=" => true
210209
| _ => false
210+
| exception _ => false
211211
};
212212
};
213213

214214
let prefixop = (op: string) => {
215-
switch (op) {
216-
| "!" => true
215+
switch (op.[0]) {
216+
| '!' => true
217217
| _ => false
218+
| exception _ => false
218219
};
219220
};
220221

compiler/src/parsing/lexer.re

Lines changed: 66 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,33 @@ let lident = [%sedlex.regexp?
118118
(Sub(xid_start, lu) | '_', Star(xid_continue))
119119
];
120120

121+
let operator_char = [%sedlex.regexp?
122+
'$' | '&' | '*' | '/' | '+' | '-' | '=' | '>' | '<' | '^' | '|' | '!' | '?' |
123+
'%' |
124+
':' |
125+
'.'
126+
];
127+
let operator_chars = [%sedlex.regexp? Star(operator_char)];
128+
129+
// We make sure that `>` operators contain at least one non-`>` char to not
130+
// confuse them for `>>>` chains at the end of types, e.g. `List<Option<Box<a>>>`
131+
let rcaret_operator_char = [%sedlex.regexp? Sub(operator_char, '>')];
132+
let rcaret_operator_chars = [%sedlex.regexp?
133+
(operator_chars, rcaret_operator_char, operator_chars)
134+
];
135+
136+
// Similarly, we do this for `<` even though it's a simpler case
137+
let lcaret_operator_char = [%sedlex.regexp? Sub(operator_char, '<')];
138+
let lcaret_operator_chars = [%sedlex.regexp?
139+
(operator_chars, lcaret_operator_char, operator_chars)
140+
];
141+
142+
// Infix operators are not allowed to start with `//` or `/*` as they
143+
// indicate comments
144+
let slash_operator_chars = [%sedlex.regexp?
145+
(Sub(operator_char, '/' | '*'), operator_chars)
146+
];
147+
121148
// Tabs and space separators (https://www.compart.com/en/unicode/category/Zs)
122149
let blank = [%sedlex.regexp? Plus(zs | '\t')];
123150

@@ -203,6 +230,10 @@ let rec token = lexbuf => {
203230
| "export" => positioned(EXPORT)
204231
| "except" => positioned(EXCEPT)
205232
| "from" => positioned(FROM)
233+
| "*" => positioned(STAR)
234+
| "/" => positioned(SLASH)
235+
| "|" => positioned(PIPE)
236+
| "-" => positioned(DASH)
206237
| "->" => positioned(ARROW)
207238
| "=>" => positioned(THICKARROW)
208239
| "type" => positioned(TYPE)
@@ -223,9 +254,6 @@ let rec token = lexbuf => {
223254
| "::" => positioned(COLONCOLON)
224255
| ":=" => positioned(GETS)
225256
| ":" => positioned(COLON)
226-
| "is" => positioned(IS)
227-
| "isnt" => positioned(ISNT)
228-
| "==" => positioned(EQEQ)
229257
| "=" => positioned(EQUAL)
230258
| "," => positioned(COMMA)
231259
| ";" => positioned(SEMI)
@@ -237,32 +265,44 @@ let rec token = lexbuf => {
237265
| "[" => positioned(LBRACK)
238266
| "[>" => positioned(LBRACKRCARET)
239267
| "]" => positioned(RBRACK)
240-
| "^" => positioned(CARET)
241268
| "<" => positioned(LCARET)
242-
| "<<" => positioned(LCARETLCARET)
243269
/* We do not lex >> or >>> as a single token as type vectors can contain
244-
these, e.g. List<Option<a>> */
270+
these, e.g. List<Option<a>>. An operator like `>>>>` is lexed as four
271+
separate tokens and the parser sorts it out. */
245272
| ">" => positioned(RCARET)
246-
| "^" => positioned(CARET)
247-
| "++" => positioned(PLUSPLUS)
248-
| "+" => positioned(PLUS)
249-
| "-" => positioned(DASH)
250-
| "*" => positioned(STAR)
251-
| "/" => positioned(SLASH)
252-
| "%" => positioned(PERCENT)
253-
| "+=" => positioned(PLUSEQ)
254-
| "-=" => positioned(DASHEQ)
255-
| "*=" => positioned(STAREQ)
256-
| "/=" => positioned(SLASHEQ)
257-
| "%=" => positioned(PERCENTEQ)
258-
| "<=" => positioned(LESSEQ)
259-
| ">=" => positioned(GREATEREQ)
260-
| "&" => positioned(AMP)
261-
| "&&" => positioned(AMPAMP)
262-
| "|" => positioned(PIPE)
263-
| "||" => positioned(PIPEPIPE)
264-
| "!" => positioned(NOT)
265-
| "!=" => positioned(NOTEQ)
273+
/* The order of these is somewhat important and is why they are
274+
not sorted by precedence */
275+
| "+="
276+
| "-="
277+
| "*="
278+
| "/="
279+
| "%=" =>
280+
positioned(INFIX_ASSIGNMENT_10(Sedlexing.Utf8.sub_lexeme(lexbuf, 0, 1)))
281+
| ("==" | "!=", operator_chars)
282+
| "is"
283+
| "isnt" => positioned(INFIX_80(Sedlexing.Utf8.lexeme(lexbuf)))
284+
| ("<<", operator_chars)
285+
| (">>", rcaret_operator_chars) =>
286+
positioned(INFIX_100(Sedlexing.Utf8.lexeme(lexbuf)))
287+
| ("<", lcaret_operator_chars)
288+
| (">", rcaret_operator_chars) =>
289+
positioned(INFIX_90(Sedlexing.Utf8.lexeme(lexbuf)))
290+
| ("^", operator_chars) =>
291+
positioned(INFIX_60(Sedlexing.Utf8.lexeme(lexbuf)))
292+
| ("+" | "-", operator_chars) =>
293+
positioned(INFIX_110(Sedlexing.Utf8.lexeme(lexbuf)))
294+
| ("*" | "%", operator_chars)
295+
| ("/", slash_operator_chars) =>
296+
positioned(INFIX_120(Sedlexing.Utf8.lexeme(lexbuf)))
297+
| ("&&", operator_chars) =>
298+
positioned(INFIX_40(Sedlexing.Utf8.lexeme(lexbuf)))
299+
| ("&", operator_chars) =>
300+
positioned(INFIX_70(Sedlexing.Utf8.lexeme(lexbuf)))
301+
| ("||" | "??", operator_chars) =>
302+
positioned(INFIX_30(Sedlexing.Utf8.lexeme(lexbuf)))
303+
| ("|", operator_chars) =>
304+
positioned(INFIX_50(Sedlexing.Utf8.lexeme(lexbuf)))
305+
| "!" => positioned(PREFIX_150(Sedlexing.Utf8.lexeme(lexbuf)))
266306
| "@" => positioned(AT)
267307
| '"' =>
268308
let (start_p, _) = Sedlexing.lexing_positions(lexbuf);

0 commit comments

Comments
 (0)